diff options
author | Chocobozzz <me@florianbigard.com> | 2021-10-21 16:28:39 +0200 |
---|---|---|
committer | Chocobozzz <me@florianbigard.com> | 2021-10-22 10:25:24 +0200 |
commit | 62549e6c9818f422698f030e0b242609115493ed (patch) | |
tree | 12a969f694239fe5f926f779698df9523605ee80 /server/helpers/youtube-dl.ts | |
parent | a71d4140a5b7831dbe2eb7a0dfaa6a755cb2e906 (diff) | |
download | PeerTube-62549e6c9818f422698f030e0b242609115493ed.tar.gz PeerTube-62549e6c9818f422698f030e0b242609115493ed.tar.zst PeerTube-62549e6c9818f422698f030e0b242609115493ed.zip |
Rewrite youtube-dl import
Use python3 binary
Allow using a custom youtube-dl release URL
Allow using yt-dlp (youtube-dl fork)
Remove proxy config from configuration to use HTTP_PROXY and HTTPS_PROXY
env variables
Diffstat (limited to 'server/helpers/youtube-dl.ts')
-rw-r--r-- | server/helpers/youtube-dl.ts | 394 |
1 files changed, 0 insertions, 394 deletions
diff --git a/server/helpers/youtube-dl.ts b/server/helpers/youtube-dl.ts deleted file mode 100644 index 0392ec4c7..000000000 --- a/server/helpers/youtube-dl.ts +++ /dev/null | |||
@@ -1,394 +0,0 @@ | |||
1 | import { createWriteStream } from 'fs' | ||
2 | import { ensureDir, move, pathExists, remove, writeFile } from 'fs-extra' | ||
3 | import { join } from 'path' | ||
4 | import { CONFIG } from '@server/initializers/config' | ||
5 | import { HttpStatusCode } from '../../shared/models/http/http-error-codes' | ||
6 | import { VideoResolution } from '../../shared/models/videos' | ||
7 | import { CONSTRAINTS_FIELDS, VIDEO_CATEGORIES, VIDEO_LANGUAGES, VIDEO_LICENCES } from '../initializers/constants' | ||
8 | import { peertubeTruncate, pipelinePromise, root } from './core-utils' | ||
9 | import { isVideoFileExtnameValid } from './custom-validators/videos' | ||
10 | import { logger } from './logger' | ||
11 | import { peertubeGot } from './requests' | ||
12 | import { generateVideoImportTmpPath } from './utils' | ||
13 | |||
/**
 * Video metadata built from the raw youtube-dl info object.
 * Every member is optional.
 */
export type YoutubeDLInfo = {
  // Textual metadata
  name?: string
  description?: string
  tags?: string[]

  // Classification
  category?: number
  licence?: number
  language?: string
  nsfw?: boolean

  // Media details
  thumbnailUrl?: string
  ext?: string
  originallyPublishedAt?: Date
}
26 | |||
/**
 * List of subtitle files, one entry per file.
 */
export type YoutubeDLSubs = Array<{
  // Language code of the subtitle
  language: string
  // Name of the subtitle file
  filename: string
  // Path of the subtitle file
  path: string
}>
32 | |||
// Process options passed along with every youtube-dl call made in this file
const processOptions = {
  // 10MB
  maxBuffer: 10 * 1024 * 1024
}
36 | |||
/**
 * Wrapper around the `youtube-dl` node module, bound to a single video URL.
 *
 * It can fetch the video metadata, its subtitles and the video file itself, and
 * exposes static helpers to load the module and update the youtube-dl binary.
 */
class YoutubeDL {

  /**
   * @param url URL of the video to import
   * @param enabledResolutions resolutions used to choose the preferred download height
   */
  constructor (private readonly url: string = '', private readonly enabledResolutions: number[] = []) {

  }

  /**
   * Fetch the metadata of the video and convert it to a `YoutubeDLInfo` object.
   *
   * @param opts additional youtube-dl arguments (the array is not modified)
   * @returns the video information; rejects if youtube-dl fails or if the video is a live stream
   */
  getYoutubeDLInfo (opts?: string[]): Promise<YoutubeDLInfo> {
    return new Promise<YoutubeDLInfo>((res, rej) => {
      // Work on a copy so the caller's array is never mutated
      let args = [ ...(opts || []) ]

      if (CONFIG.IMPORT.VIDEOS.HTTP.FORCE_IPV4) {
        args.push('--force-ipv4')
      }

      args = this.wrapWithProxyOptions(args)
      args = [ '-f', this.getYoutubeDLVideoFormat() ].concat(args)

      YoutubeDL.safeGetYoutubeDL()
        .then(youtubeDL => {
          youtubeDL.getInfo(this.url, args, processOptions, (err, info) => {
            if (err) return rej(err)
            if (info.is_live === true) return rej(new Error('Cannot download a live streaming.'))

            const obj = this.buildVideoInfo(this.normalizeObject(info))
            // Pad names that are too short to pass the name constraint
            if (obj.name && obj.name.length < CONSTRAINTS_FIELDS.VIDEOS.NAME.min) obj.name += ' video'

            return res(obj)
          })
        })
        .catch(err => rej(err))
    })
  }

  /**
   * Ask youtube-dl for the subtitles of the video.
   *
   * @param opts options forwarded to youtube-dl; defaults to all subtitles in vtt format in the tmp directory
   * @returns the subtitle files whose name contains a language code, or an empty list if there are none
   */
  getYoutubeDLSubs (opts?: object): Promise<YoutubeDLSubs> {
    return new Promise<YoutubeDLSubs>((res, rej) => {
      const cwd = CONFIG.STORAGE.TMP_DIR
      const options = opts || { all: true, format: 'vtt', cwd }

      YoutubeDL.safeGetYoutubeDL()
        .then(youtubeDL => {
          youtubeDL.getSubs(this.url, options, (err, files) => {
            if (err) return rej(err)
            // Resolve explicitly: a bare `return` here would leave the promise pending forever
            if (!files) return res([])

            logger.debug('Get subtitles from youtube dl.', { url: this.url, files })

            const subtitles: YoutubeDLSubs = []

            for (const filename of files) {
              // Expected file name: <name>.<2 letters language>[-<variant>].<vtt|ttml>
              const matched = filename.match(/\.([a-z]{2})(-[a-z]+)?\.(vtt|ttml)/i)
              if (!matched || !matched[1]) continue

              subtitles.push({
                language: matched[1],
                path: join(cwd, filename),
                filename
              })
            }

            return res(subtitles)
          })
        })
        .catch(err => rej(err))
    })
  }

  /**
   * Build the youtube-dl format selector string.
   *
   * @returns the format selectors joined by '/', from the most to the least preferred
   */
  getYoutubeDLVideoFormat () {
    /**
     * list of format selectors in order of preference
     * see https://github.com/ytdl-org/youtube-dl#format-selection
     *
     * case #1 asks for a mp4 using h264 (avc1) and the exact resolution in the hope
     * of being able to do a "quick-transcode"
     * case #2 is the first fallback. No "quick-transcode" means we can get anything else (like vp9)
     * case #3 is the resolution-degraded equivalent of #1, and already a pretty safe fallback
     *
     * in any case we avoid AV1, see https://github.com/Chocobozzz/PeerTube/issues/3499
     **/
    const resolution = this.enabledResolutions.length === 0
      ? VideoResolution.H_720P
      : Math.max(...this.enabledResolutions)

    return [
      `bestvideo[vcodec^=avc1][height=${resolution}]+bestaudio[ext=m4a]`, // case #1
      `bestvideo[vcodec!*=av01][vcodec!*=vp9.2][height=${resolution}]+bestaudio`, // case #2
      `bestvideo[vcodec^=avc1][height<=${resolution}]+bestaudio[ext=m4a]`, // case #3
      `bestvideo[vcodec!*=av01][vcodec!*=vp9.2]+bestaudio`,
      'best[vcodec!*=av01][vcodec!*=vp9.2]', // case fallback for known formats
      'best' // Ultimate fallback
    ].join('/')
  }

  /**
   * Download the video in a temporary file.
   *
   * @param fileExt extension expected for the downloaded file, tried first when looking for it on disk
   * @param timeout delay in milliseconds after which the returned promise is rejected
   * @returns the path of the downloaded file; rejects with the youtube-dl error if the download failed
   */
  downloadYoutubeDLVideo (fileExt: string, timeout: number) {
    // Leave empty the extension, youtube-dl will add it
    const pathWithoutExtension = generateVideoImportTmpPath(this.url, '')

    let timer: ReturnType<typeof setTimeout>

    logger.info('Importing youtubeDL video %s to %s', this.url, pathWithoutExtension)

    let options = [ '-f', this.getYoutubeDLVideoFormat(), '-o', pathWithoutExtension ]
    options = this.wrapWithProxyOptions(options)

    if (process.env.FFMPEG_PATH) {
      options = options.concat([ '--ffmpeg-location', process.env.FFMPEG_PATH ])
    }

    logger.debug('YoutubeDL options for %s.', this.url, { options })

    return new Promise<string>((res, rej) => {
      YoutubeDL.safeGetYoutubeDL()
        .then(youtubeDL => {
          youtubeDL.exec(this.url, options, processOptions, async err => {
            clearTimeout(timer)

            try {
              // If youtube-dl did not guess an extension for our file, just use .mp4 as default
              if (await pathExists(pathWithoutExtension)) {
                await move(pathWithoutExtension, pathWithoutExtension + '.mp4')
              }

              if (err) {
                // Best effort cleanup of a partial download: there may be no file at all,
                // and this must never hide the youtube-dl error
                this.guessVideoPathWithExtension(pathWithoutExtension, fileExt)
                  .then(
                    path => remove(path)
                      .catch(removeErr => logger.error('Cannot delete path on YoutubeDL error.', { err: removeErr })),
                    guessErr => logger.debug('No file to delete on YoutubeDL error.', { err: guessErr })
                  )

                return rej(err)
              }

              return res(await this.guessVideoPathWithExtension(pathWithoutExtension, fileExt))
            } catch (unexpectedErr) {
              // Prefer the youtube-dl error since it is the root cause
              return rej(err || unexpectedErr)
            }
          })

          timer = setTimeout(() => {
            const timeoutErr = new Error('YoutubeDL download timeout.')

            this.guessVideoPathWithExtension(pathWithoutExtension, fileExt)
              .then(path => remove(path))
              .catch(cleanupErr => logger.error('Cannot remove file in youtubeDL timeout.', { err: cleanupErr }))
              .finally(() => rej(timeoutErr))
          }, timeout)
        })
        .catch(err => rej(err))
    })
  }

  /**
   * Convert the `upload_date` field (YYYYMMDD) of a youtube-dl info object to a date.
   *
   * @param obj youtube-dl info object
   * @returns the upload date at midnight (local time), or null if the field does not match YYYYMMDD
   */
  buildOriginallyPublishedAt (obj: any) {
    const uploadDateMatcher = /^(\d{4})(\d{2})(\d{2})$/.exec(obj.upload_date)
    if (!uploadDateMatcher) return null

    const year = parseInt(uploadDateMatcher[1], 10)
    // Month starts from 0
    const month = parseInt(uploadDateMatcher[2], 10) - 1
    const day = parseInt(uploadDateMatcher[3], 10)

    const originallyPublishedAt = new Date()
    originallyPublishedAt.setHours(0, 0, 0, 0)
    originallyPublishedAt.setFullYear(year, month, day)

    return originallyPublishedAt
  }

  /**
   * Find the downloaded file by trying the source extension, then .mp4, .mkv and .webm.
   *
   * @throws if `sourceExt` is not a valid video extension or if no candidate file exists
   */
  private async guessVideoPathWithExtension (tmpPath: string, sourceExt: string) {
    if (!isVideoFileExtnameValid(sourceExt)) {
      throw new Error('Invalid video extension ' + sourceExt)
    }

    const extensions = [ sourceExt, '.mp4', '.mkv', '.webm' ]

    for (const extension of extensions) {
      const path = tmpPath + extension

      if (await pathExists(path)) return path
    }

    throw new Error('Cannot guess path of ' + tmpPath)
  }

  /**
   * Copy `obj` without its `resolution` key, applying Unicode normalization to string values.
   */
  private normalizeObject (obj: any) {
    const newObj: any = {}

    for (const key of Object.keys(obj)) {
      // Deprecated key
      if (key === 'resolution') continue

      const value = obj[key]

      newObj[key] = typeof value === 'string'
        ? value.normalize()
        : value
    }

    return newObj
  }

  /**
   * Map a (normalized) youtube-dl info object to a `YoutubeDLInfo` object.
   */
  private buildVideoInfo (obj: any): YoutubeDLInfo {
    return {
      name: this.titleTruncation(obj.title),
      description: this.descriptionTruncation(obj.description),
      category: this.getCategory(obj.categories),
      licence: this.getLicence(obj.license),
      language: this.getLanguage(obj.language),
      nsfw: this.isNSFW(obj),
      tags: this.getTags(obj.tags),
      thumbnailUrl: obj.thumbnail || undefined,
      originallyPublishedAt: this.buildOriginallyPublishedAt(obj),
      ext: obj.ext
    }
  }

  /**
   * Truncate the title to the maximum video name length.
   */
  private titleTruncation (title: string) {
    return peertubeTruncate(title, {
      length: CONSTRAINTS_FIELDS.VIDEOS.NAME.max,
      separator: /,? +/,
      omission: ' […]'
    })
  }

  /**
   * Truncate the description to the maximum video description length.
   *
   * @returns undefined if the description is missing or shorter than the minimum length
   */
  private descriptionTruncation (description: string) {
    if (!description || description.length < CONSTRAINTS_FIELDS.VIDEOS.DESCRIPTION.min) return undefined

    return peertubeTruncate(description, {
      length: CONSTRAINTS_FIELDS.VIDEOS.DESCRIPTION.max,
      separator: /,? +/,
      omission: ' […]'
    })
  }

  /**
   * A video is considered NSFW when its `age_limit` is at least 16.
   *
   * @returns always a boolean, even if `age_limit` is missing or 0
   */
  private isNSFW (info: any): boolean {
    return Boolean(info.age_limit && info.age_limit >= 16)
  }

  /**
   * Keep at most 5 tags whose length is strictly between the tag length bounds.
   * Entries that are not strings are ignored.
   */
  private getTags (tags: any) {
    if (Array.isArray(tags) === false) return []

    return tags
      // Skip non string entries so `.length` and `.normalize()` cannot throw
      .filter(t => typeof t === 'string')
      .filter(t => t.length < CONSTRAINTS_FIELDS.VIDEOS.TAG.max && t.length > CONSTRAINTS_FIELDS.VIDEOS.TAG.min)
      .map(t => t.normalize())
      .slice(0, 5)
  }

  /**
   * Find the licence id matching the licence label returned by youtube-dl.
   *
   * @returns the licence id, or undefined if there is no match
   */
  private getLicence (licence: string) {
    if (!licence) return undefined

    if (licence.includes('Creative Commons Attribution')) return 1

    for (const key of Object.keys(VIDEO_LICENCES)) {
      const peertubeLicence = VIDEO_LICENCES[key]
      if (peertubeLicence.toLowerCase() === licence.toLowerCase()) return parseInt(key, 10)
    }

    return undefined
  }

  /**
   * Find the category id matching the first category returned by youtube-dl.
   *
   * @returns the category id, or undefined if there is no match
   */
  private getCategory (categories: string[]) {
    if (!categories) return undefined

    const categoryString = categories[0]
    if (!categoryString || typeof categoryString !== 'string') return undefined

    if (categoryString === 'News & Politics') return 11

    for (const key of Object.keys(VIDEO_CATEGORIES)) {
      const category = VIDEO_CATEGORIES[key]
      if (categoryString.toLowerCase() === category.toLowerCase()) return parseInt(key, 10)
    }

    return undefined
  }

  /**
   * @returns the language if it is a key of VIDEO_LANGUAGES, undefined otherwise
   */
  private getLanguage (language: string) {
    return VIDEO_LANGUAGES[language] ? language : undefined
  }

  /**
   * Prepend the `--proxy` option to the youtube-dl arguments if the proxy is enabled in the configuration.
   */
  private wrapWithProxyOptions (options: string[]) {
    if (CONFIG.IMPORT.VIDEOS.HTTP.PROXY.ENABLED) {
      logger.debug('Using proxy for YoutubeDL')

      return [ '--proxy', CONFIG.IMPORT.VIDEOS.HTTP.PROXY.URL ].concat(options)
    }

    return options
  }

  // Thanks: https://github.com/przemyslawpluta/node-youtube-dl/blob/master/lib/downloader.js
  // We rewrote it to avoid sync calls
  /**
   * Download the latest youtube-dl binary in the `bin` directory of the youtube-dl node module
   * and write its version in the `details` file.
   *
   * Request and download errors are logged, not thrown.
   */
  static async updateYoutubeDLBinary () {
    logger.info('Updating youtubeDL binary.')

    const binDirectory = join(root(), 'node_modules', 'youtube-dl', 'bin')
    const bin = join(binDirectory, 'youtube-dl')
    const detailsPath = join(binDirectory, 'details')
    const url = process.env.YOUTUBE_DL_DOWNLOAD_HOST || 'https://yt-dl.org/downloads/latest/youtube-dl'

    await ensureDir(binDirectory)

    try {
      const gotContext = { bodyKBLimit: 20_000 }

      // The "latest" URL is expected to redirect to the versioned binary
      const result = await peertubeGot(url, { followRedirect: false, context: gotContext })

      if (result.statusCode !== HttpStatusCode.FOUND_302) {
        logger.error('youtube-dl update error: did not get redirect for the latest version link. Status %d', result.statusCode)
        return
      }

      const newUrl = result.headers.location
      const versionMatcher = /\/(\d{4}\.\d\d\.\d\d(\.\d)?)\/youtube-dl$/.exec(newUrl || '')

      if (!versionMatcher) {
        logger.error('youtube-dl update error: cannot find the version in the redirect location %s.', newUrl)
        return
      }

      const newVersion = versionMatcher[1]

      const downloadFileStream = peertubeGot.stream(newUrl, { context: gotContext })
      // rwxr-xr-x: the binary must be executable
      const writeStream = createWriteStream(bin, { mode: 0o755 })

      await pipelinePromise(
        downloadFileStream,
        writeStream
      )

      const details = JSON.stringify({ version: newVersion, path: bin, exec: 'youtube-dl' })
      await writeFile(detailsPath, details, { encoding: 'utf8' })

      logger.info('youtube-dl updated to version %s.', newVersion)
    } catch (err) {
      logger.error('Cannot update youtube-dl.', { err })
    }
  }

  /**
   * Load the `youtube-dl` node module.
   * If loading fails, the binary is downloaded first and the module is loaded again.
   */
  static async safeGetYoutubeDL () {
    let youtubeDL

    try {
      youtubeDL = require('youtube-dl')
    } catch (e) {
      // Download binary
      await this.updateYoutubeDLBinary()
      youtubeDL = require('youtube-dl')
    }

    return youtubeDL
  }
}
389 | |||
390 | // --------------------------------------------------------------------------- | ||
391 | |||
392 | export { | ||
393 | YoutubeDL | ||
394 | } | ||