diff options
author | Chocobozzz <me@florianbigard.com> | 2021-10-21 16:28:39 +0200 |
---|---|---|
committer | Chocobozzz <me@florianbigard.com> | 2021-10-22 10:25:24 +0200 |
commit | 62549e6c9818f422698f030e0b242609115493ed (patch) | |
tree | 12a969f694239fe5f926f779698df9523605ee80 /server/helpers | |
parent | a71d4140a5b7831dbe2eb7a0dfaa6a755cb2e906 (diff) | |
download | PeerTube-62549e6c9818f422698f030e0b242609115493ed.tar.gz PeerTube-62549e6c9818f422698f030e0b242609115493ed.tar.zst PeerTube-62549e6c9818f422698f030e0b242609115493ed.zip |
Rewrite youtube-dl import
Use python3 binary
Allow using a custom youtube-dl release URL
Allow using yt-dlp (a youtube-dl fork)
Remove proxy config from configuration to use HTTP_PROXY and HTTPS_PROXY
env variables
Diffstat (limited to 'server/helpers')
-rw-r--r-- | server/helpers/requests.ts | 9 | ||||
-rw-r--r-- | server/helpers/youtube-dl.ts | 394 | ||||
-rw-r--r-- | server/helpers/youtube-dl/index.ts | 3 | ||||
-rw-r--r-- | server/helpers/youtube-dl/youtube-dl-cli.ts | 198 | ||||
-rw-r--r-- | server/helpers/youtube-dl/youtube-dl-info-builder.ts | 154 | ||||
-rw-r--r-- | server/helpers/youtube-dl/youtube-dl-wrapper.ts | 135 |
6 files changed, 497 insertions, 396 deletions
diff --git a/server/helpers/requests.ts b/server/helpers/requests.ts index 991270952..d93f55776 100644 --- a/server/helpers/requests.ts +++ b/server/helpers/requests.ts | |||
@@ -1,9 +1,9 @@ | |||
1 | import { createWriteStream, remove } from 'fs-extra' | 1 | import { createWriteStream, remove } from 'fs-extra' |
2 | import got, { CancelableRequest, Options as GotOptions, RequestError } from 'got' | 2 | import got, { CancelableRequest, Options as GotOptions, RequestError, Response } from 'got' |
3 | import { HttpProxyAgent, HttpsProxyAgent } from 'hpagent' | 3 | import { HttpProxyAgent, HttpsProxyAgent } from 'hpagent' |
4 | import { join } from 'path' | 4 | import { join } from 'path' |
5 | import { CONFIG } from '../initializers/config' | 5 | import { CONFIG } from '../initializers/config' |
6 | import { ACTIVITY_PUB, PEERTUBE_VERSION, REQUEST_TIMEOUT, WEBSERVER } from '../initializers/constants' | 6 | import { ACTIVITY_PUB, BINARY_CONTENT_TYPES, PEERTUBE_VERSION, REQUEST_TIMEOUT, WEBSERVER } from '../initializers/constants' |
7 | import { pipelinePromise } from './core-utils' | 7 | import { pipelinePromise } from './core-utils' |
8 | import { processImage } from './image-utils' | 8 | import { processImage } from './image-utils' |
9 | import { logger } from './logger' | 9 | import { logger } from './logger' |
@@ -180,12 +180,17 @@ function getUserAgent () { | |||
180 | return `PeerTube/${PEERTUBE_VERSION} (+${WEBSERVER.URL})` | 180 | return `PeerTube/${PEERTUBE_VERSION} (+${WEBSERVER.URL})` |
181 | } | 181 | } |
182 | 182 | ||
183 | function isBinaryResponse (result: Response<any>) { | ||
184 | return BINARY_CONTENT_TYPES.has(result.headers['content-type']) | ||
185 | } | ||
186 | |||
183 | // --------------------------------------------------------------------------- | 187 | // --------------------------------------------------------------------------- |
184 | 188 | ||
185 | export { | 189 | export { |
186 | doRequest, | 190 | doRequest, |
187 | doJSONRequest, | 191 | doJSONRequest, |
188 | doRequestAndSaveToFile, | 192 | doRequestAndSaveToFile, |
193 | isBinaryResponse, | ||
189 | downloadImage, | 194 | downloadImage, |
190 | peertubeGot | 195 | peertubeGot |
191 | } | 196 | } |
diff --git a/server/helpers/youtube-dl.ts b/server/helpers/youtube-dl.ts deleted file mode 100644 index 0392ec4c7..000000000 --- a/server/helpers/youtube-dl.ts +++ /dev/null | |||
@@ -1,394 +0,0 @@ | |||
1 | import { createWriteStream } from 'fs' | ||
2 | import { ensureDir, move, pathExists, remove, writeFile } from 'fs-extra' | ||
3 | import { join } from 'path' | ||
4 | import { CONFIG } from '@server/initializers/config' | ||
5 | import { HttpStatusCode } from '../../shared/models/http/http-error-codes' | ||
6 | import { VideoResolution } from '../../shared/models/videos' | ||
7 | import { CONSTRAINTS_FIELDS, VIDEO_CATEGORIES, VIDEO_LANGUAGES, VIDEO_LICENCES } from '../initializers/constants' | ||
8 | import { peertubeTruncate, pipelinePromise, root } from './core-utils' | ||
9 | import { isVideoFileExtnameValid } from './custom-validators/videos' | ||
10 | import { logger } from './logger' | ||
11 | import { peertubeGot } from './requests' | ||
12 | import { generateVideoImportTmpPath } from './utils' | ||
13 | |||
14 | export type YoutubeDLInfo = { | ||
15 | name?: string | ||
16 | description?: string | ||
17 | category?: number | ||
18 | language?: string | ||
19 | licence?: number | ||
20 | nsfw?: boolean | ||
21 | tags?: string[] | ||
22 | thumbnailUrl?: string | ||
23 | ext?: string | ||
24 | originallyPublishedAt?: Date | ||
25 | } | ||
26 | |||
27 | export type YoutubeDLSubs = { | ||
28 | language: string | ||
29 | filename: string | ||
30 | path: string | ||
31 | }[] | ||
32 | |||
33 | const processOptions = { | ||
34 | maxBuffer: 1024 * 1024 * 10 // 10MB | ||
35 | } | ||
36 | |||
37 | class YoutubeDL { | ||
38 | |||
39 | constructor (private readonly url: string = '', private readonly enabledResolutions: number[] = []) { | ||
40 | |||
41 | } | ||
42 | |||
43 | getYoutubeDLInfo (opts?: string[]): Promise<YoutubeDLInfo> { | ||
44 | return new Promise<YoutubeDLInfo>((res, rej) => { | ||
45 | let args = opts || [] | ||
46 | |||
47 | if (CONFIG.IMPORT.VIDEOS.HTTP.FORCE_IPV4) { | ||
48 | args.push('--force-ipv4') | ||
49 | } | ||
50 | |||
51 | args = this.wrapWithProxyOptions(args) | ||
52 | args = [ '-f', this.getYoutubeDLVideoFormat() ].concat(args) | ||
53 | |||
54 | YoutubeDL.safeGetYoutubeDL() | ||
55 | .then(youtubeDL => { | ||
56 | youtubeDL.getInfo(this.url, args, processOptions, (err, info) => { | ||
57 | if (err) return rej(err) | ||
58 | if (info.is_live === true) return rej(new Error('Cannot download a live streaming.')) | ||
59 | |||
60 | const obj = this.buildVideoInfo(this.normalizeObject(info)) | ||
61 | if (obj.name && obj.name.length < CONSTRAINTS_FIELDS.VIDEOS.NAME.min) obj.name += ' video' | ||
62 | |||
63 | return res(obj) | ||
64 | }) | ||
65 | }) | ||
66 | .catch(err => rej(err)) | ||
67 | }) | ||
68 | } | ||
69 | |||
70 | getYoutubeDLSubs (opts?: object): Promise<YoutubeDLSubs> { | ||
71 | return new Promise<YoutubeDLSubs>((res, rej) => { | ||
72 | const cwd = CONFIG.STORAGE.TMP_DIR | ||
73 | const options = opts || { all: true, format: 'vtt', cwd } | ||
74 | |||
75 | YoutubeDL.safeGetYoutubeDL() | ||
76 | .then(youtubeDL => { | ||
77 | youtubeDL.getSubs(this.url, options, (err, files) => { | ||
78 | if (err) return rej(err) | ||
79 | if (!files) return [] | ||
80 | |||
81 | logger.debug('Get subtitles from youtube dl.', { url: this.url, files }) | ||
82 | |||
83 | const subtitles = files.reduce((acc, filename) => { | ||
84 | const matched = filename.match(/\.([a-z]{2})(-[a-z]+)?\.(vtt|ttml)/i) | ||
85 | if (!matched || !matched[1]) return acc | ||
86 | |||
87 | return [ | ||
88 | ...acc, | ||
89 | { | ||
90 | language: matched[1], | ||
91 | path: join(cwd, filename), | ||
92 | filename | ||
93 | } | ||
94 | ] | ||
95 | }, []) | ||
96 | |||
97 | return res(subtitles) | ||
98 | }) | ||
99 | }) | ||
100 | .catch(err => rej(err)) | ||
101 | }) | ||
102 | } | ||
103 | |||
104 | getYoutubeDLVideoFormat () { | ||
105 | /** | ||
106 | * list of format selectors in order or preference | ||
107 | * see https://github.com/ytdl-org/youtube-dl#format-selection | ||
108 | * | ||
109 | * case #1 asks for a mp4 using h264 (avc1) and the exact resolution in the hope | ||
110 | * of being able to do a "quick-transcode" | ||
111 | * case #2 is the first fallback. No "quick-transcode" means we can get anything else (like vp9) | ||
112 | * case #3 is the resolution-degraded equivalent of #1, and already a pretty safe fallback | ||
113 | * | ||
114 | * in any case we avoid AV1, see https://github.com/Chocobozzz/PeerTube/issues/3499 | ||
115 | **/ | ||
116 | const resolution = this.enabledResolutions.length === 0 | ||
117 | ? VideoResolution.H_720P | ||
118 | : Math.max(...this.enabledResolutions) | ||
119 | |||
120 | return [ | ||
121 | `bestvideo[vcodec^=avc1][height=${resolution}]+bestaudio[ext=m4a]`, // case #1 | ||
122 | `bestvideo[vcodec!*=av01][vcodec!*=vp9.2][height=${resolution}]+bestaudio`, // case #2 | ||
123 | `bestvideo[vcodec^=avc1][height<=${resolution}]+bestaudio[ext=m4a]`, // case #3 | ||
124 | `bestvideo[vcodec!*=av01][vcodec!*=vp9.2]+bestaudio`, | ||
125 | 'best[vcodec!*=av01][vcodec!*=vp9.2]', // case fallback for known formats | ||
126 | 'best' // Ultimate fallback | ||
127 | ].join('/') | ||
128 | } | ||
129 | |||
130 | downloadYoutubeDLVideo (fileExt: string, timeout: number) { | ||
131 | // Leave empty the extension, youtube-dl will add it | ||
132 | const pathWithoutExtension = generateVideoImportTmpPath(this.url, '') | ||
133 | |||
134 | let timer | ||
135 | |||
136 | logger.info('Importing youtubeDL video %s to %s', this.url, pathWithoutExtension) | ||
137 | |||
138 | let options = [ '-f', this.getYoutubeDLVideoFormat(), '-o', pathWithoutExtension ] | ||
139 | options = this.wrapWithProxyOptions(options) | ||
140 | |||
141 | if (process.env.FFMPEG_PATH) { | ||
142 | options = options.concat([ '--ffmpeg-location', process.env.FFMPEG_PATH ]) | ||
143 | } | ||
144 | |||
145 | logger.debug('YoutubeDL options for %s.', this.url, { options }) | ||
146 | |||
147 | return new Promise<string>((res, rej) => { | ||
148 | YoutubeDL.safeGetYoutubeDL() | ||
149 | .then(youtubeDL => { | ||
150 | youtubeDL.exec(this.url, options, processOptions, async err => { | ||
151 | clearTimeout(timer) | ||
152 | |||
153 | try { | ||
154 | // If youtube-dl did not guess an extension for our file, just use .mp4 as default | ||
155 | if (await pathExists(pathWithoutExtension)) { | ||
156 | await move(pathWithoutExtension, pathWithoutExtension + '.mp4') | ||
157 | } | ||
158 | |||
159 | const path = await this.guessVideoPathWithExtension(pathWithoutExtension, fileExt) | ||
160 | |||
161 | if (err) { | ||
162 | remove(path) | ||
163 | .catch(err => logger.error('Cannot delete path on YoutubeDL error.', { err })) | ||
164 | |||
165 | return rej(err) | ||
166 | } | ||
167 | |||
168 | return res(path) | ||
169 | } catch (err) { | ||
170 | return rej(err) | ||
171 | } | ||
172 | }) | ||
173 | |||
174 | timer = setTimeout(() => { | ||
175 | const err = new Error('YoutubeDL download timeout.') | ||
176 | |||
177 | this.guessVideoPathWithExtension(pathWithoutExtension, fileExt) | ||
178 | .then(path => remove(path)) | ||
179 | .finally(() => rej(err)) | ||
180 | .catch(err => { | ||
181 | logger.error('Cannot remove file in youtubeDL timeout.', { err }) | ||
182 | return rej(err) | ||
183 | }) | ||
184 | }, timeout) | ||
185 | }) | ||
186 | .catch(err => rej(err)) | ||
187 | }) | ||
188 | } | ||
189 | |||
190 | buildOriginallyPublishedAt (obj: any) { | ||
191 | let originallyPublishedAt: Date = null | ||
192 | |||
193 | const uploadDateMatcher = /^(\d{4})(\d{2})(\d{2})$/.exec(obj.upload_date) | ||
194 | if (uploadDateMatcher) { | ||
195 | originallyPublishedAt = new Date() | ||
196 | originallyPublishedAt.setHours(0, 0, 0, 0) | ||
197 | |||
198 | const year = parseInt(uploadDateMatcher[1], 10) | ||
199 | // Month starts from 0 | ||
200 | const month = parseInt(uploadDateMatcher[2], 10) - 1 | ||
201 | const day = parseInt(uploadDateMatcher[3], 10) | ||
202 | |||
203 | originallyPublishedAt.setFullYear(year, month, day) | ||
204 | } | ||
205 | |||
206 | return originallyPublishedAt | ||
207 | } | ||
208 | |||
209 | private async guessVideoPathWithExtension (tmpPath: string, sourceExt: string) { | ||
210 | if (!isVideoFileExtnameValid(sourceExt)) { | ||
211 | throw new Error('Invalid video extension ' + sourceExt) | ||
212 | } | ||
213 | |||
214 | const extensions = [ sourceExt, '.mp4', '.mkv', '.webm' ] | ||
215 | |||
216 | for (const extension of extensions) { | ||
217 | const path = tmpPath + extension | ||
218 | |||
219 | if (await pathExists(path)) return path | ||
220 | } | ||
221 | |||
222 | throw new Error('Cannot guess path of ' + tmpPath) | ||
223 | } | ||
224 | |||
225 | private normalizeObject (obj: any) { | ||
226 | const newObj: any = {} | ||
227 | |||
228 | for (const key of Object.keys(obj)) { | ||
229 | // Deprecated key | ||
230 | if (key === 'resolution') continue | ||
231 | |||
232 | const value = obj[key] | ||
233 | |||
234 | if (typeof value === 'string') { | ||
235 | newObj[key] = value.normalize() | ||
236 | } else { | ||
237 | newObj[key] = value | ||
238 | } | ||
239 | } | ||
240 | |||
241 | return newObj | ||
242 | } | ||
243 | |||
244 | private buildVideoInfo (obj: any): YoutubeDLInfo { | ||
245 | return { | ||
246 | name: this.titleTruncation(obj.title), | ||
247 | description: this.descriptionTruncation(obj.description), | ||
248 | category: this.getCategory(obj.categories), | ||
249 | licence: this.getLicence(obj.license), | ||
250 | language: this.getLanguage(obj.language), | ||
251 | nsfw: this.isNSFW(obj), | ||
252 | tags: this.getTags(obj.tags), | ||
253 | thumbnailUrl: obj.thumbnail || undefined, | ||
254 | originallyPublishedAt: this.buildOriginallyPublishedAt(obj), | ||
255 | ext: obj.ext | ||
256 | } | ||
257 | } | ||
258 | |||
259 | private titleTruncation (title: string) { | ||
260 | return peertubeTruncate(title, { | ||
261 | length: CONSTRAINTS_FIELDS.VIDEOS.NAME.max, | ||
262 | separator: /,? +/, | ||
263 | omission: ' […]' | ||
264 | }) | ||
265 | } | ||
266 | |||
267 | private descriptionTruncation (description: string) { | ||
268 | if (!description || description.length < CONSTRAINTS_FIELDS.VIDEOS.DESCRIPTION.min) return undefined | ||
269 | |||
270 | return peertubeTruncate(description, { | ||
271 | length: CONSTRAINTS_FIELDS.VIDEOS.DESCRIPTION.max, | ||
272 | separator: /,? +/, | ||
273 | omission: ' […]' | ||
274 | }) | ||
275 | } | ||
276 | |||
277 | private isNSFW (info: any) { | ||
278 | return info.age_limit && info.age_limit >= 16 | ||
279 | } | ||
280 | |||
281 | private getTags (tags: any) { | ||
282 | if (Array.isArray(tags) === false) return [] | ||
283 | |||
284 | return tags | ||
285 | .filter(t => t.length < CONSTRAINTS_FIELDS.VIDEOS.TAG.max && t.length > CONSTRAINTS_FIELDS.VIDEOS.TAG.min) | ||
286 | .map(t => t.normalize()) | ||
287 | .slice(0, 5) | ||
288 | } | ||
289 | |||
290 | private getLicence (licence: string) { | ||
291 | if (!licence) return undefined | ||
292 | |||
293 | if (licence.includes('Creative Commons Attribution')) return 1 | ||
294 | |||
295 | for (const key of Object.keys(VIDEO_LICENCES)) { | ||
296 | const peertubeLicence = VIDEO_LICENCES[key] | ||
297 | if (peertubeLicence.toLowerCase() === licence.toLowerCase()) return parseInt(key, 10) | ||
298 | } | ||
299 | |||
300 | return undefined | ||
301 | } | ||
302 | |||
303 | private getCategory (categories: string[]) { | ||
304 | if (!categories) return undefined | ||
305 | |||
306 | const categoryString = categories[0] | ||
307 | if (!categoryString || typeof categoryString !== 'string') return undefined | ||
308 | |||
309 | if (categoryString === 'News & Politics') return 11 | ||
310 | |||
311 | for (const key of Object.keys(VIDEO_CATEGORIES)) { | ||
312 | const category = VIDEO_CATEGORIES[key] | ||
313 | if (categoryString.toLowerCase() === category.toLowerCase()) return parseInt(key, 10) | ||
314 | } | ||
315 | |||
316 | return undefined | ||
317 | } | ||
318 | |||
319 | private getLanguage (language: string) { | ||
320 | return VIDEO_LANGUAGES[language] ? language : undefined | ||
321 | } | ||
322 | |||
323 | private wrapWithProxyOptions (options: string[]) { | ||
324 | if (CONFIG.IMPORT.VIDEOS.HTTP.PROXY.ENABLED) { | ||
325 | logger.debug('Using proxy for YoutubeDL') | ||
326 | |||
327 | return [ '--proxy', CONFIG.IMPORT.VIDEOS.HTTP.PROXY.URL ].concat(options) | ||
328 | } | ||
329 | |||
330 | return options | ||
331 | } | ||
332 | |||
333 | // Thanks: https://github.com/przemyslawpluta/node-youtube-dl/blob/master/lib/downloader.js | ||
334 | // We rewrote it to avoid sync calls | ||
335 | static async updateYoutubeDLBinary () { | ||
336 | logger.info('Updating youtubeDL binary.') | ||
337 | |||
338 | const binDirectory = join(root(), 'node_modules', 'youtube-dl', 'bin') | ||
339 | const bin = join(binDirectory, 'youtube-dl') | ||
340 | const detailsPath = join(binDirectory, 'details') | ||
341 | const url = process.env.YOUTUBE_DL_DOWNLOAD_HOST || 'https://yt-dl.org/downloads/latest/youtube-dl' | ||
342 | |||
343 | await ensureDir(binDirectory) | ||
344 | |||
345 | try { | ||
346 | const gotContext = { bodyKBLimit: 20_000 } | ||
347 | |||
348 | const result = await peertubeGot(url, { followRedirect: false, context: gotContext }) | ||
349 | |||
350 | if (result.statusCode !== HttpStatusCode.FOUND_302) { | ||
351 | logger.error('youtube-dl update error: did not get redirect for the latest version link. Status %d', result.statusCode) | ||
352 | return | ||
353 | } | ||
354 | |||
355 | const newUrl = result.headers.location | ||
356 | const newVersion = /\/(\d{4}\.\d\d\.\d\d(\.\d)?)\/youtube-dl$/.exec(newUrl)[1] | ||
357 | |||
358 | const downloadFileStream = peertubeGot.stream(newUrl, { context: gotContext }) | ||
359 | const writeStream = createWriteStream(bin, { mode: 493 }) | ||
360 | |||
361 | await pipelinePromise( | ||
362 | downloadFileStream, | ||
363 | writeStream | ||
364 | ) | ||
365 | |||
366 | const details = JSON.stringify({ version: newVersion, path: bin, exec: 'youtube-dl' }) | ||
367 | await writeFile(detailsPath, details, { encoding: 'utf8' }) | ||
368 | |||
369 | logger.info('youtube-dl updated to version %s.', newVersion) | ||
370 | } catch (err) { | ||
371 | logger.error('Cannot update youtube-dl.', { err }) | ||
372 | } | ||
373 | } | ||
374 | |||
375 | static async safeGetYoutubeDL () { | ||
376 | let youtubeDL | ||
377 | |||
378 | try { | ||
379 | youtubeDL = require('youtube-dl') | ||
380 | } catch (e) { | ||
381 | // Download binary | ||
382 | await this.updateYoutubeDLBinary() | ||
383 | youtubeDL = require('youtube-dl') | ||
384 | } | ||
385 | |||
386 | return youtubeDL | ||
387 | } | ||
388 | } | ||
389 | |||
390 | // --------------------------------------------------------------------------- | ||
391 | |||
392 | export { | ||
393 | YoutubeDL | ||
394 | } | ||
diff --git a/server/helpers/youtube-dl/index.ts b/server/helpers/youtube-dl/index.ts new file mode 100644 index 000000000..6afc77dcf --- /dev/null +++ b/server/helpers/youtube-dl/index.ts | |||
@@ -0,0 +1,3 @@ | |||
1 | export * from './youtube-dl-cli' | ||
2 | export * from './youtube-dl-info-builder' | ||
3 | export * from './youtube-dl-wrapper' | ||
diff --git a/server/helpers/youtube-dl/youtube-dl-cli.ts b/server/helpers/youtube-dl/youtube-dl-cli.ts new file mode 100644 index 000000000..440869205 --- /dev/null +++ b/server/helpers/youtube-dl/youtube-dl-cli.ts | |||
@@ -0,0 +1,198 @@ | |||
1 | import execa from 'execa' | ||
2 | import { pathExists, writeFile } from 'fs-extra' | ||
3 | import { join } from 'path' | ||
4 | import { CONFIG } from '@server/initializers/config' | ||
5 | import { VideoResolution } from '@shared/models' | ||
6 | import { logger, loggerTagsFactory } from '../logger' | ||
7 | import { getProxy, isProxyEnabled } from '../proxy' | ||
8 | import { isBinaryResponse, peertubeGot } from '../requests' | ||
9 | |||
10 | const lTags = loggerTagsFactory('youtube-dl') | ||
11 | |||
12 | const youtubeDLBinaryPath = join(CONFIG.STORAGE.BIN_DIR, CONFIG.IMPORT.VIDEOS.HTTP.YOUTUBE_DL_RELEASE.NAME) | ||
13 | |||
14 | export class YoutubeDLCLI { | ||
15 | |||
16 | static async safeGet () { | ||
17 | if (!await pathExists(youtubeDLBinaryPath)) { | ||
18 | await this.updateYoutubeDLBinary() | ||
19 | } | ||
20 | |||
21 | return new YoutubeDLCLI() | ||
22 | } | ||
23 | |||
24 | static async updateYoutubeDLBinary () { | ||
25 | const url = CONFIG.IMPORT.VIDEOS.HTTP.YOUTUBE_DL_RELEASE.URL | ||
26 | |||
27 | logger.info('Updating youtubeDL binary from %s.', url, lTags()) | ||
28 | |||
29 | const gotOptions = { context: { bodyKBLimit: 20_000 }, responseType: 'buffer' as 'buffer' } | ||
30 | |||
31 | try { | ||
32 | let gotResult = await peertubeGot(url, gotOptions) | ||
33 | |||
34 | if (!isBinaryResponse(gotResult)) { | ||
35 | const json = JSON.parse(gotResult.body.toString()) | ||
36 | const latest = json.filter(release => release.prerelease === false)[0] | ||
37 | if (!latest) throw new Error('Cannot find latest release') | ||
38 | |||
39 | const releaseName = CONFIG.IMPORT.VIDEOS.HTTP.YOUTUBE_DL_RELEASE.NAME | ||
40 | const releaseAsset = latest.assets.find(a => a.name === releaseName) | ||
41 | if (!releaseAsset) throw new Error(`Cannot find appropriate release with name ${releaseName} in release assets`) | ||
42 | |||
43 | gotResult = await peertubeGot(releaseAsset.browser_download_url, gotOptions) | ||
44 | } | ||
45 | |||
46 | if (!isBinaryResponse(gotResult)) { | ||
47 | throw new Error('Not a binary response') | ||
48 | } | ||
49 | |||
50 | await writeFile(youtubeDLBinaryPath, gotResult.body) | ||
51 | |||
52 | logger.info('youtube-dl updated %s.', youtubeDLBinaryPath, lTags()) | ||
53 | } catch (err) { | ||
54 | logger.error('Cannot update youtube-dl from %s.', url, { err, ...lTags() }) | ||
55 | } | ||
56 | } | ||
57 | |||
58 | static getYoutubeDLVideoFormat (enabledResolutions: VideoResolution[]) { | ||
59 | /** | ||
60 | * list of format selectors in order of preference | |
61 | * see https://github.com/ytdl-org/youtube-dl#format-selection | ||
62 | * | ||
63 | * case #1 asks for a mp4 using h264 (avc1) and the exact resolution in the hope | ||
64 | * of being able to do a "quick-transcode" | ||
65 | * case #2 is the first fallback. No "quick-transcode" means we can get anything else (like vp9) | ||
66 | * case #3 is the resolution-degraded equivalent of #1, and already a pretty safe fallback | ||
67 | * | ||
68 | * in any case we avoid AV1, see https://github.com/Chocobozzz/PeerTube/issues/3499 | ||
69 | **/ | ||
70 | const resolution = enabledResolutions.length === 0 | ||
71 | ? VideoResolution.H_720P | ||
72 | : Math.max(...enabledResolutions) | ||
73 | |||
74 | return [ | ||
75 | `bestvideo[vcodec^=avc1][height=${resolution}]+bestaudio[ext=m4a]`, // case #1 | ||
76 | `bestvideo[vcodec!*=av01][vcodec!*=vp9.2][height=${resolution}]+bestaudio`, // case #2 | ||
77 | `bestvideo[vcodec^=avc1][height<=${resolution}]+bestaudio[ext=m4a]`, // case #3 | ||
78 | `bestvideo[vcodec!*=av01][vcodec!*=vp9.2]+bestaudio`, | ||
79 | 'best[vcodec!*=av01][vcodec!*=vp9.2]', // case fallback for known formats | ||
80 | 'best' // Ultimate fallback | ||
81 | ].join('/') | ||
82 | } | ||
83 | |||
84 | private constructor () { | ||
85 | |||
86 | } | ||
87 | |||
88 | download (options: { | ||
89 | url: string | ||
90 | format: string | ||
91 | output: string | ||
92 | processOptions: execa.NodeOptions | ||
93 | additionalYoutubeDLArgs?: string[] | ||
94 | }) { | ||
95 | return this.run({ | ||
96 | url: options.url, | ||
97 | processOptions: options.processOptions, | ||
98 | args: (options.additionalYoutubeDLArgs || []).concat([ '-f', options.format, '-o', options.output ]) | ||
99 | }) | ||
100 | } | ||
101 | |||
102 | async getInfo (options: { | ||
103 | url: string | ||
104 | format: string | ||
105 | processOptions: execa.NodeOptions | ||
106 | additionalYoutubeDLArgs?: string[] | ||
107 | }) { | ||
108 | const { url, format, additionalYoutubeDLArgs = [], processOptions } = options | ||
109 | |||
110 | const completeArgs = additionalYoutubeDLArgs.concat([ '--dump-json', '-f', format ]) | ||
111 | |||
112 | const data = await this.run({ url, args: completeArgs, processOptions }) | ||
113 | const info = data.map(this.parseInfo) | ||
114 | |||
115 | return info.length === 1 | ||
116 | ? info[0] | ||
117 | : info | ||
118 | } | ||
119 | |||
120 | async getSubs (options: { | ||
121 | url: string | ||
122 | format: 'vtt' | ||
123 | processOptions: execa.NodeOptions | ||
124 | }) { | ||
125 | const { url, format, processOptions } = options | ||
126 | |||
127 | const args = [ '--skip-download', '--all-subs', `--sub-format=${format}` ] | ||
128 | |||
129 | const data = await this.run({ url, args, processOptions }) | ||
130 | const files: string[] = [] | ||
131 | |||
132 | const skipString = '[info] Writing video subtitles to: ' | ||
133 | |||
134 | for (let i = 0, len = data.length; i < len; i++) { | ||
135 | const line = data[i] | ||
136 | |||
137 | if (line.indexOf(skipString) === 0) { | ||
138 | files.push(line.slice(skipString.length)) | ||
139 | } | ||
140 | } | ||
141 | |||
142 | return files | ||
143 | } | ||
144 | |||
145 | private async run (options: { | ||
146 | url: string | ||
147 | args: string[] | ||
148 | processOptions: execa.NodeOptions | ||
149 | }) { | ||
150 | const { url, args, processOptions } = options | ||
151 | |||
152 | let completeArgs = this.wrapWithProxyOptions(args) | ||
153 | completeArgs = this.wrapWithIPOptions(completeArgs) | ||
154 | completeArgs = this.wrapWithFFmpegOptions(completeArgs) | ||
155 | |||
156 | const output = await execa('python', [ youtubeDLBinaryPath, ...completeArgs, url ], processOptions) | ||
157 | |||
158 | logger.debug('Runned youtube-dl command.', { command: output.command, stdout: output.stdout, ...lTags() }) | ||
159 | |||
160 | return output.stdout | ||
161 | ? output.stdout.trim().split(/\r?\n/) | ||
162 | : undefined | ||
163 | } | ||
164 | |||
165 | private wrapWithProxyOptions (args: string[]) { | ||
166 | if (isProxyEnabled()) { | ||
167 | logger.debug('Using proxy %s for YoutubeDL', getProxy(), lTags()) | ||
168 | |||
169 | return [ '--proxy', getProxy() ].concat(args) | ||
170 | } | ||
171 | |||
172 | return args | ||
173 | } | ||
174 | |||
175 | private wrapWithIPOptions (args: string[]) { | ||
176 | if (CONFIG.IMPORT.VIDEOS.HTTP.FORCE_IPV4) { | ||
177 | logger.debug('Force ipv4 for YoutubeDL') | ||
178 | |||
179 | return [ '--force-ipv4' ].concat(args) | ||
180 | } | ||
181 | |||
182 | return args | ||
183 | } | ||
184 | |||
185 | private wrapWithFFmpegOptions (args: string[]) { | ||
186 | if (process.env.FFMPEG_PATH) { | ||
187 | logger.debug('Using ffmpeg location %s for YoutubeDL', process.env.FFMPEG_PATH, lTags()) | ||
188 | |||
189 | return [ '--ffmpeg-location', process.env.FFMPEG_PATH ].concat(args) | ||
190 | } | ||
191 | |||
192 | return args | ||
193 | } | ||
194 | |||
195 | private parseInfo (data: string) { | ||
196 | return JSON.parse(data) | ||
197 | } | ||
198 | } | ||
diff --git a/server/helpers/youtube-dl/youtube-dl-info-builder.ts b/server/helpers/youtube-dl/youtube-dl-info-builder.ts new file mode 100644 index 000000000..9746a7067 --- /dev/null +++ b/server/helpers/youtube-dl/youtube-dl-info-builder.ts | |||
@@ -0,0 +1,154 @@ | |||
1 | import { CONSTRAINTS_FIELDS, VIDEO_CATEGORIES, VIDEO_LANGUAGES, VIDEO_LICENCES } from '../../initializers/constants' | ||
2 | import { peertubeTruncate } from '../core-utils' | ||
3 | |||
4 | type YoutubeDLInfo = { | ||
5 | name?: string | ||
6 | description?: string | ||
7 | category?: number | ||
8 | language?: string | ||
9 | licence?: number | ||
10 | nsfw?: boolean | ||
11 | tags?: string[] | ||
12 | thumbnailUrl?: string | ||
13 | ext?: string | ||
14 | originallyPublishedAt?: Date | ||
15 | } | ||
16 | |||
17 | class YoutubeDLInfoBuilder { | ||
18 | private readonly info: any | ||
19 | |||
20 | constructor (info: any) { | ||
21 | this.info = { ...info } | ||
22 | } | ||
23 | |||
24 | getInfo () { | ||
25 | const obj = this.buildVideoInfo(this.normalizeObject(this.info)) | ||
26 | if (obj.name && obj.name.length < CONSTRAINTS_FIELDS.VIDEOS.NAME.min) obj.name += ' video' | ||
27 | |||
28 | return obj | ||
29 | } | ||
30 | |||
31 | private normalizeObject (obj: any) { | ||
32 | const newObj: any = {} | ||
33 | |||
34 | for (const key of Object.keys(obj)) { | ||
35 | // Deprecated key | ||
36 | if (key === 'resolution') continue | ||
37 | |||
38 | const value = obj[key] | ||
39 | |||
40 | if (typeof value === 'string') { | ||
41 | newObj[key] = value.normalize() | ||
42 | } else { | ||
43 | newObj[key] = value | ||
44 | } | ||
45 | } | ||
46 | |||
47 | return newObj | ||
48 | } | ||
49 | |||
50 | private buildOriginallyPublishedAt (obj: any) { | ||
51 | let originallyPublishedAt: Date = null | ||
52 | |||
53 | const uploadDateMatcher = /^(\d{4})(\d{2})(\d{2})$/.exec(obj.upload_date) | ||
54 | if (uploadDateMatcher) { | ||
55 | originallyPublishedAt = new Date() | ||
56 | originallyPublishedAt.setHours(0, 0, 0, 0) | ||
57 | |||
58 | const year = parseInt(uploadDateMatcher[1], 10) | ||
59 | // Month starts from 0 | ||
60 | const month = parseInt(uploadDateMatcher[2], 10) - 1 | ||
61 | const day = parseInt(uploadDateMatcher[3], 10) | ||
62 | |||
63 | originallyPublishedAt.setFullYear(year, month, day) | ||
64 | } | ||
65 | |||
66 | return originallyPublishedAt | ||
67 | } | ||
68 | |||
69 | private buildVideoInfo (obj: any): YoutubeDLInfo { | ||
70 | return { | ||
71 | name: this.titleTruncation(obj.title), | ||
72 | description: this.descriptionTruncation(obj.description), | ||
73 | category: this.getCategory(obj.categories), | ||
74 | licence: this.getLicence(obj.license), | ||
75 | language: this.getLanguage(obj.language), | ||
76 | nsfw: this.isNSFW(obj), | ||
77 | tags: this.getTags(obj.tags), | ||
78 | thumbnailUrl: obj.thumbnail || undefined, | ||
79 | originallyPublishedAt: this.buildOriginallyPublishedAt(obj), | ||
80 | ext: obj.ext | ||
81 | } | ||
82 | } | ||
83 | |||
84 | private titleTruncation (title: string) { | ||
85 | return peertubeTruncate(title, { | ||
86 | length: CONSTRAINTS_FIELDS.VIDEOS.NAME.max, | ||
87 | separator: /,? +/, | ||
88 | omission: ' […]' | ||
89 | }) | ||
90 | } | ||
91 | |||
92 | private descriptionTruncation (description: string) { | ||
93 | if (!description || description.length < CONSTRAINTS_FIELDS.VIDEOS.DESCRIPTION.min) return undefined | ||
94 | |||
95 | return peertubeTruncate(description, { | ||
96 | length: CONSTRAINTS_FIELDS.VIDEOS.DESCRIPTION.max, | ||
97 | separator: /,? +/, | ||
98 | omission: ' […]' | ||
99 | }) | ||
100 | } | ||
101 | |||
102 | private isNSFW (info: any) { | ||
103 | return info?.age_limit >= 16 | ||
104 | } | ||
105 | |||
106 | private getTags (tags: string[]) { | ||
107 | if (Array.isArray(tags) === false) return [] | ||
108 | |||
109 | return tags | ||
110 | .filter(t => t.length < CONSTRAINTS_FIELDS.VIDEOS.TAG.max && t.length > CONSTRAINTS_FIELDS.VIDEOS.TAG.min) | ||
111 | .map(t => t.normalize()) | ||
112 | .slice(0, 5) | ||
113 | } | ||
114 | |||
115 | private getLicence (licence: string) { | ||
116 | if (!licence) return undefined | ||
117 | |||
118 | if (licence.includes('Creative Commons Attribution')) return 1 | ||
119 | |||
120 | for (const key of Object.keys(VIDEO_LICENCES)) { | ||
121 | const peertubeLicence = VIDEO_LICENCES[key] | ||
122 | if (peertubeLicence.toLowerCase() === licence.toLowerCase()) return parseInt(key, 10) | ||
123 | } | ||
124 | |||
125 | return undefined | ||
126 | } | ||
127 | |||
128 | private getCategory (categories: string[]) { | ||
129 | if (!categories) return undefined | ||
130 | |||
131 | const categoryString = categories[0] | ||
132 | if (!categoryString || typeof categoryString !== 'string') return undefined | ||
133 | |||
134 | if (categoryString === 'News & Politics') return 11 | ||
135 | |||
136 | for (const key of Object.keys(VIDEO_CATEGORIES)) { | ||
137 | const category = VIDEO_CATEGORIES[key] | ||
138 | if (categoryString.toLowerCase() === category.toLowerCase()) return parseInt(key, 10) | ||
139 | } | ||
140 | |||
141 | return undefined | ||
142 | } | ||
143 | |||
144 | private getLanguage (language: string) { | ||
145 | return VIDEO_LANGUAGES[language] ? language : undefined | ||
146 | } | ||
147 | } | ||
148 | |||
149 | // --------------------------------------------------------------------------- | ||
150 | |||
151 | export { | ||
152 | YoutubeDLInfo, | ||
153 | YoutubeDLInfoBuilder | ||
154 | } | ||
diff --git a/server/helpers/youtube-dl/youtube-dl-wrapper.ts b/server/helpers/youtube-dl/youtube-dl-wrapper.ts new file mode 100644 index 000000000..6960fbae4 --- /dev/null +++ b/server/helpers/youtube-dl/youtube-dl-wrapper.ts | |||
@@ -0,0 +1,135 @@ | |||
1 | import { move, pathExists, readdir, remove } from 'fs-extra' | ||
2 | import { dirname, join } from 'path' | ||
3 | import { CONFIG } from '@server/initializers/config' | ||
4 | import { isVideoFileExtnameValid } from '../custom-validators/videos' | ||
5 | import { logger, loggerTagsFactory } from '../logger' | ||
6 | import { generateVideoImportTmpPath } from '../utils' | ||
7 | import { YoutubeDLCLI } from './youtube-dl-cli' | ||
8 | import { YoutubeDLInfo, YoutubeDLInfoBuilder } from './youtube-dl-info-builder' | ||
9 | |||
// Logger tags of this module ('youtube-dl'), spread into the metadata of its log calls
const lTags = loggerTagsFactory('youtube-dl')

/**
 * Subtitle files listed by YoutubeDLWrapper.getSubtitles().
 */
export type YoutubeDLSubs = {
  // Two-letter code captured from the subtitle file name
  language: string

  // Subtitle file name
  filename: string

  // File name joined with the directory the subtitles were fetched into
  path: string
}[]

// Process options passed to the youtube-dl CLI calls that fetch info and download the video
const processOptions = {
  maxBuffer: 10 * 1024 * 1024 // 10MB
}
21 | |||
/**
 * Wrapper around the youtube-dl CLI for a single remote URL:
 * fetches the video information, lists its subtitles and downloads the video file.
 */
class YoutubeDLWrapper {

  /**
   * @param url - remote URL handed to the youtube-dl CLI
   * @param enabledResolutions - resolutions given to YoutubeDLCLI.getYoutubeDLVideoFormat() to build the format selector
   */
  constructor (private readonly url: string = '', private readonly enabledResolutions: number[] = []) {

  }

  /**
   * Fetch the remote video information and convert it with YoutubeDLInfoBuilder.
   *
   * @param youtubeDLArgs - additional arguments forwarded to the CLI
   * @throws Error when the remote video is flagged as live
   */
  async getInfoForDownload (youtubeDLArgs: string[] = []): Promise<YoutubeDLInfo> {
    const youtubeDL = await YoutubeDLCLI.safeGet()

    const info = await youtubeDL.getInfo({
      url: this.url,
      format: YoutubeDLCLI.getYoutubeDLVideoFormat(this.enabledResolutions),
      additionalYoutubeDLArgs: youtubeDLArgs,
      processOptions
    })

    if (info.is_live === true) throw new Error('Cannot download a live streaming.')

    const infoBuilder = new YoutubeDLInfoBuilder(info)

    return infoBuilder.getInfo()
  }

  /**
   * Fetch the subtitles of the remote video into CONFIG.STORAGE.TMP_DIR.
   *
   * Files whose name does not end with `.<two-letter language>[-<suffix>].(vtt|ttml)` are ignored.
   *
   * @returns one entry per recognized subtitle file, or an empty array when the CLI returns no files
   */
  async getSubtitles (): Promise<YoutubeDLSubs> {
    const cwd = CONFIG.STORAGE.TMP_DIR

    const youtubeDL = await YoutubeDLCLI.safeGet()

    const files = await youtubeDL.getSubs({ url: this.url, format: 'vtt', processOptions: { cwd } })
    if (!files) return []

    logger.debug('Get subtitles from youtube dl.', { url: this.url, files, ...lTags() })

    const subtitles: YoutubeDLSubs = []

    for (const filename of files) {
      const matched = filename.match(/\.([a-z]{2})(-[a-z]+)?\.(vtt|ttml)/i)
      if (!matched || !matched[1]) continue

      subtitles.push({
        language: matched[1],
        path: join(cwd, filename),
        filename
      })
    }

    return subtitles
  }

  /**
   * Download the remote video into a temporary path and return the path of the downloaded file.
   *
   * On failure or timeout, the file found at the temporary path (if any) is removed
   * and the original error is rethrown.
   *
   * @param fileExt - extension expected for the downloaded file, tried first when locating it
   * @param timeout - maximum duration of the download, in milliseconds
   * @throws the download error, or Error('YoutubeDL download timeout.') when `timeout` elapses first
   */
  async downloadVideo (fileExt: string, timeout: number): Promise<string> {
    // Leave empty the extension, youtube-dl will add it
    const pathWithoutExtension = generateVideoImportTmpPath(this.url, '')

    logger.info('Importing youtubeDL video %s to %s', this.url, pathWithoutExtension, lTags())

    const youtubeDL = await YoutubeDLCLI.safeGet()

    let timer: ReturnType<typeof setTimeout>

    const timeoutPromise = new Promise<string>((_, rej) => {
      timer = setTimeout(() => rej(new Error('YoutubeDL download timeout.')), timeout)
    })

    const download = async (): Promise<string> => {
      try {
        await youtubeDL.download({
          url: this.url,
          format: YoutubeDLCLI.getYoutubeDLVideoFormat(this.enabledResolutions),
          output: pathWithoutExtension,
          processOptions
        })
      } finally {
        // Stop the timeout as soon as the download settles, including when it fails,
        // so a failed import does not leave a pending timer behind
        clearTimeout(timer)
      }

      // If youtube-dl did not guess an extension for our file, just use .mp4 as default
      if (await pathExists(pathWithoutExtension)) {
        await move(pathWithoutExtension, pathWithoutExtension + '.mp4')
      }

      return this.guessVideoPathWithExtension(pathWithoutExtension, fileExt)
    }

    try {
      return await Promise.race([ download(), timeoutPromise ])
    } catch (err) {
      await this.removeFailedDownload(pathWithoutExtension, fileExt)

      throw err
    }
  }

  /**
   * Best-effort removal of the file left by a failed or timed out download.
   * Never throws, so the caller can always rethrow the original error.
   */
  private async removeFailedDownload (tmpPath: string, sourceExt: string): Promise<void> {
    let path: string

    try {
      path = await this.guessVideoPathWithExtension(tmpPath, sourceExt)
    } catch (err) {
      // No file could be located: there is nothing to remove
      logger.debug('No youtubeDL file to remove after failed import.', { err, ...lTags() })
      return
    }

    // The removal itself is not awaited: a failure is only logged
    remove(path)
      .catch(err => logger.error('Cannot remove file in youtubeDL timeout.', { err, ...lTags() }))
  }

  /**
   * Find the downloaded file by appending candidate extensions to `tmpPath`.
   *
   * @param tmpPath - download path without extension
   * @param sourceExt - expected extension, tried before '.mp4', '.mkv' and '.webm'
   * @throws Error when `sourceExt` is not a valid video extension or when no candidate path exists
   */
  private async guessVideoPathWithExtension (tmpPath: string, sourceExt: string): Promise<string> {
    if (!isVideoFileExtnameValid(sourceExt)) {
      throw new Error('Invalid video extension ' + sourceExt)
    }

    const extensions = [ sourceExt, '.mp4', '.mkv', '.webm' ]

    for (const extension of extensions) {
      const path = tmpPath + extension

      if (await pathExists(path)) return path
    }

    // Include the directory listing in the error to help diagnose the failed import
    const directoryContent = await readdir(dirname(tmpPath))

    throw new Error(`Cannot guess path of ${tmpPath}. Directory content: ${directoryContent.join(', ')}`)
  }
}
130 | |||
131 | // --------------------------------------------------------------------------- | ||
132 | |||
133 | export { | ||
134 | YoutubeDLWrapper | ||
135 | } | ||