diff options
author | Chocobozzz <me@florianbigard.com> | 2021-10-21 16:28:39 +0200 |
---|---|---|
committer | Chocobozzz <me@florianbigard.com> | 2021-10-22 10:25:24 +0200 |
commit | 62549e6c9818f422698f030e0b242609115493ed (patch) | |
tree | 12a969f694239fe5f926f779698df9523605ee80 /server/helpers/youtube-dl | |
parent | a71d4140a5b7831dbe2eb7a0dfaa6a755cb2e906 (diff) | |
download | PeerTube-62549e6c9818f422698f030e0b242609115493ed.tar.gz PeerTube-62549e6c9818f422698f030e0b242609115493ed.tar.zst PeerTube-62549e6c9818f422698f030e0b242609115493ed.zip |
Rewrite youtube-dl import
Use python3 binary
Allow using a custom youtube-dl release URL
Allow using yt-dlp (a youtube-dl fork)
Remove proxy config from configuration to use HTTP_PROXY and HTTPS_PROXY
env variables
Diffstat (limited to 'server/helpers/youtube-dl')
-rw-r--r-- | server/helpers/youtube-dl/index.ts | 3 | ||||
-rw-r--r-- | server/helpers/youtube-dl/youtube-dl-cli.ts | 198 | ||||
-rw-r--r-- | server/helpers/youtube-dl/youtube-dl-info-builder.ts | 154 | ||||
-rw-r--r-- | server/helpers/youtube-dl/youtube-dl-wrapper.ts | 135 |
4 files changed, 490 insertions, 0 deletions
diff --git a/server/helpers/youtube-dl/index.ts b/server/helpers/youtube-dl/index.ts new file mode 100644 index 000000000..6afc77dcf --- /dev/null +++ b/server/helpers/youtube-dl/index.ts | |||
@@ -0,0 +1,3 @@ | |||
1 | export * from './youtube-dl-cli' | ||
2 | export * from './youtube-dl-info-builder' | ||
3 | export * from './youtube-dl-wrapper' | ||
diff --git a/server/helpers/youtube-dl/youtube-dl-cli.ts b/server/helpers/youtube-dl/youtube-dl-cli.ts new file mode 100644 index 000000000..440869205 --- /dev/null +++ b/server/helpers/youtube-dl/youtube-dl-cli.ts | |||
@@ -0,0 +1,198 @@ | |||
1 | import execa from 'execa' | ||
2 | import { pathExists, writeFile } from 'fs-extra' | ||
3 | import { join } from 'path' | ||
4 | import { CONFIG } from '@server/initializers/config' | ||
5 | import { VideoResolution } from '@shared/models' | ||
6 | import { logger, loggerTagsFactory } from '../logger' | ||
7 | import { getProxy, isProxyEnabled } from '../proxy' | ||
8 | import { isBinaryResponse, peertubeGot } from '../requests' | ||
9 | |||
// Factory for the 'youtube-dl' tag metadata passed to the logger calls of this file
const lTags = loggerTagsFactory('youtube-dl')

// Path of the youtube-dl release file: configured release name inside the configured binaries directory
const youtubeDLBinaryPath = join(
  CONFIG.STORAGE.BIN_DIR,
  CONFIG.IMPORT.VIDEOS.HTTP.YOUTUBE_DL_RELEASE.NAME
)
13 | |||
/**
 * Command line wrapper around the youtube-dl release file stored at
 * `youtubeDLBinaryPath`.
 *
 * Instances are created through `YoutubeDLCLI.safeGet()`, which first downloads
 * the release file when nothing exists at that path.
 */
export class YoutubeDLCLI {

  /**
   * Build a CLI instance, fetching the release file beforehand if it is
   * missing from disk.
   */
  static async safeGet () {
    if (!await pathExists(youtubeDLBinaryPath)) {
      await this.updateYoutubeDLBinary()
    }

    return new YoutubeDLCLI()
  }

  /**
   * Download the release file from the configured release URL and write it to
   * `youtubeDLBinaryPath`.
   *
   * The URL may answer with the file itself, or with a JSON array of releases:
   * in that case the first entry that is not a prerelease is selected and the
   * asset whose name equals the configured release name is downloaded.
   *
   * Failures are logged and NOT rethrown.
   */
  static async updateYoutubeDLBinary () {
    const url = CONFIG.IMPORT.VIDEOS.HTTP.YOUTUBE_DL_RELEASE.URL

    logger.info('Updating youtubeDL binary from %s.', url, lTags())

    const gotOptions = { context: { bodyKBLimit: 20_000 }, responseType: 'buffer' as const }

    try {
      let gotResult = await peertubeGot(url, gotOptions)

      if (!isBinaryResponse(gotResult)) {
        // Not the file itself: treat the body as a JSON list of releases
        const json = JSON.parse(gotResult.body.toString())
        const latest = json.find(release => release.prerelease === false)
        if (!latest) throw new Error('Cannot find latest release')

        const releaseName = CONFIG.IMPORT.VIDEOS.HTTP.YOUTUBE_DL_RELEASE.NAME
        const releaseAsset = latest.assets.find(a => a.name === releaseName)
        if (!releaseAsset) throw new Error(`Cannot find appropriate release with name ${releaseName} in release assets`)

        gotResult = await peertubeGot(releaseAsset.browser_download_url, gotOptions)
      }

      if (!isBinaryResponse(gotResult)) {
        throw new Error('Not a binary response')
      }

      await writeFile(youtubeDLBinaryPath, gotResult.body)

      logger.info('youtube-dl updated %s.', youtubeDLBinaryPath, lTags())
    } catch (err) {
      logger.error('Cannot update youtube-dl from %s.', url, { err, ...lTags() })
    }
  }

  /**
   * Build the `-f` format selector string.
   *
   * @param enabledResolutions resolutions to target; the highest one is used,
   * or `VideoResolution.H_720P` when the list is empty
   */
  static getYoutubeDLVideoFormat (enabledResolutions: VideoResolution[]) {
    /**
     * list of format selectors in order of preference
     * see https://github.com/ytdl-org/youtube-dl#format-selection
     *
     * case #1 asks for a mp4 using h264 (avc1) and the exact resolution in the hope
     * of being able to do a "quick-transcode"
     * case #2 is the first fallback. No "quick-transcode" means we can get anything else (like vp9)
     * case #3 is the resolution-degraded equivalent of #1, and already a pretty safe fallback
     *
     * in any case we avoid AV1, see https://github.com/Chocobozzz/PeerTube/issues/3499
     **/
    const resolution = enabledResolutions.length === 0
      ? VideoResolution.H_720P
      : Math.max(...enabledResolutions)

    return [
      `bestvideo[vcodec^=avc1][height=${resolution}]+bestaudio[ext=m4a]`, // case #1
      `bestvideo[vcodec!*=av01][vcodec!*=vp9.2][height=${resolution}]+bestaudio`, // case #2
      `bestvideo[vcodec^=avc1][height<=${resolution}]+bestaudio[ext=m4a]`, // case #3
      `bestvideo[vcodec!*=av01][vcodec!*=vp9.2]+bestaudio`,
      'best[vcodec!*=av01][vcodec!*=vp9.2]', // case fallback for known formats
      'best' // Ultimate fallback
    ].join('/')
  }

  // Use YoutubeDLCLI.safeGet() so the release file is present before any command runs
  private constructor () {

  }

  /**
   * Run youtube-dl on `url` with the given format selector (`-f`) and output
   * path (`-o`).
   *
   * @returns the stdout lines of the command, or `undefined` when stdout is empty
   */
  download (options: {
    url: string
    format: string
    output: string
    processOptions: execa.NodeOptions
    additionalYoutubeDLArgs?: string[]
  }) {
    return this.run({
      url: options.url,
      processOptions: options.processOptions,
      args: (options.additionalYoutubeDLArgs || []).concat([ '-f', options.format, '-o', options.output ])
    })
  }

  /**
   * Run youtube-dl with `--dump-json` and parse every stdout line as JSON.
   *
   * @returns the parsed object when there is a single line, otherwise the
   * array of parsed objects
   * @throws Error when the command printed nothing on stdout
   */
  async getInfo (options: {
    url: string
    format: string
    processOptions: execa.NodeOptions
    additionalYoutubeDLArgs?: string[]
  }) {
    const { url, format, additionalYoutubeDLArgs = [], processOptions } = options

    const completeArgs = additionalYoutubeDLArgs.concat([ '--dump-json', '-f', format ])

    const data = await this.run({ url, args: completeArgs, processOptions })

    // run() yields undefined on empty stdout: fail with an explicit message instead of a TypeError
    if (!data) throw new Error(`youtube-dl did not return any information for ${url}`)

    const info = data.map(line => this.parseInfo(line))

    return info.length === 1
      ? info[0]
      : info
  }

  /**
   * Run youtube-dl with `--skip-download --all-subs` and collect the subtitle
   * file names announced on stdout.
   *
   * @returns the text following each '[info] Writing video subtitles to: '
   * prefix; an empty array when nothing was printed
   */
  async getSubs (options: {
    url: string
    format: 'vtt'
    processOptions: execa.NodeOptions
  }) {
    const { url, format, processOptions } = options

    const args = [ '--skip-download', '--all-subs', `--sub-format=${format}` ]

    const data = await this.run({ url, args, processOptions })

    // Empty stdout means no subtitle file was announced
    if (!data) return []

    const skipString = '[info] Writing video subtitles to: '

    return data
      .filter(line => line.startsWith(skipString))
      .map(line => line.slice(skipString.length))
  }

  /**
   * Execute the release file through the python3 interpreter, after prepending
   * the proxy, IPv4 and ffmpeg options that apply.
   *
   * @returns stdout split by line, or `undefined` when stdout is empty
   */
  private async run (options: {
    url: string
    args: string[]
    processOptions: execa.NodeOptions
  }) {
    const { url, args, processOptions } = options

    let completeArgs = this.wrapWithProxyOptions(args)
    completeArgs = this.wrapWithIPOptions(completeArgs)
    completeArgs = this.wrapWithFFmpegOptions(completeArgs)

    const output = await execa('python3', [ youtubeDLBinaryPath, ...completeArgs, url ], processOptions)

    logger.debug('Runned youtube-dl command.', { command: output.command, stdout: output.stdout, ...lTags() })

    return output.stdout
      ? output.stdout.trim().split(/\r?\n/)
      : undefined
  }

  // Prepend `--proxy <proxy>` when a proxy is enabled
  private wrapWithProxyOptions (args: string[]) {
    if (isProxyEnabled()) {
      logger.debug('Using proxy %s for YoutubeDL', getProxy(), lTags())

      return [ '--proxy', getProxy() ].concat(args)
    }

    return args
  }

  // Prepend `--force-ipv4` when the import configuration requests it
  private wrapWithIPOptions (args: string[]) {
    if (CONFIG.IMPORT.VIDEOS.HTTP.FORCE_IPV4) {
      logger.debug('Force ipv4 for YoutubeDL', lTags())

      return [ '--force-ipv4' ].concat(args)
    }

    return args
  }

  // Prepend `--ffmpeg-location <path>` when the FFMPEG_PATH environment variable is set
  private wrapWithFFmpegOptions (args: string[]) {
    if (process.env.FFMPEG_PATH) {
      logger.debug('Using ffmpeg location %s for YoutubeDL', process.env.FFMPEG_PATH, lTags())

      return [ '--ffmpeg-location', process.env.FFMPEG_PATH ].concat(args)
    }

    return args
  }

  // Parse one stdout line of `--dump-json`
  private parseInfo (data: string) {
    return JSON.parse(data)
  }
}
diff --git a/server/helpers/youtube-dl/youtube-dl-info-builder.ts b/server/helpers/youtube-dl/youtube-dl-info-builder.ts new file mode 100644 index 000000000..9746a7067 --- /dev/null +++ b/server/helpers/youtube-dl/youtube-dl-info-builder.ts | |||
@@ -0,0 +1,154 @@ | |||
1 | import { CONSTRAINTS_FIELDS, VIDEO_CATEGORIES, VIDEO_LANGUAGES, VIDEO_LICENCES } from '../../initializers/constants' | ||
2 | import { peertubeTruncate } from '../core-utils' | ||
3 | |||
/**
 * Video attributes produced by `YoutubeDLInfoBuilder`.
 * Every field is optional: a field is left undefined when it cannot be built
 * from the youtube-dl information.
 */
interface YoutubeDLInfo {
  // Textual metadata
  name?: string
  description?: string
  tags?: string[]

  // Classification
  category?: number
  language?: string
  licence?: number
  nsfw?: boolean

  // Media related metadata
  thumbnailUrl?: string
  ext?: string
  originallyPublishedAt?: Date
}
16 | |||
/**
 * Converts a raw youtube-dl information object into the subset of video
 * attributes described by `YoutubeDLInfo`.
 */
class YoutubeDLInfoBuilder {
  private readonly info: any

  /**
   * @param info raw youtube-dl information object (shallow-copied)
   */
  constructor (info: any) {
    this.info = { ...info }
  }

  /**
   * Build the video attributes.
   * A name shorter than the minimum allowed length gets ' video' appended.
   */
  getInfo () {
    const videoInfo = this.buildVideoInfo(this.normalizeObject(this.info))

    if (videoInfo.name && videoInfo.name.length < CONSTRAINTS_FIELDS.VIDEOS.NAME.min) videoInfo.name += ' video'

    return videoInfo
  }

  // Copy the object without the deprecated 'resolution' key, applying String.normalize() to string values
  private normalizeObject (obj: any) {
    const newObj: any = {}

    for (const key of Object.keys(obj)) {
      // Deprecated key
      if (key === 'resolution') continue

      const value = obj[key]

      newObj[key] = typeof value === 'string'
        ? value.normalize()
        : value
    }

    return newObj
  }

  // Parse `upload_date` (YYYYMMDD) as a local date at midnight, or return null when it does not match
  private buildOriginallyPublishedAt (obj: any) {
    const uploadDateMatcher = /^(\d{4})(\d{2})(\d{2})$/.exec(obj.upload_date)
    if (!uploadDateMatcher) return null

    const year = parseInt(uploadDateMatcher[1], 10)
    // Month starts from 0
    const month = parseInt(uploadDateMatcher[2], 10) - 1
    const day = parseInt(uploadDateMatcher[3], 10)

    const originallyPublishedAt = new Date()
    originallyPublishedAt.setHours(0, 0, 0, 0)
    originallyPublishedAt.setFullYear(year, month, day)

    return originallyPublishedAt
  }

  // Map the youtube-dl fields to their YoutubeDLInfo counterparts
  private buildVideoInfo (obj: any): YoutubeDLInfo {
    return {
      name: this.titleTruncation(obj.title),
      description: this.descriptionTruncation(obj.description),
      category: this.getCategory(obj.categories),
      licence: this.getLicence(obj.license),
      language: this.getLanguage(obj.language),
      nsfw: this.isNSFW(obj),
      tags: this.getTags(obj.tags),
      thumbnailUrl: obj.thumbnail || undefined,
      originallyPublishedAt: this.buildOriginallyPublishedAt(obj),
      ext: obj.ext
    }
  }

  // Truncate the title to the maximum video name length
  private titleTruncation (title: string) {
    return peertubeTruncate(title, {
      length: CONSTRAINTS_FIELDS.VIDEOS.NAME.max,
      separator: /,? +/,
      omission: ' […]'
    })
  }

  // Drop descriptions that are missing or too short, truncate the other ones to the maximum length
  private descriptionTruncation (description: string) {
    if (!description || description.length < CONSTRAINTS_FIELDS.VIDEOS.DESCRIPTION.min) return undefined

    return peertubeTruncate(description, {
      length: CONSTRAINTS_FIELDS.VIDEOS.DESCRIPTION.max,
      separator: /,? +/,
      omission: ' […]'
    })
  }

  // NSFW when youtube-dl reports an age limit of 16 or more
  private isNSFW (info: any) {
    return info?.age_limit >= 16
  }

  /**
   * Keep at most 5 normalized tags whose length is within the allowed bounds.
   * Bounds are inclusive, consistently with the name length check of getInfo().
   * Non-string entries are ignored.
   */
  private getTags (tags: string[]) {
    if (!Array.isArray(tags)) return []

    const { min, max } = CONSTRAINTS_FIELDS.VIDEOS.TAG

    return tags
      .filter(t => typeof t === 'string' && t.length >= min && t.length <= max)
      .map(t => t.normalize())
      .slice(0, 5)
  }

  // Find the licence id: 1 for any 'Creative Commons Attribution' string, else a case-insensitive label match
  private getLicence (licence: string) {
    // Same guard as getCategory(): a non-string value cannot be matched
    if (!licence || typeof licence !== 'string') return undefined

    if (licence.includes('Creative Commons Attribution')) return 1

    const lowerLicence = licence.toLowerCase()

    for (const key of Object.keys(VIDEO_LICENCES)) {
      if (VIDEO_LICENCES[key].toLowerCase() === lowerLicence) return parseInt(key, 10)
    }

    return undefined
  }

  // Find the category id of the first category: 11 for 'News & Politics', else a case-insensitive label match
  private getCategory (categories: string[]) {
    if (!categories) return undefined

    const categoryString = categories[0]
    if (!categoryString || typeof categoryString !== 'string') return undefined

    if (categoryString === 'News & Politics') return 11

    const lowerCategory = categoryString.toLowerCase()

    for (const key of Object.keys(VIDEO_CATEGORIES)) {
      if (VIDEO_CATEGORIES[key].toLowerCase() === lowerCategory) return parseInt(key, 10)
    }

    return undefined
  }

  // Keep the language only when it is a key of VIDEO_LANGUAGES
  private getLanguage (language: string) {
    return VIDEO_LANGUAGES[language] ? language : undefined
  }
}
148 | |||
149 | // --------------------------------------------------------------------------- | ||
150 | |||
151 | export { | ||
152 | YoutubeDLInfo, | ||
153 | YoutubeDLInfoBuilder | ||
154 | } | ||
diff --git a/server/helpers/youtube-dl/youtube-dl-wrapper.ts b/server/helpers/youtube-dl/youtube-dl-wrapper.ts new file mode 100644 index 000000000..6960fbae4 --- /dev/null +++ b/server/helpers/youtube-dl/youtube-dl-wrapper.ts | |||
@@ -0,0 +1,135 @@ | |||
1 | import { move, pathExists, readdir, remove } from 'fs-extra' | ||
2 | import { dirname, join } from 'path' | ||
3 | import { CONFIG } from '@server/initializers/config' | ||
4 | import { isVideoFileExtnameValid } from '../custom-validators/videos' | ||
5 | import { logger, loggerTagsFactory } from '../logger' | ||
6 | import { generateVideoImportTmpPath } from '../utils' | ||
7 | import { YoutubeDLCLI } from './youtube-dl-cli' | ||
8 | import { YoutubeDLInfo, YoutubeDLInfoBuilder } from './youtube-dl-info-builder' | ||
9 | |||
// Factory for the 'youtube-dl' tag metadata passed to the logger calls of this file
const lTags = loggerTagsFactory('youtube-dl')

// Subtitle files written by youtube-dl: language code, file name and path of each file
export type YoutubeDLSubs = Array<{
  language: string
  filename: string
  path: string
}>

// Process options shared by the info and download commands
const processOptions = {
  maxBuffer: 10 * 1024 * 1024 // 10MB
}
21 | |||
/**
 * High level youtube-dl operations (information, subtitles, download) for a
 * single target URL.
 */
class YoutubeDLWrapper {

  /**
   * @param url URL handed over to youtube-dl
   * @param enabledResolutions resolutions used to build the format selector
   */
  constructor (private readonly url: string = '', private readonly enabledResolutions: number[] = []) {

  }

  /**
   * Fetch the youtube-dl information of the URL and convert it with
   * `YoutubeDLInfoBuilder`.
   *
   * @param youtubeDLArgs additional arguments forwarded to youtube-dl
   * @throws Error when youtube-dl reports `is_live === true`
   */
  async getInfoForDownload (youtubeDLArgs: string[] = []): Promise<YoutubeDLInfo> {
    const youtubeDL = await YoutubeDLCLI.safeGet()

    const info = await youtubeDL.getInfo({
      url: this.url,
      format: YoutubeDLCLI.getYoutubeDLVideoFormat(this.enabledResolutions),
      additionalYoutubeDLArgs: youtubeDLArgs,
      processOptions
    })

    if (info.is_live === true) throw new Error('Cannot download a live streaming.')

    const infoBuilder = new YoutubeDLInfoBuilder(info)

    return infoBuilder.getInfo()
  }

  /**
   * Ask youtube-dl to write the subtitles of the URL, using the temporary
   * directory as working directory.
   *
   * @returns one entry per reported file whose name contains a
   * `.<2 letters language>[-<suffix>].vtt|ttml` part; other files are ignored
   */
  async getSubtitles (): Promise<YoutubeDLSubs> {
    const cwd = CONFIG.STORAGE.TMP_DIR

    const youtubeDL = await YoutubeDLCLI.safeGet()

    const files = await youtubeDL.getSubs({ url: this.url, format: 'vtt', processOptions: { cwd } })
    if (!files) return []

    logger.debug('Get subtitles from youtube dl.', { url: this.url, files, ...lTags() })

    const subtitles: YoutubeDLSubs = []

    for (const filename of files) {
      const matched = filename.match(/\.([a-z]{2})(-[a-z]+)?\.(vtt|ttml)/i)
      if (!matched || !matched[1]) continue

      subtitles.push({
        language: matched[1],
        path: join(cwd, filename),
        filename
      })
    }

    return subtitles
  }

  /**
   * Download the video in a temporary file.
   *
   * @param fileExt extension expected for the downloaded file
   * @param timeout delay in milliseconds after which the returned promise
   * rejects with 'YoutubeDL download timeout.'
   * @returns path of the downloaded file
   * @throws the download or timeout error; the downloaded file, if any, is
   * removed before the error is rethrown
   */
  async downloadVideo (fileExt: string, timeout: number): Promise<string> {
    // Leave empty the extension, youtube-dl will add it
    const pathWithoutExtension = generateVideoImportTmpPath(this.url, '')

    logger.info('Importing youtubeDL video %s to %s', this.url, pathWithoutExtension, lTags())

    const youtubeDL = await YoutubeDLCLI.safeGet()

    let timer: NodeJS.Timeout

    const timeoutPromise = new Promise<string>((_, rej) => {
      timer = setTimeout(() => rej(new Error('YoutubeDL download timeout.')), timeout)
    })

    const downloadPromise = youtubeDL.download({
      url: this.url,
      format: YoutubeDLCLI.getYoutubeDLVideoFormat(this.enabledResolutions),
      output: pathWithoutExtension,
      processOptions
    }).then(async () => {
      // The download itself is over: the timeout must not fire while the file is moved
      clearTimeout(timer)

      // If youtube-dl did not guess an extension for our file, just use .mp4 as default
      if (await pathExists(pathWithoutExtension)) {
        await move(pathWithoutExtension, pathWithoutExtension + '.mp4')
      }

      return this.guessVideoPathWithExtension(pathWithoutExtension, fileExt)
    })

    try {
      return await Promise.race([ downloadPromise, timeoutPromise ])
    } catch (err) {
      await this.removeDownloadedFile(pathWithoutExtension, fileExt)

      // Always surface the original failure, whatever happened during the cleanup
      throw err
    } finally {
      // Also covers a failed download, so the pending timer does not outlive this call
      clearTimeout(timer)
    }
  }

  // Best effort removal of the file written by a failed download: never throws
  private async removeDownloadedFile (tmpPath: string, sourceExt: string) {
    let path: string

    try {
      path = await this.guessVideoPathWithExtension(tmpPath, sourceExt)
    } catch (err) {
      logger.debug('No youtubeDL file to remove for %s.', tmpPath, { err, ...lTags() })
      return
    }

    try {
      await remove(path)
    } catch (err) {
      logger.error('Cannot remove file in youtubeDL timeout.', { err, ...lTags() })
    }
  }

  /**
   * Find the downloaded file by trying `sourceExt`, '.mp4', '.mkv' and '.webm'
   * after `tmpPath`, in this order.
   *
   * @throws Error when `sourceExt` is not a valid video extension or when none
   * of the candidate paths exists
   */
  private async guessVideoPathWithExtension (tmpPath: string, sourceExt: string) {
    if (!isVideoFileExtnameValid(sourceExt)) {
      throw new Error('Invalid video extension ' + sourceExt)
    }

    const extensions = [ sourceExt, '.mp4', '.mkv', '.webm' ]

    for (const extension of extensions) {
      const path = tmpPath + extension

      if (await pathExists(path)) return path
    }

    // Help debugging by listing what the directory actually contains
    const directoryContent = await readdir(dirname(tmpPath))

    throw new Error(`Cannot guess path of ${tmpPath}. Directory content: ${directoryContent.join(', ')}`)
  }
}
130 | |||
131 | // --------------------------------------------------------------------------- | ||
132 | |||
133 | export { | ||
134 | YoutubeDLWrapper | ||
135 | } | ||