aboutsummaryrefslogtreecommitdiffhomepage
path: root/server/helpers/youtube-dl
diff options
context:
space:
mode:
authorChocobozzz <me@florianbigard.com>2021-10-21 16:28:39 +0200
committerChocobozzz <me@florianbigard.com>2021-10-22 10:25:24 +0200
commit62549e6c9818f422698f030e0b242609115493ed (patch)
tree12a969f694239fe5f926f779698df9523605ee80 /server/helpers/youtube-dl
parenta71d4140a5b7831dbe2eb7a0dfaa6a755cb2e906 (diff)
downloadPeerTube-62549e6c9818f422698f030e0b242609115493ed.tar.gz
PeerTube-62549e6c9818f422698f030e0b242609115493ed.tar.zst
PeerTube-62549e6c9818f422698f030e0b242609115493ed.zip
Rewrite youtube-dl import
Use python3 binary. Allows to use a custom youtube-dl release URL. Allows to use yt-dlp (youtube-dl fork). Remove proxy config from configuration to use HTTP_PROXY and HTTPS_PROXY env variables.
Diffstat (limited to 'server/helpers/youtube-dl')
-rw-r--r--server/helpers/youtube-dl/index.ts3
-rw-r--r--server/helpers/youtube-dl/youtube-dl-cli.ts198
-rw-r--r--server/helpers/youtube-dl/youtube-dl-info-builder.ts154
-rw-r--r--server/helpers/youtube-dl/youtube-dl-wrapper.ts135
4 files changed, 490 insertions, 0 deletions
diff --git a/server/helpers/youtube-dl/index.ts b/server/helpers/youtube-dl/index.ts
new file mode 100644
index 000000000..6afc77dcf
--- /dev/null
+++ b/server/helpers/youtube-dl/index.ts
@@ -0,0 +1,3 @@
1export * from './youtube-dl-cli'
2export * from './youtube-dl-info-builder'
3export * from './youtube-dl-wrapper'
diff --git a/server/helpers/youtube-dl/youtube-dl-cli.ts b/server/helpers/youtube-dl/youtube-dl-cli.ts
new file mode 100644
index 000000000..440869205
--- /dev/null
+++ b/server/helpers/youtube-dl/youtube-dl-cli.ts
@@ -0,0 +1,198 @@
1import execa from 'execa'
2import { pathExists, writeFile } from 'fs-extra'
3import { join } from 'path'
4import { CONFIG } from '@server/initializers/config'
5import { VideoResolution } from '@shared/models'
6import { logger, loggerTagsFactory } from '../logger'
7import { getProxy, isProxyEnabled } from '../proxy'
8import { isBinaryResponse, peertubeGot } from '../requests'
9
10const lTags = loggerTagsFactory('youtube-dl')
11
12const youtubeDLBinaryPath = join(CONFIG.STORAGE.BIN_DIR, CONFIG.IMPORT.VIDEOS.HTTP.YOUTUBE_DL_RELEASE.NAME)
13
14export class YoutubeDLCLI {
15
16 static async safeGet () {
17 if (!await pathExists(youtubeDLBinaryPath)) {
18 await this.updateYoutubeDLBinary()
19 }
20
21 return new YoutubeDLCLI()
22 }
23
24 static async updateYoutubeDLBinary () {
25 const url = CONFIG.IMPORT.VIDEOS.HTTP.YOUTUBE_DL_RELEASE.URL
26
27 logger.info('Updating youtubeDL binary from %s.', url, lTags())
28
29 const gotOptions = { context: { bodyKBLimit: 20_000 }, responseType: 'buffer' as 'buffer' }
30
31 try {
32 let gotResult = await peertubeGot(url, gotOptions)
33
34 if (!isBinaryResponse(gotResult)) {
35 const json = JSON.parse(gotResult.body.toString())
36 const latest = json.filter(release => release.prerelease === false)[0]
37 if (!latest) throw new Error('Cannot find latest release')
38
39 const releaseName = CONFIG.IMPORT.VIDEOS.HTTP.YOUTUBE_DL_RELEASE.NAME
40 const releaseAsset = latest.assets.find(a => a.name === releaseName)
41 if (!releaseAsset) throw new Error(`Cannot find appropriate release with name ${releaseName} in release assets`)
42
43 gotResult = await peertubeGot(releaseAsset.browser_download_url, gotOptions)
44 }
45
46 if (!isBinaryResponse(gotResult)) {
47 throw new Error('Not a binary response')
48 }
49
50 await writeFile(youtubeDLBinaryPath, gotResult.body)
51
52 logger.info('youtube-dl updated %s.', youtubeDLBinaryPath, lTags())
53 } catch (err) {
54 logger.error('Cannot update youtube-dl from %s.', url, { err, ...lTags() })
55 }
56 }
57
58 static getYoutubeDLVideoFormat (enabledResolutions: VideoResolution[]) {
59 /**
60 * list of format selectors in order or preference
61 * see https://github.com/ytdl-org/youtube-dl#format-selection
62 *
63 * case #1 asks for a mp4 using h264 (avc1) and the exact resolution in the hope
64 * of being able to do a "quick-transcode"
65 * case #2 is the first fallback. No "quick-transcode" means we can get anything else (like vp9)
66 * case #3 is the resolution-degraded equivalent of #1, and already a pretty safe fallback
67 *
68 * in any case we avoid AV1, see https://github.com/Chocobozzz/PeerTube/issues/3499
69 **/
70 const resolution = enabledResolutions.length === 0
71 ? VideoResolution.H_720P
72 : Math.max(...enabledResolutions)
73
74 return [
75 `bestvideo[vcodec^=avc1][height=${resolution}]+bestaudio[ext=m4a]`, // case #1
76 `bestvideo[vcodec!*=av01][vcodec!*=vp9.2][height=${resolution}]+bestaudio`, // case #2
77 `bestvideo[vcodec^=avc1][height<=${resolution}]+bestaudio[ext=m4a]`, // case #3
78 `bestvideo[vcodec!*=av01][vcodec!*=vp9.2]+bestaudio`,
79 'best[vcodec!*=av01][vcodec!*=vp9.2]', // case fallback for known formats
80 'best' // Ultimate fallback
81 ].join('/')
82 }
83
84 private constructor () {
85
86 }
87
88 download (options: {
89 url: string
90 format: string
91 output: string
92 processOptions: execa.NodeOptions
93 additionalYoutubeDLArgs?: string[]
94 }) {
95 return this.run({
96 url: options.url,
97 processOptions: options.processOptions,
98 args: (options.additionalYoutubeDLArgs || []).concat([ '-f', options.format, '-o', options.output ])
99 })
100 }
101
102 async getInfo (options: {
103 url: string
104 format: string
105 processOptions: execa.NodeOptions
106 additionalYoutubeDLArgs?: string[]
107 }) {
108 const { url, format, additionalYoutubeDLArgs = [], processOptions } = options
109
110 const completeArgs = additionalYoutubeDLArgs.concat([ '--dump-json', '-f', format ])
111
112 const data = await this.run({ url, args: completeArgs, processOptions })
113 const info = data.map(this.parseInfo)
114
115 return info.length === 1
116 ? info[0]
117 : info
118 }
119
120 async getSubs (options: {
121 url: string
122 format: 'vtt'
123 processOptions: execa.NodeOptions
124 }) {
125 const { url, format, processOptions } = options
126
127 const args = [ '--skip-download', '--all-subs', `--sub-format=${format}` ]
128
129 const data = await this.run({ url, args, processOptions })
130 const files: string[] = []
131
132 const skipString = '[info] Writing video subtitles to: '
133
134 for (let i = 0, len = data.length; i < len; i++) {
135 const line = data[i]
136
137 if (line.indexOf(skipString) === 0) {
138 files.push(line.slice(skipString.length))
139 }
140 }
141
142 return files
143 }
144
145 private async run (options: {
146 url: string
147 args: string[]
148 processOptions: execa.NodeOptions
149 }) {
150 const { url, args, processOptions } = options
151
152 let completeArgs = this.wrapWithProxyOptions(args)
153 completeArgs = this.wrapWithIPOptions(completeArgs)
154 completeArgs = this.wrapWithFFmpegOptions(completeArgs)
155
156 const output = await execa('python', [ youtubeDLBinaryPath, ...completeArgs, url ], processOptions)
157
158 logger.debug('Runned youtube-dl command.', { command: output.command, stdout: output.stdout, ...lTags() })
159
160 return output.stdout
161 ? output.stdout.trim().split(/\r?\n/)
162 : undefined
163 }
164
165 private wrapWithProxyOptions (args: string[]) {
166 if (isProxyEnabled()) {
167 logger.debug('Using proxy %s for YoutubeDL', getProxy(), lTags())
168
169 return [ '--proxy', getProxy() ].concat(args)
170 }
171
172 return args
173 }
174
175 private wrapWithIPOptions (args: string[]) {
176 if (CONFIG.IMPORT.VIDEOS.HTTP.FORCE_IPV4) {
177 logger.debug('Force ipv4 for YoutubeDL')
178
179 return [ '--force-ipv4' ].concat(args)
180 }
181
182 return args
183 }
184
185 private wrapWithFFmpegOptions (args: string[]) {
186 if (process.env.FFMPEG_PATH) {
187 logger.debug('Using ffmpeg location %s for YoutubeDL', process.env.FFMPEG_PATH, lTags())
188
189 return [ '--ffmpeg-location', process.env.FFMPEG_PATH ].concat(args)
190 }
191
192 return args
193 }
194
195 private parseInfo (data: string) {
196 return JSON.parse(data)
197 }
198}
diff --git a/server/helpers/youtube-dl/youtube-dl-info-builder.ts b/server/helpers/youtube-dl/youtube-dl-info-builder.ts
new file mode 100644
index 000000000..9746a7067
--- /dev/null
+++ b/server/helpers/youtube-dl/youtube-dl-info-builder.ts
@@ -0,0 +1,154 @@
1import { CONSTRAINTS_FIELDS, VIDEO_CATEGORIES, VIDEO_LANGUAGES, VIDEO_LICENCES } from '../../initializers/constants'
2import { peertubeTruncate } from '../core-utils'
3
4type YoutubeDLInfo = {
5 name?: string
6 description?: string
7 category?: number
8 language?: string
9 licence?: number
10 nsfw?: boolean
11 tags?: string[]
12 thumbnailUrl?: string
13 ext?: string
14 originallyPublishedAt?: Date
15}
16
17class YoutubeDLInfoBuilder {
18 private readonly info: any
19
20 constructor (info: any) {
21 this.info = { ...info }
22 }
23
24 getInfo () {
25 const obj = this.buildVideoInfo(this.normalizeObject(this.info))
26 if (obj.name && obj.name.length < CONSTRAINTS_FIELDS.VIDEOS.NAME.min) obj.name += ' video'
27
28 return obj
29 }
30
31 private normalizeObject (obj: any) {
32 const newObj: any = {}
33
34 for (const key of Object.keys(obj)) {
35 // Deprecated key
36 if (key === 'resolution') continue
37
38 const value = obj[key]
39
40 if (typeof value === 'string') {
41 newObj[key] = value.normalize()
42 } else {
43 newObj[key] = value
44 }
45 }
46
47 return newObj
48 }
49
50 private buildOriginallyPublishedAt (obj: any) {
51 let originallyPublishedAt: Date = null
52
53 const uploadDateMatcher = /^(\d{4})(\d{2})(\d{2})$/.exec(obj.upload_date)
54 if (uploadDateMatcher) {
55 originallyPublishedAt = new Date()
56 originallyPublishedAt.setHours(0, 0, 0, 0)
57
58 const year = parseInt(uploadDateMatcher[1], 10)
59 // Month starts from 0
60 const month = parseInt(uploadDateMatcher[2], 10) - 1
61 const day = parseInt(uploadDateMatcher[3], 10)
62
63 originallyPublishedAt.setFullYear(year, month, day)
64 }
65
66 return originallyPublishedAt
67 }
68
69 private buildVideoInfo (obj: any): YoutubeDLInfo {
70 return {
71 name: this.titleTruncation(obj.title),
72 description: this.descriptionTruncation(obj.description),
73 category: this.getCategory(obj.categories),
74 licence: this.getLicence(obj.license),
75 language: this.getLanguage(obj.language),
76 nsfw: this.isNSFW(obj),
77 tags: this.getTags(obj.tags),
78 thumbnailUrl: obj.thumbnail || undefined,
79 originallyPublishedAt: this.buildOriginallyPublishedAt(obj),
80 ext: obj.ext
81 }
82 }
83
84 private titleTruncation (title: string) {
85 return peertubeTruncate(title, {
86 length: CONSTRAINTS_FIELDS.VIDEOS.NAME.max,
87 separator: /,? +/,
88 omission: ' […]'
89 })
90 }
91
92 private descriptionTruncation (description: string) {
93 if (!description || description.length < CONSTRAINTS_FIELDS.VIDEOS.DESCRIPTION.min) return undefined
94
95 return peertubeTruncate(description, {
96 length: CONSTRAINTS_FIELDS.VIDEOS.DESCRIPTION.max,
97 separator: /,? +/,
98 omission: ' […]'
99 })
100 }
101
102 private isNSFW (info: any) {
103 return info?.age_limit >= 16
104 }
105
106 private getTags (tags: string[]) {
107 if (Array.isArray(tags) === false) return []
108
109 return tags
110 .filter(t => t.length < CONSTRAINTS_FIELDS.VIDEOS.TAG.max && t.length > CONSTRAINTS_FIELDS.VIDEOS.TAG.min)
111 .map(t => t.normalize())
112 .slice(0, 5)
113 }
114
115 private getLicence (licence: string) {
116 if (!licence) return undefined
117
118 if (licence.includes('Creative Commons Attribution')) return 1
119
120 for (const key of Object.keys(VIDEO_LICENCES)) {
121 const peertubeLicence = VIDEO_LICENCES[key]
122 if (peertubeLicence.toLowerCase() === licence.toLowerCase()) return parseInt(key, 10)
123 }
124
125 return undefined
126 }
127
128 private getCategory (categories: string[]) {
129 if (!categories) return undefined
130
131 const categoryString = categories[0]
132 if (!categoryString || typeof categoryString !== 'string') return undefined
133
134 if (categoryString === 'News & Politics') return 11
135
136 for (const key of Object.keys(VIDEO_CATEGORIES)) {
137 const category = VIDEO_CATEGORIES[key]
138 if (categoryString.toLowerCase() === category.toLowerCase()) return parseInt(key, 10)
139 }
140
141 return undefined
142 }
143
144 private getLanguage (language: string) {
145 return VIDEO_LANGUAGES[language] ? language : undefined
146 }
147}
148
149// ---------------------------------------------------------------------------
150
151export {
152 YoutubeDLInfo,
153 YoutubeDLInfoBuilder
154}
diff --git a/server/helpers/youtube-dl/youtube-dl-wrapper.ts b/server/helpers/youtube-dl/youtube-dl-wrapper.ts
new file mode 100644
index 000000000..6960fbae4
--- /dev/null
+++ b/server/helpers/youtube-dl/youtube-dl-wrapper.ts
@@ -0,0 +1,135 @@
1import { move, pathExists, readdir, remove } from 'fs-extra'
2import { dirname, join } from 'path'
3import { CONFIG } from '@server/initializers/config'
4import { isVideoFileExtnameValid } from '../custom-validators/videos'
5import { logger, loggerTagsFactory } from '../logger'
6import { generateVideoImportTmpPath } from '../utils'
7import { YoutubeDLCLI } from './youtube-dl-cli'
8import { YoutubeDLInfo, YoutubeDLInfoBuilder } from './youtube-dl-info-builder'
9
10const lTags = loggerTagsFactory('youtube-dl')
11
12export type YoutubeDLSubs = {
13 language: string
14 filename: string
15 path: string
16}[]
17
18const processOptions = {
19 maxBuffer: 1024 * 1024 * 10 // 10MB
20}
21
22class YoutubeDLWrapper {
23
24 constructor (private readonly url: string = '', private readonly enabledResolutions: number[] = []) {
25
26 }
27
28 async getInfoForDownload (youtubeDLArgs: string[] = []): Promise<YoutubeDLInfo> {
29 const youtubeDL = await YoutubeDLCLI.safeGet()
30
31 const info = await youtubeDL.getInfo({
32 url: this.url,
33 format: YoutubeDLCLI.getYoutubeDLVideoFormat(this.enabledResolutions),
34 additionalYoutubeDLArgs: youtubeDLArgs,
35 processOptions
36 })
37
38 if (info.is_live === true) throw new Error('Cannot download a live streaming.')
39
40 const infoBuilder = new YoutubeDLInfoBuilder(info)
41
42 return infoBuilder.getInfo()
43 }
44
45 async getSubtitles (): Promise<YoutubeDLSubs> {
46 const cwd = CONFIG.STORAGE.TMP_DIR
47
48 const youtubeDL = await YoutubeDLCLI.safeGet()
49
50 const files = await youtubeDL.getSubs({ url: this.url, format: 'vtt', processOptions: { cwd } })
51 if (!files) return []
52
53 logger.debug('Get subtitles from youtube dl.', { url: this.url, files, ...lTags() })
54
55 const subtitles = files.reduce((acc, filename) => {
56 const matched = filename.match(/\.([a-z]{2})(-[a-z]+)?\.(vtt|ttml)/i)
57 if (!matched || !matched[1]) return acc
58
59 return [
60 ...acc,
61 {
62 language: matched[1],
63 path: join(cwd, filename),
64 filename
65 }
66 ]
67 }, [])
68
69 return subtitles
70 }
71
72 async downloadVideo (fileExt: string, timeout: number): Promise<string> {
73 // Leave empty the extension, youtube-dl will add it
74 const pathWithoutExtension = generateVideoImportTmpPath(this.url, '')
75
76 let timer: NodeJS.Timeout
77
78 logger.info('Importing youtubeDL video %s to %s', this.url, pathWithoutExtension, lTags())
79
80 const youtubeDL = await YoutubeDLCLI.safeGet()
81
82 const timeoutPromise = new Promise<string>((_, rej) => {
83 timer = setTimeout(() => rej(new Error('YoutubeDL download timeout.')), timeout)
84 })
85
86 const downloadPromise = youtubeDL.download({
87 url: this.url,
88 format: YoutubeDLCLI.getYoutubeDLVideoFormat(this.enabledResolutions),
89 output: pathWithoutExtension,
90 processOptions
91 }).then(() => clearTimeout(timer))
92 .then(async () => {
93 // If youtube-dl did not guess an extension for our file, just use .mp4 as default
94 if (await pathExists(pathWithoutExtension)) {
95 await move(pathWithoutExtension, pathWithoutExtension + '.mp4')
96 }
97
98 return this.guessVideoPathWithExtension(pathWithoutExtension, fileExt)
99 })
100
101 return Promise.race([ downloadPromise, timeoutPromise ])
102 .catch(async err => {
103 const path = await this.guessVideoPathWithExtension(pathWithoutExtension, fileExt)
104
105 remove(path)
106 .catch(err => logger.error('Cannot remove file in youtubeDL timeout.', { err, ...lTags() }))
107
108 throw err
109 })
110 }
111
112 private async guessVideoPathWithExtension (tmpPath: string, sourceExt: string) {
113 if (!isVideoFileExtnameValid(sourceExt)) {
114 throw new Error('Invalid video extension ' + sourceExt)
115 }
116
117 const extensions = [ sourceExt, '.mp4', '.mkv', '.webm' ]
118
119 for (const extension of extensions) {
120 const path = tmpPath + extension
121
122 if (await pathExists(path)) return path
123 }
124
125 const directoryContent = await readdir(dirname(tmpPath))
126
127 throw new Error(`Cannot guess path of ${tmpPath}. Directory content: ${directoryContent.join(', ')}`)
128 }
129}
130
131// ---------------------------------------------------------------------------
132
133export {
134 YoutubeDLWrapper
135}