From 0305db28c98fd6cf43a3c50ba92c76215e99d512 Mon Sep 17 00:00:00 2001 From: Jelle Besseling Date: Tue, 17 Aug 2021 08:26:20 +0200 Subject: Add support for saving video files to object storage (#4290) * Add support for saving video files to object storage * Add support for custom url generation on s3 stored files Uses two config keys to support url generation that doesn't directly go to (compatible s3). Can be used to generate urls to any cache server or CDN. * Upload files to s3 concurrently and delete originals afterwards * Only publish after move to object storage is complete * Use base url instead of url template * Fix mistyped config field * Add rudimentary way to download before transcode * Implement Chocobozzz suggestions https://github.com/Chocobozzz/PeerTube/pull/4290#issuecomment-891670478 The remarks in question: Try to use objectStorage prefix instead of s3 prefix for your function/variables/config names Prefer to use a tree for the config: s3.streaming_playlists_bucket -> object_storage.streaming_playlists.bucket Use uppercase for config: S3.STREAMING_PLAYLISTS_BUCKETINFO.bucket -> OBJECT_STORAGE.STREAMING_PLAYLISTS.BUCKET (maybe BUCKET_NAME instead of BUCKET) I suggest to rename moveJobsRunning to pendingMovingJobs (or better, create a dedicated videoJobInfo table with a pendingMove & videoId columns so we could also use this table to track pending transcoding jobs) https://github.com/Chocobozzz/PeerTube/pull/4290/files#diff-3e26d41ca4bda1de8e1747af70ca2af642abcc1e9e0bfb94239ff2165acfbde5R19 uses a string instead of an integer I think we should store the origin object storage URL in fileUrl, without base_url injection. 
Instead, inject the base_url at "runtime" so admins can easily change this configuration without running a script to update DB URLs
* Import correct function
* Support multipart upload
* Remove import of node 15.0 module stream/promises
* Extend maximum upload job length Using the same value as for redundancy downloading seems logical
* Use dynamic part size for really large uploads Also adds very small part size for local testing
* Fix decreasePendingMove query
* Resolve various PR comments
* Move to object storage after optimize
* Make upload size configurable and increase default
* Prune webtorrent files that are stored in object storage
* Move files after transcoding jobs
* Fix federation
* Add video path manager
* Support move to external storage job in client
* Fix live object storage tests

Co-authored-by: Chocobozzz
---
 server/lib/object-storage/index.ts                 |   3 +
 server/lib/object-storage/keys.ts                  |  23 ++
 server/lib/object-storage/shared/client.ts         |  59 +++++
 server/lib/object-storage/shared/index.ts          |   3 +
 server/lib/object-storage/shared/logger.ts         |   7 +
 .../shared/object-storage-helpers.ts               | 245 +++++++++++++++++++++
 server/lib/object-storage/urls.ts                  |  45 ++++
 server/lib/object-storage/videos.ts                |  78 +++++++
 8 files changed, 463 insertions(+)
 create mode 100644 server/lib/object-storage/index.ts
 create mode 100644 server/lib/object-storage/keys.ts
 create mode 100644 server/lib/object-storage/shared/client.ts
 create mode 100644 server/lib/object-storage/shared/index.ts
 create mode 100644 server/lib/object-storage/shared/logger.ts
 create mode 100644 server/lib/object-storage/shared/object-storage-helpers.ts
 create mode 100644 server/lib/object-storage/urls.ts
 create mode 100644 server/lib/object-storage/videos.ts

(limited to 'server/lib/object-storage')

diff --git a/server/lib/object-storage/index.ts b/server/lib/object-storage/index.ts
new file mode 100644
index 000000000..8b413a40e
--- /dev/null
+++ b/server/lib/object-storage/index.ts
@@ -0,0 +1,3 @@
+export * from './keys'
+export * from './urls'
+export * from './videos'
diff --git a/server/lib/object-storage/keys.ts b/server/lib/object-storage/keys.ts
new file mode 100644
index 000000000..519474775
--- /dev/null
+++ b/server/lib/object-storage/keys.ts
@@ -0,0 +1,23 @@
+import { join } from 'path'
+import { MStreamingPlaylist, MVideoUUID } from '@server/types/models'
+
+// Key of a file of the playlist: the playlist base key joined with the filename
+function generateHLSObjectStorageKey (playlist: MStreamingPlaylist, video: MVideoUUID, filename: string) {
+  return join(generateHLSObjectBaseStorageKey(playlist, video), filename)
+}
+
+// Common beginning of the keys of all the files of this playlist: <playlist type>_<video uuid>
+function generateHLSObjectBaseStorageKey (playlist: MStreamingPlaylist, video: MVideoUUID) {
+  return playlist.getStringType() + '_' + video.uuid
+}
+
+// WebTorrent files use their filename as key
+function generateWebTorrentObjectStorageKey (filename: string) {
+  return filename
+}
+
+export {
+  generateHLSObjectStorageKey,
+  generateHLSObjectBaseStorageKey,
+  generateWebTorrentObjectStorageKey
+}
diff --git a/server/lib/object-storage/shared/client.ts b/server/lib/object-storage/shared/client.ts
new file mode 100644
index 000000000..c9a614593
--- /dev/null
+++ b/server/lib/object-storage/shared/client.ts
@@ -0,0 +1,59 @@
+import { S3Client } from '@aws-sdk/client-s3'
+import { logger } from '@server/helpers/logger'
+import { CONFIG } from '@server/initializers/config'
+import { lTags } from './logger'
+
+// Parsed endpoint URL, built on first use and then reused
+let endpointParsed: URL
+function getEndpointParsed () {
+  if (endpointParsed) return endpointParsed
+
+  endpointParsed = new URL(getEndpoint())
+
+  return endpointParsed
+}
+
+// S3 client, created on first use and then reused. Credentials are only passed if an access key id is configured
+let s3Client: S3Client
+function getClient () {
+  if (s3Client) return s3Client
+
+  const OBJECT_STORAGE = CONFIG.OBJECT_STORAGE
+
+  s3Client = new S3Client({
+    endpoint: getEndpoint(),
+    region: OBJECT_STORAGE.REGION,
+    credentials: OBJECT_STORAGE.CREDENTIALS.ACCESS_KEY_ID
+      ? {
+        accessKeyId: OBJECT_STORAGE.CREDENTIALS.ACCESS_KEY_ID,
+        secretAccessKey: OBJECT_STORAGE.CREDENTIALS.SECRET_ACCESS_KEY
+      }
+      : undefined
+  })
+
+  logger.info('Initialized S3 client %s with region %s.', getEndpoint(), OBJECT_STORAGE.REGION, lTags())
+
+  return s3Client
+}
+
+// ---------------------------------------------------------------------------
+
+export {
+  getEndpointParsed,
+  getClient
+}
+
+// ---------------------------------------------------------------------------
+
+// Endpoint with its scheme: https:// is prepended when the configuration does not start with http:// or https://
+let endpoint: string
+function getEndpoint () {
+  if (endpoint) return endpoint
+
+  const endpointConfig = CONFIG.OBJECT_STORAGE.ENDPOINT
+  endpoint = endpointConfig.startsWith('http://') || endpointConfig.startsWith('https://')
+    ? CONFIG.OBJECT_STORAGE.ENDPOINT
+    : 'https://' + CONFIG.OBJECT_STORAGE.ENDPOINT
+
+  return endpoint
+}
diff --git a/server/lib/object-storage/shared/index.ts b/server/lib/object-storage/shared/index.ts
new file mode 100644
index 000000000..11e10aa9f
--- /dev/null
+++ b/server/lib/object-storage/shared/index.ts
@@ -0,0 +1,3 @@
+export * from './client'
+export * from './logger'
+export * from './object-storage-helpers'
diff --git a/server/lib/object-storage/shared/logger.ts b/server/lib/object-storage/shared/logger.ts
new file mode 100644
index 000000000..8ab7cbd71
--- /dev/null
+++ b/server/lib/object-storage/shared/logger.ts
@@ -0,0 +1,7 @@
+import { loggerTagsFactory } from '@server/helpers/logger'
+
+const lTags = loggerTagsFactory('object-storage')
+
+export {
+  lTags
+}
diff --git a/server/lib/object-storage/shared/object-storage-helpers.ts b/server/lib/object-storage/shared/object-storage-helpers.ts
new file mode 100644
index 000000000..e23216907
--- /dev/null
+++ b/server/lib/object-storage/shared/object-storage-helpers.ts
@@ -0,0 +1,245 @@
+import { close, createReadStream, createWriteStream, ensureDir, open, ReadStream, stat } from 'fs-extra'
+import { min } from 'lodash'
+import { dirname } from 'path'
+import { Readable } from 'stream'
+import {
+  CompletedPart,
+  CompleteMultipartUploadCommand,
+  CreateMultipartUploadCommand,
+  DeleteObjectCommand,
+  GetObjectCommand,
+  ListObjectsV2Command,
+  PutObjectCommand,
+  UploadPartCommand
+} from '@aws-sdk/client-s3'
+import { pipelinePromise } from '@server/helpers/core-utils'
+import { isArray } from '@server/helpers/custom-validators/misc'
+import { logger } from '@server/helpers/logger'
+import { CONFIG } from '@server/initializers/config'
+import { getPrivateUrl } from '../urls'
+import { getClient } from './client'
+import { lTags } from './logger'
+
+type BucketInfo = {
+  BUCKET_NAME: string
+  PREFIX?: string
+}
+
+// Upload a local file to the bucket and return the URL built by getPrivateUrl()
+// Files bigger than OBJECT_STORAGE.MAX_UPLOAD_PART are sent using a multipart upload
+async function storeObject (options: {
+  inputPath: string
+  objectStorageKey: string
+  bucketInfo: BucketInfo
+}): Promise<string> {
+  const { inputPath, objectStorageKey, bucketInfo } = options
+
+  logger.debug('Uploading file %s to %s%s in bucket %s', inputPath, bucketInfo.PREFIX, objectStorageKey, bucketInfo.BUCKET_NAME, lTags())
+
+  const stats = await stat(inputPath)
+
+  // If bigger than max allowed size we do a multipart upload
+  if (stats.size > CONFIG.OBJECT_STORAGE.MAX_UPLOAD_PART) {
+    return multiPartUpload({ inputPath, objectStorageKey, bucketInfo })
+  }
+
+  const fileStream = createReadStream(inputPath)
+  return objectStoragePut({ objectStorageKey, content: fileStream, bucketInfo })
+}
+
+// Delete a single object, the bucket prefix is prepended to filename
+async function removeObject (filename: string, bucketInfo: BucketInfo) {
+  const command = new DeleteObjectCommand({
+    Bucket: bucketInfo.BUCKET_NAME,
+    Key: buildKey(filename, bucketInfo)
+  })
+
+  return getClient().send(command)
+}
+
+// Delete every listed object whose key starts with the bucket prefix followed by prefix
+// Throws if the listing does not contain any object
+async function removePrefix (prefix: string, bucketInfo: BucketInfo) {
+  const s3Client = getClient()
+
+  const commandPrefix = buildKey(prefix, bucketInfo)
+  const listCommand = new ListObjectsV2Command({
+    Bucket: bucketInfo.BUCKET_NAME,
+    Prefix: commandPrefix
+  })
+
+  const listedObjects = await s3Client.send(listCommand)
+
+  // FIXME: use bulk delete when s3ninja will support this operation
+  // const deleteParams = {
+  //   Bucket: bucketInfo.BUCKET_NAME,
+  //   Delete: { Objects: [] }
+  // }
+
+  if (isArray(listedObjects.Contents) !== true) {
+    const message = `Cannot remove ${commandPrefix} prefix in bucket ${bucketInfo.BUCKET_NAME}: no files listed.`
+
+    logger.error(message, { response: listedObjects, ...lTags() })
+    throw new Error(message)
+  }
+
+  for (const object of listedObjects.Contents) {
+    const command = new DeleteObjectCommand({
+      Bucket: bucketInfo.BUCKET_NAME,
+      Key: object.Key
+    })
+
+    await s3Client.send(command)
+
+    // FIXME: use bulk delete when s3ninja will support this operation
+    // deleteParams.Delete.Objects.push({ Key: object.Key })
+  }
+
+  // FIXME: use bulk delete when s3ninja will support this operation
+  // const deleteCommand = new DeleteObjectsCommand(deleteParams)
+  // await s3Client.send(deleteCommand)
+
+  // Repeat if not all objects could be listed at once (limit of 1000?)
+  if (listedObjects.IsTruncated) await removePrefix(prefix, bucketInfo)
+}
+
+// Download an object to destination, the parent directory of destination is created if needed
+async function makeAvailable (options: {
+  key: string
+  destination: string
+  bucketInfo: BucketInfo
+}) {
+  const { key, destination, bucketInfo } = options
+
+  await ensureDir(dirname(options.destination))
+
+  const command = new GetObjectCommand({
+    Bucket: bucketInfo.BUCKET_NAME,
+    Key: buildKey(key, bucketInfo)
+  })
+  const response = await getClient().send(command)
+
+  const file = createWriteStream(destination)
+  await pipelinePromise(response.Body as Readable, file)
+
+  file.close()
+}
+
+// Prepend the bucket prefix to the key
+// PREFIX is optional: do not build keys starting with "undefined" when it is not set
+function buildKey (key: string, bucketInfo: BucketInfo) {
+  return (bucketInfo.PREFIX || '') + key
+}
+
+// ---------------------------------------------------------------------------
+
+export {
+  BucketInfo,
+  buildKey,
+  storeObject,
+  removeObject,
+  removePrefix,
+  makeAvailable
+}
+
+// ---------------------------------------------------------------------------
+
+// Upload the stream in a single PutObject request and return the URL built by getPrivateUrl()
+async function objectStoragePut (options: {
+  objectStorageKey: string
+  content: ReadStream
+  bucketInfo: BucketInfo
+}) {
+  const { objectStorageKey, content, bucketInfo } = options
+
+  const command = new PutObjectCommand({
+    Bucket: bucketInfo.BUCKET_NAME,
+    Key: buildKey(objectStorageKey, bucketInfo),
+    Body: content
+  })
+
+  await getClient().send(command)
+
+  return getPrivateUrl(bucketInfo, objectStorageKey)
+}
+
+// Upload the file in parts of OBJECT_STORAGE.MAX_UPLOAD_PART bytes, one after the other,
+// and return the URL built by getPrivateUrl()
+async function multiPartUpload (options: {
+  inputPath: string
+  objectStorageKey: string
+  bucketInfo: BucketInfo
+}) {
+  const { objectStorageKey, inputPath, bucketInfo } = options
+
+  const key = buildKey(objectStorageKey, bucketInfo)
+  const s3Client = getClient()
+
+  const statResult = await stat(inputPath)
+
+  const createMultipartCommand = new CreateMultipartUploadCommand({
+    Bucket: bucketInfo.BUCKET_NAME,
+    Key: key
+  })
+  const createResponse = await s3Client.send(createMultipartCommand)
+
+  const fd = await open(inputPath, 'r')
+  let partNumber = 1
+  const parts: CompletedPart[] = []
+  const partSize = CONFIG.OBJECT_STORAGE.MAX_UPLOAD_PART
+
+  // Streams are created with autoClose: false, so always close the descriptor ourselves, even if a part fails
+  try {
+    for (let start = 0; start < statResult.size; start += partSize) {
+      logger.debug(
+        'Uploading part %d of file to %s%s in bucket %s',
+        partNumber, bucketInfo.PREFIX, objectStorageKey, bucketInfo.BUCKET_NAME, lTags()
+      )
+
+      // FIXME: Remove when https://github.com/aws/aws-sdk-js-v3/pull/2637 is released
+      // The s3 sdk needs to know the length of the http body beforehand, but doesn't support
+      // streams with start and end set, so it just tries to stat the file in stream.path.
+      // This fails for us because we only want to send part of the file. The stream type
+      // is modified so we can set the byteLength here, which s3 detects because array buffers
+      // have this field set
+      const stream: ReadStream & { byteLength: number } =
+        createReadStream(
+          inputPath,
+          { fd, autoClose: false, start, end: (start + partSize) - 1 }
+        ) as ReadStream & { byteLength: number }
+
+      // Calculate if the part size is more than what's left over, and in that case use left over bytes for byteLength
+      stream.byteLength = min([ statResult.size - start, partSize ])
+
+      const uploadPartCommand = new UploadPartCommand({
+        Bucket: bucketInfo.BUCKET_NAME,
+        Key: key,
+        UploadId: createResponse.UploadId,
+        PartNumber: partNumber,
+        Body: stream
+      })
+      const uploadResponse = await s3Client.send(uploadPartCommand)
+
+      parts.push({ ETag: uploadResponse.ETag, PartNumber: partNumber })
+      partNumber += 1
+    }
+  } finally {
+    await close(fd)
+  }
+
+  // Use the same prefixed key as the create and upload part commands above
+  const completeUploadCommand = new CompleteMultipartUploadCommand({
+    Bucket: bucketInfo.BUCKET_NAME,
+    Key: key,
+    UploadId: createResponse.UploadId,
+    MultipartUpload: { Parts: parts }
+  })
+  await s3Client.send(completeUploadCommand)
+
+  logger.debug(
+    'Completed %s%s in bucket %s in %d parts',
+    bucketInfo.PREFIX, objectStorageKey, bucketInfo.BUCKET_NAME, partNumber - 1, lTags()
+  )
+
+  return getPrivateUrl(bucketInfo, objectStorageKey)
+}
diff --git a/server/lib/object-storage/urls.ts b/server/lib/object-storage/urls.ts
new file mode 100644
index 000000000..2a889190b
--- /dev/null
+++ b/server/lib/object-storage/urls.ts
@@ -0,0 +1,45 @@
+import { CONFIG } from '@server/initializers/config'
+import { BucketInfo, buildKey, getEndpointParsed } from './shared'
+
+// URL of the object on the object storage endpoint: <bucket base URL><bucket prefix><key>
+function getPrivateUrl (config: BucketInfo, keyWithoutPrefix: string) {
+  return getBaseUrl(config) + buildKey(keyWithoutPrefix, config)
+}
+
+// Replace the scheme and host of fileUrl by OBJECT_STORAGE.VIDEOS.BASE_URL if it is set
+function getWebTorrentPublicFileUrl (fileUrl: string) {
+  const baseUrl = CONFIG.OBJECT_STORAGE.VIDEOS.BASE_URL
+  if (!baseUrl) return fileUrl
+
+  return replaceByBaseUrl(fileUrl, baseUrl)
+}
+
+// Replace the scheme and host of fileUrl by OBJECT_STORAGE.STREAMING_PLAYLISTS.BASE_URL if it is set
+function getHLSPublicFileUrl (fileUrl: string) {
+  const baseUrl = CONFIG.OBJECT_STORAGE.STREAMING_PLAYLISTS.BASE_URL
+  if (!baseUrl) return fileUrl
+
+  return replaceByBaseUrl(fileUrl, baseUrl)
+}
+
+export {
+  getPrivateUrl,
+  getWebTorrentPublicFileUrl,
+  replaceByBaseUrl,
+  getHLSPublicFileUrl
+}
+
+// ---------------------------------------------------------------------------
+
+// <endpoint protocol>//<bucket name>.<endpoint host>/ unless baseUrl is provided
+function getBaseUrl (bucketInfo: BucketInfo, baseUrl?: string) {
+  if (baseUrl) return baseUrl
+
+  return `${getEndpointParsed().protocol}//${bucketInfo.BUCKET_NAME}.${getEndpointParsed().host}/`
+}
+
+// Matches the scheme and host of an http(s) URL
+const regex = new RegExp('https?://[^/]+')
+function replaceByBaseUrl (fileUrl: string, baseUrl: string) {
+  return fileUrl.replace(regex, baseUrl)
+}
diff --git a/server/lib/object-storage/videos.ts b/server/lib/object-storage/videos.ts
new file mode 100644
index 000000000..15b8f58d5
--- /dev/null
+++ b/server/lib/object-storage/videos.ts
@@ -0,0 +1,78 @@
+import { join } from 'path'
+import { logger } from '@server/helpers/logger'
+import { CONFIG } from '@server/initializers/config'
+import { MStreamingPlaylist, MVideoFile, MVideoUUID } from '@server/types/models'
+import { getHLSDirectory } from '../paths'
+import { generateHLSObjectBaseStorageKey, generateHLSObjectStorageKey, generateWebTorrentObjectStorageKey } from './keys'
+import { lTags, makeAvailable, removeObject, removePrefix, storeObject } from './shared'
+
+// Upload a file of the HLS directory of the video using the OBJECT_STORAGE.STREAMING_PLAYLISTS bucket info
+function storeHLSFile (playlist: MStreamingPlaylist, video: MVideoUUID, filename: string) {
+  const baseHlsDirectory = getHLSDirectory(video)
+
+  return storeObject({
+    inputPath: join(baseHlsDirectory, filename),
+    objectStorageKey: generateHLSObjectStorageKey(playlist, video, filename),
+    bucketInfo: CONFIG.OBJECT_STORAGE.STREAMING_PLAYLISTS
+  })
+}
+
+// Upload a file of STORAGE.VIDEOS_DIR using the OBJECT_STORAGE.VIDEOS bucket info
+function storeWebTorrentFile (filename: string) {
+  return storeObject({
+    inputPath: join(CONFIG.STORAGE.VIDEOS_DIR, filename),
+    objectStorageKey: generateWebTorrentObjectStorageKey(filename),
+    bucketInfo: CONFIG.OBJECT_STORAGE.VIDEOS
+  })
+}
+
+// Remove the objects stored under the base key of this playlist
+function removeHLSObjectStorage (playlist: MStreamingPlaylist, video: MVideoUUID) {
+  return removePrefix(generateHLSObjectBaseStorageKey(playlist, video), CONFIG.OBJECT_STORAGE.STREAMING_PLAYLISTS)
+}
+
+// Remove the object stored for this video file
+function removeWebTorrentObjectStorage (videoFile: MVideoFile) {
+  return removeObject(generateWebTorrentObjectStorageKey(videoFile.filename), CONFIG.OBJECT_STORAGE.VIDEOS)
+}
+
+// Download a file of the playlist from object storage to destination, and return destination
+async function makeHLSFileAvailable (playlist: MStreamingPlaylist, video: MVideoUUID, filename: string, destination: string) {
+  const key = generateHLSObjectStorageKey(playlist, video, filename)
+
+  logger.info('Fetching HLS file %s from object storage to %s.', key, destination, lTags())
+
+  await makeAvailable({
+    key,
+    destination,
+    bucketInfo: CONFIG.OBJECT_STORAGE.STREAMING_PLAYLISTS
+  })
+
+  return destination
+}
+
+// Download a WebTorrent file from object storage to destination, and return destination
+async function makeWebTorrentFileAvailable (filename: string, destination: string) {
+  const key = generateWebTorrentObjectStorageKey(filename)
+
+  logger.info('Fetching WebTorrent file %s from object storage to %s.', key, destination, lTags())
+
+  await makeAvailable({
+    key,
+    destination,
+    bucketInfo: CONFIG.OBJECT_STORAGE.VIDEOS
+  })
+
+  return destination
+}
+
+export {
+  storeWebTorrentFile,
+  storeHLSFile,
+
+  removeHLSObjectStorage,
+  removeWebTorrentObjectStorage,
+
+  makeWebTorrentFileAvailable,
+  makeHLSFileAvailable
+}
-- 
cgit v1.2.3