X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=server%2Flib%2Factivitypub%2Fcrawl.ts;h=278abf7de0b3a9637b380521dd9f78158535686f;hb=fd1b2d695320be5d86456c939b371b2e9b4f457b;hp=2675524c63037140e6aab3a00de023751921bb74;hpb=418d092afa81e2c8fe8ac6838fc4b5eb0af6a782;p=github%2FChocobozzz%2FPeerTube.git diff --git a/server/lib/activitypub/crawl.ts b/server/lib/activitypub/crawl.ts index 2675524c6..278abf7de 100644 --- a/server/lib/activitypub/crawl.ts +++ b/server/lib/activitypub/crawl.ts @@ -1,40 +1,60 @@ -import { ACTIVITY_PUB, JOB_REQUEST_TIMEOUT } from '../../initializers' -import { doRequest } from '../../helpers/requests' -import { logger } from '../../helpers/logger' import * as Bluebird from 'bluebird' +import { URL } from 'url' import { ActivityPubOrderedCollection } from '../../../shared/models/activitypub' +import { logger } from '../../helpers/logger' +import { doJSONRequest } from '../../helpers/requests' +import { ACTIVITY_PUB, REQUEST_TIMEOUT, WEBSERVER } from '../../initializers/constants' + +type HandlerFunction = (items: T[]) => (Promise | Bluebird) +type CleanerFunction = (startedDate: Date) => (Promise | Bluebird) -async function crawlCollectionPage (uri: string, handler: (items: T[]) => (Promise | Bluebird)) { - logger.info('Crawling ActivityPub data on %s.', uri) +async function crawlCollectionPage (argUrl: string, handler: HandlerFunction, cleaner?: CleanerFunction) { + let url = argUrl + + logger.info('Crawling ActivityPub data on %s.', url) const options = { - method: 'GET', - uri, - json: true, activityPub: true, - timeout: JOB_REQUEST_TIMEOUT + timeout: REQUEST_TIMEOUT } - const response = await doRequest>(options) + const startDate = new Date() + + const response = await doJSONRequest>(url, options) const firstBody = response.body - let limit = ACTIVITY_PUB.FETCH_PAGE_LIMIT + const limit = ACTIVITY_PUB.FETCH_PAGE_LIMIT let i = 0 let nextLink = firstBody.first while (nextLink && i < limit) { - options.uri = nextLink + let body: any + + if (typeof nextLink === 'string') { + // Don't crawl ourselves + const remoteHost = new URL(nextLink).host + if (remoteHost === WEBSERVER.HOST) continue + + url = nextLink + + const res = await doJSONRequest>(url, options) + body = res.body + } else { + // nextLink is already the object we want + body = nextLink + } - const { body } = await doRequest>(options) nextLink = body.next i++ if (Array.isArray(body.orderedItems)) { const items = body.orderedItems - logger.info('Processing %i ActivityPub items for %s.', items.length, options.uri) + logger.info('Processing %i ActivityPub items for %s.', items.length, url) await handler(items) } } + + if (cleaner) await cleaner(startDate) } export {