X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=server%2Flib%2Factivitypub%2Fcrawl.ts;h=9e469e3e614c84a78f9d68bdd84922cfd481295b;hb=cf59a2a0c367683ba35758419499bf6087c192ec;hp=1b9b14c2e55177657a655e1de9a81143ebcec39f;hpb=ae28cdf327d782e629379eee1999096ca2a5d74b;p=github%2FChocobozzz%2FPeerTube.git diff --git a/server/lib/activitypub/crawl.ts b/server/lib/activitypub/crawl.ts index 1b9b14c2e..9e469e3e6 100644 --- a/server/lib/activitypub/crawl.ts +++ b/server/lib/activitypub/crawl.ts @@ -1,10 +1,14 @@ -import { ACTIVITY_PUB, JOB_REQUEST_TIMEOUT } from '../../initializers' +import { ACTIVITY_PUB, JOB_REQUEST_TIMEOUT, WEBSERVER } from '../../initializers/constants' import { doRequest } from '../../helpers/requests' import { logger } from '../../helpers/logger' import * as Bluebird from 'bluebird' import { ActivityPubOrderedCollection } from '../../../shared/models/activitypub' +import { parse } from 'url' -async function crawlCollectionPage (uri: string, handler: (items: T[]) => Promise | Bluebird) { +type HandlerFunction = (items: T[]) => (Promise | Bluebird) +type CleanerFunction = (startedDate: Date) => (Promise | Bluebird) + +async function crawlCollectionPage (uri: string, handler: HandlerFunction, cleaner?: CleanerFunction) { logger.info('Crawling ActivityPub data on %s.', uri) const options = { @@ -15,6 +19,8 @@ async function crawlCollectionPage (uri: string, handler: (items: T[]) => Pr timeout: JOB_REQUEST_TIMEOUT } + const startDate = new Date() + const response = await doRequest>(options) const firstBody = response.body @@ -22,9 +28,22 @@ async function crawlCollectionPage (uri: string, handler: (items: T[]) => Pr let i = 0 let nextLink = firstBody.first while (nextLink && i < limit) { - options.uri = nextLink + let body: any + + if (typeof nextLink === 'string') { + // Don't crawl ourselves + const remoteHost = parse(nextLink).host + if (remoteHost === WEBSERVER.HOST) continue + + options.uri = nextLink + + const res = await doRequest>(options) + body = res.body + } else { + // nextLink is already the object we want + body = nextLink + } - const { body } = await doRequest>(options) nextLink = body.next i++ @@ -35,6 +54,8 @@ async function crawlCollectionPage (uri: string, handler: (items: T[]) => Pr await handler(items) } } + + if (cleaner) await cleaner(startDate) } export {