X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=server%2Flib%2Factivitypub%2Fcrawl.ts;h=eeafdf4ba8d04848242020af795f8fb4388cda8b;hb=9d94e5d7b96332d628ed835c67c2986289ead9b2;hp=9f4ca98bac28e494b8971535a2a7ecc415804f9f;hpb=2ba92871319d7af63472c1380664a9f9eeb1c690;p=github%2FChocobozzz%2FPeerTube.git diff --git a/server/lib/activitypub/crawl.ts b/server/lib/activitypub/crawl.ts index 9f4ca98ba..eeafdf4ba 100644 --- a/server/lib/activitypub/crawl.ts +++ b/server/lib/activitypub/crawl.ts @@ -1,8 +1,9 @@ -import { ACTIVITY_PUB, JOB_REQUEST_TIMEOUT } from '../../initializers' +import { ACTIVITY_PUB, JOB_REQUEST_TIMEOUT, WEBSERVER } from '../../initializers/constants' import { doRequest } from '../../helpers/requests' import { logger } from '../../helpers/logger' import * as Bluebird from 'bluebird' import { ActivityPubOrderedCollection } from '../../../shared/models/activitypub' +import { URL } from 'url' type HandlerFunction = (items: T[]) => (Promise | Bluebird) type CleanerFunction = (startedDate: Date) => (Promise | Bluebird) @@ -23,13 +24,26 @@ async function crawlCollectionPage (uri: string, handler: HandlerFunction const response = await doRequest>(options) const firstBody = response.body - let limit = ACTIVITY_PUB.FETCH_PAGE_LIMIT + const limit = ACTIVITY_PUB.FETCH_PAGE_LIMIT let i = 0 let nextLink = firstBody.first while (nextLink && i < limit) { - options.uri = nextLink + let body: any + + if (typeof nextLink === 'string') { + // Don't crawl ourselves + const remoteHost = new URL(nextLink).host + if (remoteHost === WEBSERVER.HOST) continue + + options.uri = nextLink + + const res = await doRequest>(options) + body = res.body + } else { + // nextLink is already the object we want + body = nextLink + } - const { body } = await doRequest>(options) nextLink = body.next i++