X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;ds=sidebyside;f=server%2Flib%2Factivitypub%2Fcrawl.ts;h=0ba59b47dde4c0c90e7d133ab2333f756bdaf27a;hb=c0e71e849a40871ed8eea3dacd8608d380bdb490;hp=d4fc786f71b973d69965981954d88342ad12c90e;hpb=2186386cca113506791583cb07d6ccacba7af4e0;p=github%2FChocobozzz%2FPeerTube.git diff --git a/server/lib/activitypub/crawl.ts b/server/lib/activitypub/crawl.ts index d4fc786f7..0ba59b47d 100644 --- a/server/lib/activitypub/crawl.ts +++ b/server/lib/activitypub/crawl.ts @@ -1,8 +1,15 @@ -import { ACTIVITY_PUB, JOB_REQUEST_TIMEOUT } from '../../initializers' +import { ACTIVITY_PUB, JOB_REQUEST_TIMEOUT, WEBSERVER } from '../../initializers/constants' import { doRequest } from '../../helpers/requests' import { logger } from '../../helpers/logger' +import * as Bluebird from 'bluebird' +import { ActivityPubOrderedCollection } from '../../../shared/models/activitypub' +import { checkUrlsSameHost } from '../../helpers/activitypub' +import { parse } from "url" -async function crawlCollectionPage (uri: string, handler: (items: T[]) => Promise) { +type HandlerFunction = (items: T[]) => (Promise | Bluebird) +type CleanerFunction = (startedDate: Date) => (Promise | Bluebird) + +async function crawlCollectionPage (uri: string, handler: HandlerFunction, cleaner?: CleanerFunction) { logger.info('Crawling ActivityPub data on %s.', uri) const options = { @@ -13,16 +20,22 @@ async function crawlCollectionPage (uri: string, handler: (items: T[]) => Pr timeout: JOB_REQUEST_TIMEOUT } - const response = await doRequest(options) + const startDate = new Date() + + const response = await doRequest>(options) const firstBody = response.body let limit = ACTIVITY_PUB.FETCH_PAGE_LIMIT let i = 0 let nextLink = firstBody.first while (nextLink && i < limit) { + // Don't crawl ourselves + const remoteHost = parse(nextLink).host + if (remoteHost === WEBSERVER.HOST) continue + options.uri = nextLink - const { body } = await doRequest(options) + const { body } = await doRequest>(options) nextLink = body.next i++ @@ -33,6 +46,8 @@ async function crawlCollectionPage (uri: string, handler: (items: T[]) => Pr await handler(items) } } + + if (cleaner) await cleaner(startDate) } export {