]>
Commit | Line | Data |
---|---|---|
c0e71e84 | 1 | import { ACTIVITY_PUB, JOB_REQUEST_TIMEOUT, WEBSERVER } from '../../initializers/constants' |
8fffe21a C |
2 | import { doRequest } from '../../helpers/requests' |
3 | import { logger } from '../../helpers/logger' | |
edb4ffc7 | 4 | import * as Bluebird from 'bluebird' |
5c6d985f | 5 | import { ActivityPubOrderedCollection } from '../../../shared/models/activitypub' |
c28bcdd1 | 6 | import { parse } from 'url' |
8fffe21a | 7 | |
2ba92871 C |
/** Callback given the `orderedItems` array of one crawled collection page. */
type HandlerFunction<T> = (items: T[]) => Promise<any> | Bluebird<any>
/** Callback invoked once crawling is over, with the date captured before the first request. */
type CleanerFunction = (startedDate: Date) => Promise<any> | Bluebird<any>
10 | ||
/**
 * Crawl a remote ActivityPub ordered collection, page by page.
 *
 * Requests `uri`, then follows the collection's `first` link and every page's
 * `next` link. Each page whose `orderedItems` is an array is handed to
 * `handler`. At most `ACTIVITY_PUB.FETCH_PAGE_LIMIT` pages are requested.
 * When crawling stops, `cleaner` (if provided) is called with the date
 * captured just before the first request.
 *
 * @param uri - URI of the collection to crawl
 * @param handler - awaited with the items of each crawled page
 * @param cleaner - optional callback awaited after the crawl, given the crawl start date
 */
async function crawlCollectionPage <T> (uri: string, handler: HandlerFunction<T>, cleaner?: CleanerFunction) {
  logger.info('Crawling ActivityPub data on %s.', uri)

  // The same options object is reused for every page: only `uri` is updated.
  const options = {
    method: 'GET',
    uri,
    json: true,
    activityPub: true,
    timeout: JOB_REQUEST_TIMEOUT
  }

  // Captured before any request so the cleaner receives the crawl start date.
  const startDate = new Date()

  const response = await doRequest<ActivityPubOrderedCollection<T>>(options)
  const firstBody = response.body

  const limit = ACTIVITY_PUB.FETCH_PAGE_LIMIT
  let i = 0
  let nextLink = firstBody.first
  while (nextLink && i < limit) {
    // Don't crawl ourselves.
    // We have to leave the loop here: `nextLink` and `i` are only updated after
    // the page has been fetched, so skipping the iteration with `continue`
    // would re-evaluate the exact same link forever.
    const remoteHost = parse(nextLink).host
    if (remoteHost === WEBSERVER.HOST) {
      logger.info('Stopping ActivityPub crawl of %s: next page %s is hosted on our own server.', uri, nextLink)
      break
    }

    options.uri = nextLink

    const { body } = await doRequest<ActivityPubOrderedCollection<T>>(options)
    nextLink = body.next
    i++

    if (Array.isArray(body.orderedItems)) {
      const items = body.orderedItems
      logger.info('Processing %i ActivityPub items for %s.', items.length, options.uri)

      await handler(items)
    }
  }

  if (cleaner) await cleaner(startDate)
}
51 | ||
// Public API of this module.
export { crawlCollectionPage }