]>
Commit | Line | Data |
---|---|---|
1 | import * as Bluebird from 'bluebird' | |
2 | import { URL } from 'url' | |
3 | import { ActivityPubOrderedCollection } from '../../../shared/models/activitypub' | |
4 | import { logger } from '../../helpers/logger' | |
5 | import { doJSONRequest } from '../../helpers/requests' | |
6 | import { ACTIVITY_PUB, REQUEST_TIMEOUT, WEBSERVER } from '../../initializers/constants' | |
7 | ||
// Invoked with each crawled page's `orderedItems`; may return a native or Bluebird promise.
type HandlerFunction<T> = (items: T[]) => (Promise<any> | Bluebird<any>)
// Invoked once after the crawl finishes, with the Date captured before the first request
// (presumably so the caller can purge entries older than the crawl — confirm at call sites).
type CleanerFunction = (startedDate: Date) => (Promise<any> | Bluebird<any>)
10 | ||
11 | async function crawlCollectionPage <T> (argUrl: string, handler: HandlerFunction<T>, cleaner?: CleanerFunction) { | |
12 | let url = argUrl | |
13 | ||
14 | logger.info('Crawling ActivityPub data on %s.', url) | |
15 | ||
16 | const options = { | |
17 | activityPub: true, | |
18 | timeout: REQUEST_TIMEOUT | |
19 | } | |
20 | ||
21 | const startDate = new Date() | |
22 | ||
23 | const response = await doJSONRequest<ActivityPubOrderedCollection<T>>(url, options) | |
24 | const firstBody = response.body | |
25 | ||
26 | const limit = ACTIVITY_PUB.FETCH_PAGE_LIMIT | |
27 | let i = 0 | |
28 | let nextLink = firstBody.first | |
29 | while (nextLink && i < limit) { | |
30 | let body: any | |
31 | ||
32 | if (typeof nextLink === 'string') { | |
33 | // Don't crawl ourselves | |
34 | const remoteHost = new URL(nextLink).host | |
35 | if (remoteHost === WEBSERVER.HOST) continue | |
36 | ||
37 | url = nextLink | |
38 | ||
39 | const res = await doJSONRequest<ActivityPubOrderedCollection<T>>(url, options) | |
40 | body = res.body | |
41 | } else { | |
42 | // nextLink is already the object we want | |
43 | body = nextLink | |
44 | } | |
45 | ||
46 | nextLink = body.next | |
47 | i++ | |
48 | ||
49 | if (Array.isArray(body.orderedItems)) { | |
50 | const items = body.orderedItems | |
51 | logger.info('Processing %i ActivityPub items for %s.', items.length, url) | |
52 | ||
53 | await handler(items) | |
54 | } | |
55 | } | |
56 | ||
57 | if (cleaner) await cleaner(startDate) | |
58 | } | |
59 | ||
60 | export { | |
61 | crawlCollectionPage | |
62 | } |