]>
Commit | Line | Data |
---|---|---|
41fb13c3 | 1 | import Bluebird from 'bluebird' |
a1587156 | 2 | import { URL } from 'url' |
41fb13c3 | 3 | import { retryTransactionWrapper } from '@server/helpers/database-utils' |
db4b15f2 C |
4 | import { ActivityPubOrderedCollection } from '../../../shared/models/activitypub' |
5 | import { logger } from '../../helpers/logger' | |
6 | import { doJSONRequest } from '../../helpers/requests' | |
7500d6c9 | 7 | import { ACTIVITY_PUB, WEBSERVER } from '../../initializers/constants' |
8fffe21a | 8 | |
2ba92871 | 9 | type HandlerFunction<T> = (items: T[]) => (Promise<any> | Bluebird<any>) |
41fb13c3 | 10 | type CleanerFunction = (startedDate: Date) => Promise<any> |
2ba92871 | 11 | |
/**
 * Crawl a paginated ActivityPub ordered collection and feed every page of items to `handler`.
 *
 * The collection at `argUrl` is fetched first, then its `first` page is followed through
 * successive `next` links. A page may be given either as a URL string (fetched with
 * `doJSONRequest`) or directly as an embedded object (used as is).
 *
 * The crawl stops when:
 *  - there is no further page,
 *  - `ACTIVITY_PUB.FETCH_PAGE_LIMIT` pages have been visited, or
 *  - the next page link points to our own host (`WEBSERVER.HOST`).
 *
 * @param argUrl URL of the collection to crawl
 * @param handler called with the `orderedItems` array of each page that has one
 * @param cleaner optional function, run through `retryTransactionWrapper` once the crawl
 * is over, receiving the date at which the crawl started
 */
async function crawlCollectionPage <T> (argUrl: string, handler: HandlerFunction<T>, cleaner?: CleanerFunction) {
  let url = argUrl

  logger.info('Crawling ActivityPub data on %s.', url)

  const options = { activityPub: true }

  // Taken before any request so the cleaner can be given the moment the crawl began
  const startDate = new Date()

  const response = await doJSONRequest<ActivityPubOrderedCollection<T>>(url, options)
  const firstBody = response.body

  const limit = ACTIVITY_PUB.FETCH_PAGE_LIMIT
  let i = 0
  let nextLink = firstBody.first
  while (nextLink && i < limit) {
    let body: any

    if (typeof nextLink === 'string') {
      // Don't crawl ourselves
      const remoteHost = new URL(nextLink).host
      if (remoteHost === WEBSERVER.HOST) {
        // A bare `continue` here would re-test the same nextLink/i forever and, since no
        // await is reached, freeze the event loop. We cannot learn the following page
        // without fetching this one, so the crawl ends here.
        logger.info('Stopping ActivityPub crawl on %s: next page is hosted by ourselves.', url)
        break
      }

      url = nextLink

      const res = await doJSONRequest<ActivityPubOrderedCollection<T>>(url, options)
      body = res.body
    } else {
      // nextLink is already the object we want
      body = nextLink
    }

    nextLink = body.next
    i++

    if (Array.isArray(body.orderedItems)) {
      const items = body.orderedItems
      logger.info('Processing %i ActivityPub items for %s.', items.length, url)

      await handler(items)
    }
  }

  if (cleaner) await retryTransactionWrapper(cleaner, startDate)
}
57 | ||
58 | export { | |
59 | crawlCollectionPage | |
60 | } |