// server/lib/activitypub/crawl.ts (PeerTube) — blame snapshot at commit "Cleanup invalid rates/comments/shares"
1import { ACTIVITY_PUB, JOB_REQUEST_TIMEOUT } from '../../initializers'
2import { doRequest } from '../../helpers/requests'
3import { logger } from '../../helpers/logger'
edb4ffc7 4import * as Bluebird from 'bluebird'
5c6d985f 5import { ActivityPubOrderedCollection } from '../../../shared/models/activitypub'
8fffe21a 6
2ba92871
C
7type HandlerFunction<T> = (items: T[]) => (Promise<any> | Bluebird<any>)
8type CleanerFunction = (startedDate: Date) => (Promise<any> | Bluebird<any>)
9
10async function crawlCollectionPage <T> (uri: string, handler: HandlerFunction<T>, cleaner?: CleanerFunction) {
8fffe21a
C
11 logger.info('Crawling ActivityPub data on %s.', uri)
12
13 const options = {
14 method: 'GET',
15 uri,
16 json: true,
17 activityPub: true,
18 timeout: JOB_REQUEST_TIMEOUT
19 }
20
2ba92871
C
21 const startDate = new Date()
22
5c6d985f 23 const response = await doRequest<ActivityPubOrderedCollection<T>>(options)
8fffe21a
C
24 const firstBody = response.body
25
26 let limit = ACTIVITY_PUB.FETCH_PAGE_LIMIT
27 let i = 0
28 let nextLink = firstBody.first
29 while (nextLink && i < limit) {
30 options.uri = nextLink
31
5c6d985f 32 const { body } = await doRequest<ActivityPubOrderedCollection<T>>(options)
8fffe21a
C
33 nextLink = body.next
34 i++
35
36 if (Array.isArray(body.orderedItems)) {
37 const items = body.orderedItems
2186386c 38 logger.info('Processing %i ActivityPub items for %s.', items.length, options.uri)
8fffe21a
C
39
40 await handler(items)
41 }
42 }
2ba92871
C
43
44 if (cleaner) await cleaner(startDate)
8fffe21a
C
45}
46
47export {
48 crawlCollectionPage
49}