diff options
Diffstat (limited to 'server/lib/activitypub/crawl.ts')
-rw-r--r-- | server/lib/activitypub/crawl.ts | 25 |
1 files changed, 12 insertions, 13 deletions
diff --git a/server/lib/activitypub/crawl.ts b/server/lib/activitypub/crawl.ts index 1ed105bbe..278abf7de 100644 --- a/server/lib/activitypub/crawl.ts +++ b/server/lib/activitypub/crawl.ts | |||
@@ -1,27 +1,26 @@ | |||
1 | import { ACTIVITY_PUB, REQUEST_TIMEOUT, WEBSERVER } from '../../initializers/constants' | ||
2 | import { doRequest } from '../../helpers/requests' | ||
3 | import { logger } from '../../helpers/logger' | ||
4 | import * as Bluebird from 'bluebird' | 1 | import * as Bluebird from 'bluebird' |
5 | import { ActivityPubOrderedCollection } from '../../../shared/models/activitypub' | ||
6 | import { URL } from 'url' | 2 | import { URL } from 'url' |
3 | import { ActivityPubOrderedCollection } from '../../../shared/models/activitypub' | ||
4 | import { logger } from '../../helpers/logger' | ||
5 | import { doJSONRequest } from '../../helpers/requests' | ||
6 | import { ACTIVITY_PUB, REQUEST_TIMEOUT, WEBSERVER } from '../../initializers/constants' | ||
7 | 7 | ||
8 | type HandlerFunction<T> = (items: T[]) => (Promise<any> | Bluebird<any>) | 8 | type HandlerFunction<T> = (items: T[]) => (Promise<any> | Bluebird<any>) |
9 | type CleanerFunction = (startedDate: Date) => (Promise<any> | Bluebird<any>) | 9 | type CleanerFunction = (startedDate: Date) => (Promise<any> | Bluebird<any>) |
10 | 10 | ||
11 | async function crawlCollectionPage <T> (uri: string, handler: HandlerFunction<T>, cleaner?: CleanerFunction) { | 11 | async function crawlCollectionPage <T> (argUrl: string, handler: HandlerFunction<T>, cleaner?: CleanerFunction) { |
12 | logger.info('Crawling ActivityPub data on %s.', uri) | 12 | let url = argUrl |
13 | |||
14 | logger.info('Crawling ActivityPub data on %s.', url) | ||
13 | 15 | ||
14 | const options = { | 16 | const options = { |
15 | method: 'GET', | ||
16 | uri, | ||
17 | json: true, | ||
18 | activityPub: true, | 17 | activityPub: true, |
19 | timeout: REQUEST_TIMEOUT | 18 | timeout: REQUEST_TIMEOUT |
20 | } | 19 | } |
21 | 20 | ||
22 | const startDate = new Date() | 21 | const startDate = new Date() |
23 | 22 | ||
24 | const response = await doRequest<ActivityPubOrderedCollection<T>>(options) | 23 | const response = await doJSONRequest<ActivityPubOrderedCollection<T>>(url, options) |
25 | const firstBody = response.body | 24 | const firstBody = response.body |
26 | 25 | ||
27 | const limit = ACTIVITY_PUB.FETCH_PAGE_LIMIT | 26 | const limit = ACTIVITY_PUB.FETCH_PAGE_LIMIT |
@@ -35,9 +34,9 @@ async function crawlCollectionPage <T> (uri: string, handler: HandlerFunction<T> | |||
35 | const remoteHost = new URL(nextLink).host | 34 | const remoteHost = new URL(nextLink).host |
36 | if (remoteHost === WEBSERVER.HOST) continue | 35 | if (remoteHost === WEBSERVER.HOST) continue |
37 | 36 | ||
38 | options.uri = nextLink | 37 | url = nextLink |
39 | 38 | ||
40 | const res = await doRequest<ActivityPubOrderedCollection<T>>(options) | 39 | const res = await doJSONRequest<ActivityPubOrderedCollection<T>>(url, options) |
41 | body = res.body | 40 | body = res.body |
42 | } else { | 41 | } else { |
43 | // nextLink is already the object we want | 42 | // nextLink is already the object we want |
@@ -49,7 +48,7 @@ async function crawlCollectionPage <T> (uri: string, handler: HandlerFunction<T> | |||
49 | 48 | ||
50 | if (Array.isArray(body.orderedItems)) { | 49 | if (Array.isArray(body.orderedItems)) { |
51 | const items = body.orderedItems | 50 | const items = body.orderedItems |
52 | logger.info('Processing %i ActivityPub items for %s.', items.length, options.uri) | 51 | logger.info('Processing %i ActivityPub items for %s.', items.length, url) |
53 | 52 | ||
54 | await handler(items) | 53 | await handler(items) |
55 | } | 54 | } |