aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty/site_config/standard/bbc.com.txt
blob: 200dba63c8e28b58393631129b20e2938f57b460 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Site config for BBC pages: XPath rules for locating the article body,
# title, date and author, followed by clean-up rules and test URLs.
# NOTE(review): where a directive appears more than once (body, date), the
# entries are presumably alternatives tried in the order listed - confirm
# against the extractor's documentation before reordering anything.

# Standard article layout: the div whose class is exactly "story-body".
body: //div[@class="story-body"]
# Video entries: a div whose class contains "videoInStory", or the element
# with id "meta-information".
body: //div[contains(@class, "videoInStory") or @id="meta-information"]
title: //h1[@class="story-header"]
date: //span[@class="story-date"]/span[@class='date']
# Sport site: read the date from the DCTERMS.created meta tag instead.
date: //meta[@name='DCTERMS.created']/@content
author: //div[@id='headline']//span[@class='byline-name']

# recipes, e.g. http://www.bbc.co.uk/food/recipes/mymincepies_71055
body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1']

# Elements removed from the extracted content.
# The three disabled exact-class rules below are all matched by the
# contains(@class, "story-feature") rule that follows them; that rule
# leaves alone any story-feature div whose class also contains 'full-width'.
#strip: //div[@class="story-feature narrow"]
#strip: //div[@class="story-feature wide"]
#strip: //div[@class="story-feature dslideshow-enclosure"]
strip: //div[contains(@class, "story-feature") and not(contains(@class, 'full-width'))]
strip: //span[@class="story-date"]
#strip: //div[@class="caption body-narrow-width"]
strip: //div[@class="warning"]//p
strip: //div[@id='page-bookmark-links-head']
strip: //object
strip: //div[contains(@class, "bbccom_advert_placeholder")]
strip: //div[contains(@class, "embedded-hyper")]
strip: //div[contains(@class, 'market-data')]
strip: //a[contains(@class, 'hidden')]
strip: //div[contains(@class, 'hypertabs')]
strip: //div[contains(@class, 'related')]
strip: //form[@id='comment-form']
strip: //div[contains(@class, 'comment-introduction')]
strip: //div[contains(@class, 'share-tools')]
strip: //div[@id='also-related-links']

# Same idea as strip, but keyed on an id or class value rather than an XPath.
strip_id_or_class: share-help
strip_id_or_class: comments_module

# Rewrite ichef image URLs from the /news/200/ and /news/304/ paths to
# /news/624/.
# NOTE(review): the number presumably selects the image width, so this
# requests the larger rendition - confirm.
find_string: http://ichef.bbci.co.uk/news/200/
replace_string: http://ichef.bbci.co.uk/news/624/

find_string: http://ichef.bbci.co.uk/news/304/
replace_string: http://ichef.bbci.co.uk/news/624/

# Turn <noscript> wrappers into plain <div> elements.
# NOTE(review): presumably so that markup inside <noscript> is kept as
# regular content - confirm.
replace_string(<noscript>): <div>
replace_string(</noscript>): </div>

# Flag pages whose og:url meta content contains '/sponsored/'.
native_ad_clue: //meta[@property="og:url" and contains(@content, '/sponsored/')]

# Both the tidy and prune options are switched off for this site.
tidy: no
prune: no

# NOTE(review): presumably removes the <h2> wrapper while keeping its
# contents - confirm against the extractor's documentation.
dissolve: //h2

# Test pages. A test_contains line gives text expected in the result for
# the test_url directly above it.
test_url: http://www.bbc.com/sport/0/football/28918021
test_contains: Cameroonian footballer Albert Ebosse has died

test_url: http://www.bbc.com/sport/0/football/23224017

test_url: http://www.bbc.com/news/business-15060862
test_contains: Europe's leaders are meeting again to try


# news feed
test_url: http://feeds.bbci.co.uk/news/rss.xml
# sports feed
test_url: http://feeds.bbci.co.uk/sport/0/football/rss.xml?edition=int
# video entry
test_url: http://www.bbc.com/news/world-asia-22056933