]>
Commit | Line | Data |
---|---|---|
4e067cea NL |
1 | body: //div[@class="story-body"] |
2 | # body for video entries | |
3 | body: //div[contains(@class, "videoInStory") or @id="meta-information"] | |
4 | title: //h1[@class="story-header"] | |
5 | date: //span[@class="story-date"]/span[@class='date'] | |
6 | # for sport site | |
7 | date: //meta[@name='DCTERMS.created']/@content | |
8 | author: //div[@id='headline']//span[@class='byline-name'] | |
9 | ||
10 | # recipes, e.g. http://www.bbc.co.uk/food/recipes/mymincepies_71055 | |
11 | body: //div[contains(@class, 'hrecipe')]//div[@id='subcolumn-1'] | |
12 | ||
13 | #strip: //div[@class="story-feature narrow"] | |
14 | #strip: //div[@class="story-feature wide"] | |
15 | #strip: //div[@class="story-feature dslideshow-enclosure"] | |
90a1a78b | 16 | strip: //div[contains(@class, "story-feature") and not(contains(@class, 'full-width'))] |
4e067cea NL |
17 | strip: //span[@class="story-date"] |
18 | #strip: //div[@class="caption body-narrow-width"] | |
19 | strip: //div[@class="warning"]//p | |
20 | strip: //div[@id='page-bookmark-links-head'] | |
21 | strip: //object | |
22 | strip: //div[contains(@class, "bbccom_advert_placeholder")] | |
23 | strip: //div[contains(@class, "embedded-hyper")] | |
24 | strip: //div[contains(@class, 'market-data')] | |
25 | strip: //a[contains(@class, 'hidden')] | |
26 | strip: //div[contains(@class, 'hypertabs')] | |
27 | strip: //div[contains(@class, 'related')] | |
28 | strip: //form[@id='comment-form'] | |
29 | strip: //div[contains(@class, 'comment-introduction')] | |
30 | strip: //div[contains(@class, 'share-tools')] | |
31 | strip: //div[@id='also-related-links'] | |
32 | ||
90a1a78b NL |
33 | strip_id_or_class: share-help |
34 | strip_id_or_class: comments_module | |
35 | ||
4e067cea NL |
36 | replace_string(<noscript>): <div> |
37 | replace_string(</noscript>): </div> | |
38 | ||
90a1a78b | 39 | tidy: no |
4e067cea NL |
40 | prune: no |
41 | ||
42 | dissolve: //h2 | |
90a1a78b | 43 | |
4e067cea | 44 | test_url: http://www.bbc.co.uk/sport/0/football/23224017 |
90a1a78b NL |
45 | test_contains: Swansea City have completed the club-record signing |
46 | ||
4e067cea | 47 | test_url: http://www.bbc.co.uk/news/business-15060862 |
90a1a78b NL |
48 | test_contains: Europe's leaders are meeting again to try to solve |
49 | ||
50 | # news feed | |
51 | test_url: http://feeds.bbci.co.uk/news/rss.xml | |
52 | # sport feed (football) | |
53 | test_url: http://feeds.bbci.co.uk/sport/0/football/rss.xml?edition=int | |
4e067cea | 54 | # video entry |
90a1a78b | 55 | test_url: http://www.bbc.co.uk/news/world-asia-22056933 |