diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/cbsnews.com.txt')
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/site_config/standard/cbsnews.com.txt | 29 |
1 files changed, 15 insertions, 14 deletions
diff --git a/inc/3rdparty/site_config/standard/cbsnews.com.txt b/inc/3rdparty/site_config/standard/cbsnews.com.txt
index 4ba3da19..04d20230 100644..100755
--- a/inc/3rdparty/site_config/standard/cbsnews.com.txt
+++ b/inc/3rdparty/site_config/standard/cbsnews.com.txt
@@ -1,14 +1,15 @@ | |||
1 | date: //meta[@name="published"]/@content | 1 | date: //meta[@name="published"]/@content |
2 | date: //div[@class="timeLine"] | 2 | date: //div[@class="timeLine"] |
3 | title: //div[@id='contentBody']//h1 | 3 | title: //div[@id='contentBody']//h1 |
4 | author: //dl[@class="storyBlogByline"]/dd/a | 4 | author: //dl[@class="storyBlogByline"]/dd/a |
5 | body: //div[@id='storyMediaBox'] | //div[contains(@class, 'storyText')] | 5 | body: //div[@id='storyMediaBox'] | //div[contains(@class, 'storyText')] |
6 | 6 | ||
7 | # Content Pruning | 7 | # Content Pruning |
8 | strip: //div[@class="scrollingArrows"] | 8 | strip: //div[@class="scrollingArrows"] |
9 | strip: //div[@class="timeLine"] | 9 | strip: //div[@class="timeLine"] |
10 | strip: //dl[@class="storyBlogByline"] | 10 | strip: //dl[@class="storyBlogByline"] |
11 | 11 | strip: //span[@class='image-credit'] | |
12 | prune: no | 12 | |
13 | 13 | prune: no | |
14 | test_url: http://www.cbsnews.com/8301-201_162-57366361/rescued-americans-dad-proud-of-the-u.s/ \ No newline at end of file | 14 | |
15 | test_url: http://www.cbsnews.com/8301-201_162-57366361/rescued-americans-dad-proud-of-the-u.s/ | ||