diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard/golem.de.txt')
-rwxr-xr-x | inc/3rdparty/site_config/standard/golem.de.txt | 53 |
1 files changed, 31 insertions, 22 deletions
diff --git a/inc/3rdparty/site_config/standard/golem.de.txt b/inc/3rdparty/site_config/standard/golem.de.txt index 6afdebe8..c64860c0 100755 --- a/inc/3rdparty/site_config/standard/golem.de.txt +++ b/inc/3rdparty/site_config/standard/golem.de.txt | |||
@@ -1,25 +1,34 @@ | |||
1 | # Jens Kohl, jens.kohl@... | 1 | # Author: zinnober |
2 | # - Added publication date | 2 | # Rewrite of original template which fetched the printer-version without pictures |
3 | # - Stripped pagination block | ||
4 | # - Added single page link | ||
5 | # - Added XPath queries for the printer-friendly version | ||
6 | 3 | ||
7 | title: //h1 | 4 | tidy: no |
8 | body: //div[@class='formatted'] | ||
9 | prune: no | 5 | prune: no |
10 | 6 | ||
11 | date: substring-after(//li[2][@class="text1"], 'Datum:') | 7 | # Set full title |
12 | strip: //ol[@class="list-chapters"] | 8 | title: //h1 |
13 | strip_comments: yes | 9 | |
14 | 10 | date: //time | |
15 | # next: commands for printer friendly pages | 11 | |
16 | single_page_link: //a[contains(@href, 'print.php?a=')]/@href | 12 | # Content is here |
17 | title: //body/h3 | 13 | body: //article |
18 | strip_image_src: staticrl/images/logo.jpg | 14 | |
19 | strip_image_src: http://cpx.golem.de/cpx.php?class=7 | 15 | # Fetch full multipage articles |
20 | strip: //body/h3 | 16 | next_page_link: //a[@id='atoc_next'] |
21 | strip: //body/b[1] | 17 | |
22 | strip: //body/b[2] | 18 | # Remove tracking and ads |
23 | strip: //body/b[3] | 19 | strip_id_or_class: iqadtile4 |
24 | strip: //div[1] | 20 | |
25 | test_url: http://www.golem.de/1112/88696.html \ No newline at end of file | 21 | # General Cleanup |
22 | strip_id_or_class: list-jtoc | ||
23 | strip_id_or_class: table-jtoc | ||
24 | strip_id_or_class: implied | ||
25 | strip_id_or_class: social- | ||
26 | strip_id_or_class: comments | ||
27 | strip_id_or_class: footer | ||
28 | |||
29 | # Tidy up galleries (could still be improved, though) | ||
30 | strip: //img[@src=''] | ||
31 | |||
32 | # Try it yourself | ||
33 | test_url: http://www.golem.de/news/intel-core-i7-5960x-im-test-die-pc-revolution-beginnt-mit-octacore-und-ddr4-1408-108893.html | ||
34 | test_url: http://www.golem.de/news/test-infamous-first-light-neonbunter-actionspass-1408-108914.html | ||