diff options
author | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-13 10:15:40 +0200 |
---|---|---|
committer | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-07-13 10:15:40 +0200 |
commit | 4e067ceabd705201a16b4c92cf4b23f3b990326c (patch) | |
tree | 939f3a8e5ff3ab9ee414a57a895d3e78e1d46ce3 /inc/3rdparty/site_config/standard/cicero.de.txt | |
parent | 58dbe103889148def78b0fc8744d3f94c56a1561 (diff) | |
download | wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.gz wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.zst wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.zip |
updated specific configuration for parsing
Diffstat (limited to 'inc/3rdparty/site_config/standard/cicero.de.txt')
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/site_config/standard/cicero.de.txt | 62 |
1 files changed, 31 insertions, 31 deletions
diff --git a/inc/3rdparty/site_config/standard/cicero.de.txt b/inc/3rdparty/site_config/standard/cicero.de.txt index b9f9a12b..b8913639 100644..100755 --- a/inc/3rdparty/site_config/standard/cicero.de.txt +++ b/inc/3rdparty/site_config/standard/cicero.de.txt | |||
@@ -1,33 +1,33 @@ | |||
1 | # fforst@... | 1 | # fforst@... |
2 | 2 | ||
3 | # Use link to print article for single page view | 3 | # Use link to print article for single page view |
4 | single_page_link: //a[@class="print"] | 4 | single_page_link: //a[@class="print"] |
5 | 5 | ||
6 | # set body | 6 | # set body |
7 | tidy: no | 7 | tidy: no |
8 | body: //div[@class='artikel-content'] | 8 | body: //div[@class='artikel-content'] |
9 | 9 | ||
10 | # strip title and subtitle since we got it already | 10 | # strip title and subtitle since we got it already |
11 | strip: //div[@class='issue'] | 11 | strip: //div[@class='issue'] |
12 | strip: //div[@class='artikel-content']/h2 | 12 | strip: //div[@class='artikel-content']/h2 |
13 | 13 | ||
14 | # some authors are known and have a link, others don't | 14 | # some authors are known and have a link, others don't |
15 | author: //a[contains(@href, 'autor?')] | 15 | author: //a[contains(@href, 'autor?')] |
16 | 16 | ||
17 | #date | 17 | #date |
18 | date: //span[@class='article-date'] | 18 | date: //span[@class='article-date'] |
19 | 19 | ||
20 | # Strip author since we got him | 20 | # Strip author since we got him |
21 | strip_id_or_class: author | 21 | strip_id_or_class: author |
22 | 22 | ||
23 | #strip captions | 23 | #strip captions |
24 | strip_id_or_class: field-name-field-image-credit | 24 | strip_id_or_class: field-name-field-image-credit |
25 | strip_id_or_class: field-name-field-article-image-subtitle | 25 | strip_id_or_class: field-name-field-article-image-subtitle |
26 | 26 | ||
27 | # remove community functions | 27 | # remove community functions |
28 | strip: //div[@class='meta'] | 28 | strip: //div[@class='meta'] |
29 | strip: //div[@id='comments'] | 29 | strip: //div[@id='comments'] |
30 | 30 | ||
31 | # remove "continue on the next page" text | 31 | # remove "continue on the next page" text |
32 | strip: //p[text()="[SEITE]"] | 32 | strip: //p[text()="[SEITE]"] |
33 | test_url: http://www.cicero.de/weltbuehne/ihre-wut-ist-global-krise-jugend-revolten-aufstaende-zelte/43049 \ No newline at end of file | 33 | test_url: http://www.cicero.de/weltbuehne/ihre-wut-ist-global-krise-jugend-revolten-aufstaende-zelte/43049 \ No newline at end of file |