aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty/site_config/standard/zeit.de.txt
diff options
context:
space:
mode:
authortcitworld <thomas.citharet@gmail.com>2014-01-04 12:30:31 -0800
committertcitworld <thomas.citharet@gmail.com>2014-01-04 12:30:31 -0800
commit7f667839764621b5aa01c9db8ce5dde2a29ef18f (patch)
tree93d8241ee81c87e18494325ae02f0589a8e328a2 /inc/3rdparty/site_config/standard/zeit.de.txt
parenta84f77d6ba15a64ff00453f5d5190c021ce460ed (diff)
parent2abcccb37180c17318f5226f5d4bc28f30b621ea (diff)
downloadwallabag-7f667839764621b5aa01c9db8ce5dde2a29ef18f.tar.gz
wallabag-7f667839764621b5aa01c9db8ce5dde2a29ef18f.tar.zst
wallabag-7f667839764621b5aa01c9db8ce5dde2a29ef18f.zip
Merge pull request #1 from inthepoche/dev
Dev
Diffstat (limited to 'inc/3rdparty/site_config/standard/zeit.de.txt')
-rw-r--r--inc/3rdparty/site_config/standard/zeit.de.txt44
1 files changed, 44 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/zeit.de.txt b/inc/3rdparty/site_config/standard/zeit.de.txt
new file mode 100644
index 00000000..66a7f1ac
--- /dev/null
+++ b/inc/3rdparty/site_config/standard/zeit.de.txt
@@ -0,0 +1,44 @@
1# 2012-12-23 [carlo@...] fixed half-assed headlines in articles, removed inline author profiles, adjusted picture captions
2# 2012-03-17 [dkless@...] Cut metadata parts in the beginning and the ends of the content block; copyright entries for pictures removed; Author fixed, not sure if old entries still valid (I left them); Weird problems with some pages addressed (see last section for removing hidden section)
3# 2011-12-09 [carlo@...] Removed "related articles" block
4# 2011-08-23 [carlo@...] changed single page link to use print version: page works better, less ambiguity. Related cleanups and simplifications.
5# 2011-08-20 [carlo@...] added author, fixed date
6
7
8single_page_link: //a[@title='Druckversion']
9tidy: no
10
11title: //title
12date: substring-before( //li[@class="date"], " " )
13author: //li[@class="author"]/a/text() | //li[@class="author first"]/a/text()
14author: substring-after(//li[@class='source first '], 'Quelle: ')
15
16strip_id_or_class: articleheader
17strip: //div[@id="comments"] | //div[@class="pagination block"] | //p[@class="ressortbacklink"] | //div[@id="relatedArticles"] | // div[@class="inline portrait"]
18
19#Removes author and date from the start
20strip: //ul[@class="tools"]
21#Removes the copyright statement - it is often a distracting first line of the article
22strip: //p[@class="copyright"]
23strip: //div[@class="copyright"]
24#Removes pagination links at the end
25strip: //div[@class="pagination"]
26
27# Fix picture captions
28wrap_in(small): //p[@class="caption"]/text()
29
30# Fix sub-headlines
31wrap_in(h2): //p/strong
32dissolve: //h2/strong
33
34#Sometimes the print version embeds elements that are not displayed on the web, but that would be displayed in the mobilized version and can even cause problems. These sections are removed here.
35strip_id_or_class:"informatives"
36strip_id_or_class:"bottom"
37strip_id_or_class:"teasermosaic"
38strip_id_or_class:"comments"
39strip_id_or_class:"articlefooter af"
40strip_id_or_class:"relateds"
41strip_id_or_class:"pagination"
42
43footnotes: no
44test_url: http://www.zeit.de/kultur/film/2012-12/Kurzfilmtag
\ No newline at end of file