From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Sun, 13 Jul 2014 10:15:40 +0200 Subject: updated specific configuration for parsing --- .../site_config/standard/jetzt.sueddeutsche.de.txt | 40 +++++++++++----------- 1 file changed, 20 insertions(+), 20 deletions(-) mode change 100644 => 100755 inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt (limited to 'inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt') diff --git a/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt b/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt old mode 100644 new mode 100755 index 6e8af934..00e4cf63 --- a/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt +++ b/inc/3rdparty/site_config/standard/jetzt.sueddeutsche.de.txt @@ -1,22 +1,22 @@ -title: //h1 -author: //p[contains(@class, 'author')]/a -date: //p[contains(@class, 'time')] -body: //div[@class='content']/div[contains(@class, 'text')] - -# prevent "no text" errors on multi-page articles -tidy: no - -# we use a custom next-link detector instead of the print view because -# it's pretty hard to strip out the unwanted parts in the print view -autodetect_next_page: no -next_page_link: //div[contains(@class, 'text')]/div/div[contains(@class, 'paging')]/a[@class='more '] - -strip: //h1 - -strip_id_or_class: meta -strip_id_or_class: author -strip_id_or_class: paging - -# prevent "Report an Error" from being recognized as footnote +title: //h1 +author: //p[contains(@class, 'author')]/a +date: //p[contains(@class, 'time')] +body: //div[@class='content']/div[contains(@class, 'text')] + +# prevent "no text" errors on multi-page articles +tidy: no + +# we use a custom next-link detector instead of the print view because +# it's pretty hard to strip out the unwanted parts in the print view +autodetect_next_page: no +next_page_link: //div[contains(@class, 'text')]/div/div[contains(@class, 'paging')]/a[@class='more '] + +strip: //h1 + +strip_id_or_class: meta +strip_id_or_class: author +strip_id_or_class: paging + +# prevent "Report an Error" from being recognized as footnote footnotes: no test_url: http://jetzt.sueddeutsche.de/texte/anzeigen/544308/Alles-flicken \ No newline at end of file -- cgit v1.2.3