X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=inc%2F3rdparty%2Fsite_config%2Fstandard%2Ffnal.gov.txt;h=e404ccb802a40a6bb4ba1d249b4ac6d155a725e2;hb=4e067ceabd705201a16b4c92cf4b23f3b990326c;hp=7faa6bfc1bb8c37d8645e458fa1ffd1a6b7664d6;hpb=7f667839764621b5aa01c9db8ce5dde2a29ef18f;p=github%2Fwallabag%2Fwallabag.git diff --git a/inc/3rdparty/site_config/standard/fnal.gov.txt b/inc/3rdparty/site_config/standard/fnal.gov.txt old mode 100644 new mode 100755 index 7faa6bfc..e404ccb8 --- a/inc/3rdparty/site_config/standard/fnal.gov.txt +++ b/inc/3rdparty/site_config/standard/fnal.gov.txt @@ -1,15 +1,15 @@ -title: normalize(//h1) - -author: //td/p[position()=last()]/em - -# I swear, this is really the best way to do this -date: normalize(//td[contains(@style, "color: #ffffff")]) - -# my god, it's full of tables -body: /table/tbody/tr[5]//table/tbody//table/tbody/tr/td -strip: //h1 - -# the following two lines strip the byline at the end of the article (the byline is a
<p> that consists of an em dash and then some text in an <em>). I have no idea why I can't just strip //p[position()=last()], but trying to do so includes a bunch of other crap in the output.
-strip: //p[position()=last()]/em
+title: normalize(//h1)
+
+author: //td/p[position()=last()]/em
+
+# I swear, this is really the best way to do this
+date: normalize(//td[contains(@style, "color: #ffffff")])
+
+# my god, it's full of tables
+body: /table/tbody/tr[5]//table/tbody//table/tbody/tr/td
+strip: //h1
+
+# the following two lines strip the byline at the end of the article (the byline is a <p> that consists of an em dash and then some text in an <em>). I have no idea why I can't just strip //p[position()=last()], but trying to do so includes a bunch of other crap in the output.
+strip: //p[position()=last()]/em
strip: //p[position()=last()]/child::text()
test_url: http://www.fnal.gov/pub/today/archive_2011/today11-11-09_MuonDepartmentReadMore.html
\ No newline at end of file