git.immae.eu Git - github/wallabag/wallabag.git/blobdiff - inc/3rdparty/site_config/standard/bostonglobe.com.txt
updated specific configuration for parsing
[github/wallabag/wallabag.git] / inc / 3rdparty / site_config / standard / bostonglobe.com.txt
old mode 100644 (file)
new mode 100755 (executable)
index d3e6f43..4c74a34
@@ -1,16 +1,16 @@
-# NOTE:  If testing this configuration yields bad results, including junk text like "Try BostonGlobe.com today" and "THIS STORY APPEARED IN", please replace the Test URL with a current-day headline link from bostonglobe.com.\r
-\r
-title: //div[@class="header"]/h1\r
-author: substring-after(//div[@class="byline"]/h2[@class="author"],"By ")\r
-date: //div[@class="byline"]/p[last()]\r
-body: //div[@class="article-body"]\r
-\r
-strip_id_or_class: aside\r
-strip_id_or_class: promo\r
-strip_id_or_class: skip-nav\r
-strip_id_or_class: article-more\r
-strip_id_or_class: article-bar\r
-\r
-# This removes image captions.  If the parser starts saving images from bostonglobe.com (currently, it does not), then this directive should be removed.\r
+# NOTE:  If testing this configuration yields bad results, including junk text like "Try BostonGlobe.com today" and "THIS STORY APPEARED IN", please replace the Test URL with a current-day headline link from bostonglobe.com.
+
+title: //div[@class="header"]/h1
+author: substring-after(//div[@class="byline"]/h2[@class="author"],"By ")
+date: //div[@class="byline"]/p[last()]
+body: //div[@class="article-body"]
+
+strip_id_or_class: aside
+strip_id_or_class: promo
+strip_id_or_class: skip-nav
+strip_id_or_class: article-more
+strip_id_or_class: article-bar
+
+# This removes image captions.  If the parser starts saving images from bostonglobe.com (currently, it does not), then this directive should be removed.
 strip_id_or_class: figure
 test_url: http://bostonglobe.com/news/nation/2012/03/17/illinois-primary-could-pivotal/PsDzFZqvhEYyXbOcF9FOkO/story.html
\ No newline at end of file