aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty/site_config/standard/theglobalmail.org.txt
diff options
context:
space:
mode:
authorNicolas Lœuillet <nicolas@loeuillet.org>2014-07-13 10:15:40 +0200
committerNicolas Lœuillet <nicolas@loeuillet.org>2014-07-13 10:15:40 +0200
commit4e067ceabd705201a16b4c92cf4b23f3b990326c (patch)
tree939f3a8e5ff3ab9ee414a57a895d3e78e1d46ce3 /inc/3rdparty/site_config/standard/theglobalmail.org.txt
parent58dbe103889148def78b0fc8744d3f94c56a1561 (diff)
downloadwallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.gz
wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.tar.zst
wallabag-4e067ceabd705201a16b4c92cf4b23f3b990326c.zip
updated specific configuration for parsing
Diffstat (limited to 'inc/3rdparty/site_config/standard/theglobalmail.org.txt')
-rwxr-xr-x[-rw-r--r--]inc/3rdparty/site_config/standard/theglobalmail.org.txt78
1 files changed, 39 insertions, 39 deletions
diff --git a/inc/3rdparty/site_config/standard/theglobalmail.org.txt b/inc/3rdparty/site_config/standard/theglobalmail.org.txt
index fae0fb29..da1c84f9 100644..100755
--- a/inc/3rdparty/site_config/standard/theglobalmail.org.txt
+++ b/inc/3rdparty/site_config/standard/theglobalmail.org.txt
@@ -1,41 +1,41 @@
1title: //h1[@id="headline"] 1title: //h1[@id="headline"]
2author: //div[contains(@class, "editorial-byline-author")]/a 2author: //div[contains(@class, "editorial-byline-author")]/a
3date: substring-after(//div[contains(@class, "editorial-byline-meta")], " | ") 3date: substring-after(//div[contains(@class, "editorial-byline-meta")], " | ")
4 4
5# The article body contains a mix of article and non-article elements, so a lot of manual tweaks are needed 5# The article body contains a mix of article and non-article elements, so a lot of manual tweaks are needed
6body: //div[@id="template"] 6body: //div[@id="template"]
7strip_id_or_class: editorial-byline-pic 7strip_id_or_class: editorial-byline-pic
8strip_id_or_class: editorial-byline 8strip_id_or_class: editorial-byline
9strip_id_or_class: headline 9strip_id_or_class: headline
10 10
11# Include the leadin paragraph in the body text, but remove quotes because they're out of context 11# Include the leadin paragraph in the body text, but remove quotes because they're out of context
12dissolve: //div[contains(@id, "leadin")] 12dissolve: //div[contains(@id, "leadin")]
13strip_id_or_class: pullquote 13strip_id_or_class: pullquote
14 14
15# Image captions removed because they're confusing in body text 15# Image captions removed because they're confusing in body text
16strip_id_or_class: image-caption-content 16strip_id_or_class: image-caption-content
17 17
18# Remove header and footer 18# Remove header and footer
19strip_id_or_class: header 19strip_id_or_class: header
20strip_id_or_class: footer 20strip_id_or_class: footer
21 21
22# Remove the hidden logo that seems to be used to cause Facebook to show the logo instead of a random article image 22# Remove the hidden logo that seems to be used to cause Facebook to show the logo instead of a random article image
23strip: /html/body/span[contains(@style, "display: none")] 23strip: /html/body/span[contains(@style, "display: none")]
24 24
25# Remove search box 25# Remove search box
26strip_id_or_class: searchContainer 26strip_id_or_class: searchContainer
27strip: //div[contains(@class, "searchInstruction")] 27strip: //div[contains(@class, "searchInstruction")]
28strip: //div[contains(@class, "searchResults")]/h4 28strip: //div[contains(@class, "searchResults")]/h4
29 29
30# Remove the 'Letters to the Editor' section 30# Remove the 'Letters to the Editor' section
31strip_id_or_class: letter-text 31strip_id_or_class: letter-text
32strip_id_or_class: letter-from 32strip_id_or_class: letter-from
33strip_id_or_class: letter-date 33strip_id_or_class: letter-date
34 34
35# Remove Like/Tweet links 35# Remove Like/Tweet links
36strip_id_or_class: social-tab 36strip_id_or_class: social-tab
37 37
38# Remove 'divider' which causes an inexplicable slash to appear in the article body 38# Remove 'divider' which causes an inexplicable slash to appear in the article body
39strip_id_or_class: divider 39strip_id_or_class: divider
40 40
41test_url: http://www.theglobalmail.org/feature/tiramisu-time-in-pyongyang/88/ \ No newline at end of file 41test_url: http://www.theglobalmail.org/feature/tiramisu-time-in-pyongyang/88/ \ No newline at end of file