diff options
Diffstat (limited to 'inc/3rdparty/site_config/standard')
-rw-r--r-- | inc/3rdparty/site_config/standard/.about.com.txt | 14 | ||||
-rwxr-xr-x | inc/3rdparty/site_config/standard/dn.pt.txt | 9 |
2 files changed, 23 insertions, 0 deletions
diff --git a/inc/3rdparty/site_config/standard/.about.com.txt b/inc/3rdparty/site_config/standard/.about.com.txt new file mode 100644 index 00000000..e1ebaee3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/.about.com.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | body: //div[@id='articlebody'] | ||
2 | title: //h1 | ||
3 | author: //p[@id='by']//a | ||
4 | |||
5 | next_page_link: //span[@class='next']/a | ||
6 | # Pagination link only; not the same as the in-body 'Next: ' links stripped below! | ||
7 | |||
8 | prune: yes | ||
9 | tidy: no | ||
10 | |||
11 | # Annoying 'Next: ' links embedded directly inside the article body | ||
12 | strip: //*[text()[contains(.,'Next: ')]] | ||
13 | |||
14 | test_url: http://psychology.about.com/od/theoriesofpersonality/ss/defensemech.htm | ||
diff --git a/inc/3rdparty/site_config/standard/dn.pt.txt b/inc/3rdparty/site_config/standard/dn.pt.txt new file mode 100755 index 00000000..051b8cb9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/dn.pt.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | single_page_link: concat('http://www.dn.pt/Common/print.aspx?content_id=', //input[@type='hidden' and @name='link-comments']/@value) | ||
2 | #<input type="hidden" name="link-comments" class="link-comments" value="3972244"> | ||
3 | |||
4 | title: //h1 | ||
5 | author: //div[@class="Author"] | ||
6 | |||
7 | strip: //div[@class="Patrocinio"] | ||
8 | |||
9 | test_url: http://www.dn.pt/inicio/opiniao/interior.aspx?content_id=3972244&seccao=Alberto%20Gon%E7alves&tag=Opini%E3o%20-%20Em%20Foco&page=1 \ No newline at end of file | ||