diff options
Diffstat (limited to 'inc/3rdparty/site_config')
-rwxr-xr-x | inc/3rdparty/site_config/custom/dailymotion.com.txt | 12 | ||||
-rw-r--r-- | inc/3rdparty/site_config/custom/index.php | 3 | ||||
-rw-r--r-- | inc/3rdparty/site_config/custom/mobile.lemondeinformatique.fr.txt | 6 | ||||
-rwxr-xr-x | inc/3rdparty/site_config/custom/ted.com.txt | 11 | ||||
-rw-r--r-- | inc/3rdparty/site_config/index.php | 5 | ||||
-rw-r--r-- | inc/3rdparty/site_config/standard/.about.com.txt | 14 | ||||
-rw-r--r-- | inc/3rdparty/site_config/standard/moo.nac.uci.edu.txt | 9 | ||||
-rwxr-xr-x[-rw-r--r--] | inc/3rdparty/site_config/standard/politico.com.txt | 4 | ||||
-rw-r--r-- | inc/3rdparty/site_config/standard/version.txt | 2 |
9 files changed, 62 insertions, 4 deletions
diff --git a/inc/3rdparty/site_config/custom/dailymotion.com.txt b/inc/3rdparty/site_config/custom/dailymotion.com.txt new file mode 100755 index 00000000..0cad808f --- /dev/null +++ b/inc/3rdparty/site_config/custom/dailymotion.com.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | title: //title | ||
2 | body: //iframe | ||
3 | |||
4 | replace_string(<![CDATA[): _ | ||
5 | replace_string(]]>): _ | ||
6 | |||
7 | single_page_link: //link[@type='application/xml+oembed'] | ||
8 | |||
9 | prune: no | ||
10 | tidy: no | ||
11 | |||
12 | test_url: http://www.dailymotion.com/video/x1vk5oh_before-they-were-on-game-of-thrones_people | ||
diff --git a/inc/3rdparty/site_config/custom/index.php b/inc/3rdparty/site_config/custom/index.php new file mode 100644 index 00000000..a3d5f739 --- /dev/null +++ b/inc/3rdparty/site_config/custom/index.php | |||
@@ -0,0 +1,3 @@ | |||
1 | <?php | ||
2 | // this is here to prevent directory listing over the web | ||
3 | ?> \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/custom/mobile.lemondeinformatique.fr.txt b/inc/3rdparty/site_config/custom/mobile.lemondeinformatique.fr.txt new file mode 100644 index 00000000..24aec5c3 --- /dev/null +++ b/inc/3rdparty/site_config/custom/mobile.lemondeinformatique.fr.txt | |||
@@ -0,0 +1,6 @@ | |||
1 | title: //h2 | ||
2 | body: //div[@id='illustration'] | //p | ||
3 | prune: no | ||
4 | tidy: no | ||
5 | |||
6 | test_url: http://mobile.lemondeinformatique.fr/actualites/lire-les-datacenters-d-apple-google-et-facebook-eco-responsables-selon-greenpeace-le-monde-informatique-57122.html | ||
diff --git a/inc/3rdparty/site_config/custom/ted.com.txt b/inc/3rdparty/site_config/custom/ted.com.txt new file mode 100755 index 00000000..4940d2bc --- /dev/null +++ b/inc/3rdparty/site_config/custom/ted.com.txt | |||
@@ -0,0 +1,11 @@ | |||
1 | title: //title | ||
2 | body: //div[@class='talk-article__body talk-transcript__body'] | //div[@class='media__image media__image--thumb talk-link__image'] | ||
3 | |||
4 | strip_id_or_class: talk-transcript__para__time | ||
5 | |||
6 | single_page_link: //a[@id='hero-transcript-link'] | ||
7 | |||
8 | #prune: no | ||
9 | tidy: no | ||
10 | |||
11 | test_url: http://www.ted.com/talks/andrew_solomon_how_the_worst_moments_in_our_lives_make_us_who_we_are | ||
diff --git a/inc/3rdparty/site_config/index.php b/inc/3rdparty/site_config/index.php index a1b767fd..76ca8b3c 100644 --- a/inc/3rdparty/site_config/index.php +++ b/inc/3rdparty/site_config/index.php | |||
@@ -1,3 +1,2 @@ | |||
1 | <?php | 1 | <?php |
2 | // this is here to prevent directory listing over the web | 2 | // this is here to prevent directory listing over the web \ No newline at end of file |
3 | ?> \ No newline at end of file | ||
diff --git a/inc/3rdparty/site_config/standard/.about.com.txt b/inc/3rdparty/site_config/standard/.about.com.txt new file mode 100644 index 00000000..e1ebaee3 --- /dev/null +++ b/inc/3rdparty/site_config/standard/.about.com.txt | |||
@@ -0,0 +1,14 @@ | |||
1 | body: //div[@id='articlebody'] | ||
2 | title: //h1 | ||
3 | author: //p[@id='by']//a | ||
4 | |||
5 | next_page_link: //span[@class='next']/a | ||
6 | # Not the same as below! | ||
7 | |||
8 | prune: yes | ||
9 | tidy: no | ||
10 | |||
11 | # Annoying 'next' links plainly inside the article body | ||
12 | strip: //*[text()[contains(.,'Next: ')]] | ||
13 | |||
14 | test_url: http://psychology.about.com/od/theoriesofpersonality/ss/defensemech.htm | ||
diff --git a/inc/3rdparty/site_config/standard/moo.nac.uci.edu.txt b/inc/3rdparty/site_config/standard/moo.nac.uci.edu.txt new file mode 100644 index 00000000..24c949e9 --- /dev/null +++ b/inc/3rdparty/site_config/standard/moo.nac.uci.edu.txt | |||
@@ -0,0 +1,9 @@ | |||
1 | title: //div[@id='header']//h1[1] | ||
2 | |||
3 | body: //div[@id='content'] | ||
4 | |||
5 | strip_id_or_class: toc | ||
6 | |||
7 | prune: no | ||
8 | |||
9 | test_url: http://moo.nac.uci.edu/~hjm/HOWTO_move_data.html | ||
diff --git a/inc/3rdparty/site_config/standard/politico.com.txt b/inc/3rdparty/site_config/standard/politico.com.txt index 121fd5b9..c5302d1b 100644..100755 --- a/inc/3rdparty/site_config/standard/politico.com.txt +++ b/inc/3rdparty/site_config/standard/politico.com.txt | |||
@@ -4,10 +4,14 @@ body://div[contains(@class,"story-text")] | |||
4 | # Why doesn't this work? next_page_link://ul[contains(@class,"pagination")]/li/a[@rel="next"] | 4 | # Why doesn't this work? next_page_link://ul[contains(@class,"pagination")]/li/a[@rel="next"] |
5 | 5 | ||
6 | next_page_link://ul[contains(@class,"pagination")]/li[contains(@class, "current")]/following-sibling::node()/a | 6 | next_page_link://ul[contains(@class,"pagination")]/li[contains(@class, "current")]/following-sibling::node()/a |
7 | next_page_link://div[contains(@class,"pagination")]/ol/li[contains(@class, "current")]/following-sibling::node()/a | ||
7 | date://meta[@name="publish_date"]/@content | 8 | date://meta[@name="publish_date"]/@content |
8 | 9 | ||
9 | strip://div[contains(@class, "breadcrumbs")] | 10 | strip://div[contains(@class, "breadcrumbs")] |
10 | strip://a[contains(@class, "hidden")] | 11 | strip://a[contains(@class, "hidden")] |
11 | strip://div[contains(@class, "story-embed")] | 12 | strip://div[contains(@class, "story-embed")] |
12 | strip://div[contains(@class, "story-text")]//p/a[contains(text(), "Also on POLITICO:")]/.. | 13 | strip://div[contains(@class, "story-text")]//p/a[contains(text(), "Also on POLITICO:")]/.. |
14 | strip://div[contains(@class, "story-interrupt")] | ||
15 | strip://footer[contains(@class, "author-bio")] | ||
16 | |||
13 | test_url: http://www.politico.com/news/stories/0712/78105.html \ No newline at end of file | 17 | test_url: http://www.politico.com/news/stories/0712/78105.html \ No newline at end of file |
diff --git a/inc/3rdparty/site_config/standard/version.txt b/inc/3rdparty/site_config/standard/version.txt index bf0d87ab..eaf01ebd 100644 --- a/inc/3rdparty/site_config/standard/version.txt +++ b/inc/3rdparty/site_config/standard/version.txt | |||
@@ -1 +1 @@ | |||
4 \ No newline at end of file | 2013-05-12T22:53:07Z \ No newline at end of file | ||