From 969a91a1e3cff023139ae7fd065b81c921b713ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Thu, 3 Oct 2013 13:48:58 +0200 Subject: site_config is now embedded with poche --- inc/3rdparty/site_config/README.md | 6 ++++++ inc/3rdparty/site_config/custom/bfmtv.com.txt | 6 ++++++ inc/3rdparty/site_config/custom/inthepoche.com.txt | 7 +++++++ inc/3rdparty/site_config/custom/palkeo.com.txt | 0 inc/3rdparty/site_config/custom/tldp.org.txt | 6 ++++++ inc/3rdparty/site_config/index.php | 3 +++ inc/3rdparty/site_config/standard/.wikipedia.org.txt | 19 +++++++++++++++++++ inc/3rdparty/site_config/standard/index.php | 3 +++ inc/3rdparty/site_config/standard/version.php | 2 ++ 9 files changed, 52 insertions(+) create mode 100644 inc/3rdparty/site_config/README.md create mode 100644 inc/3rdparty/site_config/custom/bfmtv.com.txt create mode 100644 inc/3rdparty/site_config/custom/inthepoche.com.txt create mode 100644 inc/3rdparty/site_config/custom/palkeo.com.txt create mode 100644 inc/3rdparty/site_config/custom/tldp.org.txt create mode 100644 inc/3rdparty/site_config/index.php create mode 100644 inc/3rdparty/site_config/standard/.wikipedia.org.txt create mode 100644 inc/3rdparty/site_config/standard/index.php create mode 100644 inc/3rdparty/site_config/standard/version.php (limited to 'inc/3rdparty') diff --git a/inc/3rdparty/site_config/README.md b/inc/3rdparty/site_config/README.md new file mode 100644 index 00000000..0aff456b --- /dev/null +++ b/inc/3rdparty/site_config/README.md @@ -0,0 +1,6 @@ +Full-Text RSS Site Patterns +--------------------------- + +Site patterns allow you to specify what should be extracted from specific sites. + +Please see http://help.fivefilters.org/customer/portal/articles/223153-site-patterns for more information. \ No newline at end of file diff --git a/inc/3rdparty/site_config/custom/bfmtv.com.txt b/inc/3rdparty/site_config/custom/bfmtv.com.txt new file mode 100644 index 00000000..0ff28d56 --- /dev/null +++ b/inc/3rdparty/site_config/custom/bfmtv.com.txt @@ -0,0 +1,6 @@ +title: //title +body: //h2 | //span[@class='masque'] | //article[@class='corps_article_right'] +prune: no +tidy: no + +test_url: http://www.bfmtv.com/societe/cigarette-electronique-dangers-588622.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/custom/inthepoche.com.txt b/inc/3rdparty/site_config/custom/inthepoche.com.txt new file mode 100644 index 00000000..ede74b97 --- /dev/null +++ b/inc/3rdparty/site_config/custom/inthepoche.com.txt @@ -0,0 +1,7 @@ +title: //title +body: //div[@class='post-content'] + +prune: no +tidy: no + +test_url: http://www.inthepoche.com/?post/poche-hosting \ No newline at end of file diff --git a/inc/3rdparty/site_config/custom/palkeo.com.txt b/inc/3rdparty/site_config/custom/palkeo.com.txt new file mode 100644 index 00000000..e69de29b diff --git a/inc/3rdparty/site_config/custom/tldp.org.txt b/inc/3rdparty/site_config/custom/tldp.org.txt new file mode 100644 index 00000000..7dd5cdb5 --- /dev/null +++ b/inc/3rdparty/site_config/custom/tldp.org.txt @@ -0,0 +1,6 @@ +title: //title +body: //h2 | //p | //ul +prune: no +tidy: no + +test_url: http://www.tldp.org/HOWTO/Plug-and-Play-HOWTO-7.html \ No newline at end of file diff --git a/inc/3rdparty/site_config/index.php b/inc/3rdparty/site_config/index.php new file mode 100644 index 00000000..a3d5f739 --- /dev/null +++ b/inc/3rdparty/site_config/index.php @@ -0,0 +1,3 @@ + \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/.wikipedia.org.txt b/inc/3rdparty/site_config/standard/.wikipedia.org.txt new file mode 100644 index 00000000..8b98ae4b --- /dev/null +++ b/inc/3rdparty/site_config/standard/.wikipedia.org.txt @@ -0,0 +1,19 @@ +title: //h1[@id='firstHeading'] +body: //div[@id = 'bodyContent'] +strip_id_or_class: editsection +#strip_id_or_class: toc +strip_id_or_class: vertical-navbox +strip: //table[@id='toc'] +strip: //div[@id='catlinks'] +strip: //div[@id='jump-to-nav'] +strip: //div[@class='thumbcaption']//div[@class='magnify'] +strip: //table[@class='navbox'] +strip: //table[contains(@class, 'infobox')] +strip: //div[@class='dablink'] +strip: //div[@id='contentSub'] +strip: //table[contains(@class, 'metadata')] +strip: //*[contains(@class, 'noprint')] +strip: //span[@title='pronunciation:'] +prune: no +tidy: no +test_url: http://en.wikipedia.org/wiki/Christopher_Lloyd \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/index.php b/inc/3rdparty/site_config/standard/index.php new file mode 100644 index 00000000..a3d5f739 --- /dev/null +++ b/inc/3rdparty/site_config/standard/index.php @@ -0,0 +1,3 @@ + \ No newline at end of file diff --git a/inc/3rdparty/site_config/standard/version.php b/inc/3rdparty/site_config/standard/version.php new file mode 100644 index 00000000..e61807ed --- /dev/null +++ b/inc/3rdparty/site_config/standard/version.php @@ -0,0 +1,2 @@ +