From 4e067ceabd705201a16b4c92cf4b23f3b990326c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20L=C5=93uillet?= Date: Sun, 13 Jul 2014 10:15:40 +0200 Subject: updated specific configuration for parsing --- inc/3rdparty/site_config/standard/500px.com.txt | 50 ++++++++++++------------- 1 file changed, 25 insertions(+), 25 deletions(-) mode change 100644 => 100755 inc/3rdparty/site_config/standard/500px.com.txt (limited to 'inc/3rdparty/site_config/standard/500px.com.txt') diff --git a/inc/3rdparty/site_config/standard/500px.com.txt b/inc/3rdparty/site_config/standard/500px.com.txt old mode 100644 new mode 100755 index 68e6b2d0..b9b7e9dd --- a/inc/3rdparty/site_config/standard/500px.com.txt +++ b/inc/3rdparty/site_config/standard/500px.com.txt @@ -1,27 +1,27 @@ -# very loose setup for both 500px.com/photo/* and 500px.com/blog/* -# photo page example: http://500px.com/photo/4181666 -# blog page example: http://500px.com/blog/110 - -# avoid "no text" error -tidy:no -prune:no - -# reorganize photo page elements -#body://div[contains(@class,'container')] -move_into(body)://div[contains(@id,'thephoto')] -move_into(body)://div[contains(@id,'description')] -move_into(body)://div[contains(@id,'tags')] -move_into(body)://div[contains(@id,'photo-info')] - -# clean photo page info -strip://span[contains(@id,'copyright')] -strip://*[contains(@id,'store')] -strip://*[contains(@id,'user-info')] -strip://*[contains(@id,'photo-stats')] -strip://*[contains(@id,'voting_controls_container')] -strip://*[contains(@id,'more-photos')] -strip://*[contains(@id,'embed-photo')] - -# clean blog page side bar +# very loose setup for both 500px.com/photo/* and 500px.com/blog/* +# photo page example: http://500px.com/photo/4181666 +# blog page example: http://500px.com/blog/110 + +# avoid "no text" error +tidy:no +prune:no + +# reorganize photo page elements +#body://div[contains(@class,'container')] +move_into(body)://div[contains(@id,'thephoto')] +move_into(body)://div[contains(@id,'description')] +move_into(body)://div[contains(@id,'tags')] +move_into(body)://div[contains(@id,'photo-info')] + +# clean photo page info +strip://span[contains(@id,'copyright')] +strip://*[contains(@id,'store')] +strip://*[contains(@id,'user-info')] +strip://*[contains(@id,'photo-stats')] +strip://*[contains(@id,'voting_controls_container')] +strip://*[contains(@id,'more-photos')] +strip://*[contains(@id,'embed-photo')] + +# clean blog page side bar strip://*[contains(@class,'col d3 clearafter')] test_url: http://500px.com/photo/3641041?from=editors \ No newline at end of file -- cgit v1.2.3