# Content-extraction rules for webwereld.nl article pages (host taken from test_url below).
# NOTE(review): the directive names look like the FiveFilters Full-Text RSS
# site-config format -- confirm against whatever tool consumes this file.

# XPath matching every element whose class attribute is exactly "paginator".
# Presumably the matched elements are removed from the extracted content -- verify.
1 strip: //*[@class="paginator"]
# XPath matching the element whose id attribute is "articleText".
# Presumably this element is taken as the article body -- verify.
2 body: //*[@id="articleText"]
# XPath matching <a> elements whose class attribute is exactly "next".
# Presumably followed to fetch the next page of a multi-page article -- verify.
3 next_page_link: //a[@class="next"]
# Known limitations of this rule set, as recorded by the original author:
6 # No publishing date detection
7 # No author and intro deduplication over multiple pages
# Sample article URL for this site (presumably used to test the rules above -- verify).
8 test_url: http://webwereld.nl/analyse/111452/de-code-van-dorifel-nader-bekeken.html