# Content-extraction rules for rogerebert.com article pages (host taken from test_url below).
# Each line is a `directive: value` pair; the title/author/body/strip values are XPath 1.0 expressions.
# NOTE(review): the format looks like a FiveFilters Full-Text RSS site config — confirm against the consuming tool.

# Title: the text of the document's <title> element up to (not including) the first ':'.
1 title: substring-before(//title,':')
# Author: takes the text of the first <b> that is a direct child of div.text, keeps what
# precedes the first '/', then keeps what follows the first 'BY'.
# Presumably the byline is shaped like "BY <name> / <date>" — verify against a live page.
# No normalize-space() is applied here, so the XPath result itself may carry surrounding whitespace.
2 author: substring-after(substring-before(//div[@class='text']/b,'/'),'BY')
# Body: the <div> whose class attribute is exactly 'text' (the same container the byline is read from).
4 body: //div[@class='text']
# Strip: <a> elements whose href contains 'printart' — presumably the print-version link; confirm.
6 strip: //a[contains(@href,'printart')]
# Strip by id/class value 'enlarge_photo' — presumably the photo-enlarge widget; confirm.
7 strip_id_or_class: enlarge_photo
# Sample article URL for exercising the rules above.
8 test_url: http://rogerebert.com/apps/pbcs.dll/article?AID=/20120411/REVIEWS/120419998/1005/GLOSSARY