# Site extraction rules, one directive per line ("name: value").
# Keeping each directive on its own line matters: the value of a directive
# runs to the end of the line, so collapsing them onto one line would make
# everything after "prune:" part of the prune value.

# Turn off pruning of the extracted content.
prune: no

# Author: taken from the content attribute of <meta name="dc.publisher">.
# NOTE(review): this reads the publisher field, not a creator/author field —
# confirm that is the intended source for the author name.
author: //meta[@name="dc.publisher"]/@content

# Publication date: taken from the content attribute of <meta name="dc.date">.
date: //meta[@name="dc.date"]/@content

# Strip every <p> whose class attribute contains the substring
# 'contributor vcard' (both class names, in that order, separated by a space).
strip: //p[contains(@class, 'contributor vcard')]

# NOTE(review): the directive below is incomplete in the visible source — the
# search text, the closing "):" and the replacement are missing. Complete it
# as "replace_string(<find>): <replacement>" or remove it.
replace_string(