]>
Commit | Line | Data |
---|---|---|
4e067cea NL |
1 | # This config extracts the text of a Reddit self post. Comments on the post are not extracted. |
2 | ||
3 | title: //p[@class="title"]/a/text() | |
4 | ||
5 | author: //p[@class="tagline"]/a | |
6 | ||
7 | # FIXME: the date expression below does not work; the cause is unknown. | |
8 | date: //p[@class="tagline"]//@datetime | |
9 | ||
90a1a78b | 10 | body: (//div[contains(@class, 'noncollapsed')]//div[contains(@class, 'usertext-body')])[1] |
4e067cea NL |
11 | |
12 | strip_id_or_class: tagline | |
13 | strip_id_or_class: unvotable-message | |
14 | strip_id_or_class: buttons | |
15 | ||
16 | # Follow the posted link, unless it is a self post (a self post has a relative URL with no http://). | |
17 | single_page_link: //p[@class="title"]/a[contains(@href, 'http://')] | |
18 | ||
19 | test_url: http://www.reddit.com/r/truegaming/comments/wfe7r/i_wrote_about_the_problems_i_honestly_feel_that/ | |
90a1a78b NL |
20 | test_url: http://www.reddit.com/r/worldnews/comments/1as37r/twelve_north_korean_soldiers_attempting_to_defect/ |
21 | test_url: http://www.reddit.com/r/WritingPrompts/comments/2786lw/wp_in_a_world_where_puns_are_illegal_one_man/chybk8e |