]>
Commit | Line | Data |
---|---|---|
eb1af592 NL |
<?php
/**
 * poche, a read it later open source system
 *
 * @category   poche
 * @author     Nicolas Lœuillet <support@inthepoche.com>
 * @copyright  2013
 * @license    http://www.wtfpl.net/ see COPYING file
 */
10 | ||
/**
 * Wraps a single URL: validates it, strips tracking suffixes and downloads
 * the page it points to so the readable article can be extracted.
 */
class Url
{
    /**
     * The decoded URL this object works on.
     */
    public $url;

    /**
     * @param string $url base64-encoded URL; it is decoded before being stored
     */
    public function __construct($url)
    {
        $this->url = base64_decode($url);
    }

    /**
     * @return string the URL currently held by this object
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * Replaces the stored URL. The value is stored as-is (no base64 decoding).
     *
     * @param string $url
     */
    public function setUrl($url)
    {
        $this->url = $url;
    }

    /**
     * Checks that the stored URL looks like "<scheme>://<host>[:port][rest]".
     *
     * @return int|false result of preg_match(): 1 when the URL matches,
     *                   0 when it does not, FALSE on a regex error
     */
    public function isCorrect()
    {
        // The host class includes digits so that hosts starting with a digit
        // (e.g. "9gag.com" or an IPv4 address) are accepted.
        $pattern = '|^(.*:)//([a-z0-9\-.]+)(:[0-9]+)?(.*)$|i';

        return preg_match($pattern, $this->url);
    }

    /**
     * Trims the stored URL, decodes HTML entities and cuts it at the first
     * occurrence of each known tracking marker (everything from the marker
     * to the end of the string is dropped).
     */
    public function clean()
    {
        $url = html_entity_decode(trim($this->url));

        // Markers are applied in this order, each one on the result of the
        // previous truncation.
        $markers = array('&utm_source=', '?utm_source=', '#xtor=RSS-');
        foreach ($markers as $marker) {
            $position = strpos($url, $marker);
            if ($position !== FALSE) {
                $url = substr($url, 0, $position);
            }
        }

        $this->url = $url;
    }

    /**
     * Downloads the page behind the URL and extracts its readable part.
     *
     * @return array|false array with the keys 'title' and 'content' on
     *                     success, FALSE when the URL is invalid, nothing
     *                     could be downloaded or Readability failed
     */
    public function fetchContent()
    {
        if (!$this->isCorrect()) {
            Tools::logm($this->getUrl() . ' is not a valid url');

            return FALSE;
        }

        $this->clean();
        $html = Encoding::toUTF8(Tools::getFile($this->getUrl()));

        # if nothing was retrieved and the URL does not already start with
        # http:// or https://, retry once with "http://" prepended
        if (!preg_match('!^https?://!i', $this->getUrl()) && self::isBlank($html)) {
            $this->setUrl('http://' . $this->getUrl());
            $html = Encoding::toUTF8(Tools::getFile($this->getUrl()));
        }

        # stop here when the download produced nothing, so that tidy is not
        # asked to repair an empty document
        if (self::isBlank($html)) {
            return FALSE;
        }

        if (function_exists('tidy_parse_string')) {
            $tidy = tidy_parse_string($html, array(), 'UTF8');
            $tidy->cleanRepair();
            $html = $tidy->value;
        }

        # tidy replaced $html, so check it again before parsing
        if (self::isBlank($html)) {
            return FALSE;
        }

        $readability = new Readability($html, $this->getUrl());
        # both flags come from constants defined elsewhere in the project
        $readability->convertLinksToFootnotes = CONVERT_LINKS_FOOTNOTES;
        $readability->revertForcedParagraphElements = REVERT_FORCED_PARAGRAPH_ELEMENTS;

        if (!$readability->init()) {
            return FALSE;
        }

        $parameters = array();
        $parameters['title'] = $readability->articleTitle->innerHTML;
        $parameters['content'] = $readability->articleContent->innerHTML;

        return $parameters;
    }

    /**
     * Tells whether a downloaded document is unset or has zero length.
     *
     * @param mixed $html
     * @return bool
     */
    private static function isBlank($html)
    {
        return !isset($html) || strlen($html) <= 0;
    }
}