]> git.immae.eu Git - github/wallabag/wallabag.git/blame - inc/poche/Url.class.php
Merge pull request #170 from NumEricR/login-button
[github/wallabag/wallabag.git] / inc / poche / Url.class.php
CommitLineData
eb1af592
NL
1<?php
2/**
3 * poche, a read it later open source system
4 *
5 * @category poche
6 * @author Nicolas Lœuillet <support@inthepoche.com>
7 * @copyright 2013
8 * @license http://www.wtfpl.net/ see COPYING file
9 */
10
/**
 * Wraps the URL of a page to save: decodes it, validates it, strips tracking
 * markers from it and fetches the readable article content behind it.
 */
class Url
{
    /**
     * URL in plain text, as decoded by the constructor or replaced via setUrl().
     */
    public $url;

    /**
     * @param string $url base64-encoded URL; it is decoded before being stored
     */
    public function __construct($url)
    {
        $this->url = base64_decode($url);
    }

    /**
     * @return string the URL currently held by this object
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * Replaces the stored URL. Unlike the constructor, no base64 decoding is applied.
     *
     * @param string $url plain-text URL
     */
    public function setUrl($url)
    {
        $this->url = $url;
    }

    /**
     * @return bool TRUE when the stored URL passes FILTER_VALIDATE_URL
     */
    public function isCorrect()
    {
        return filter_var($this->url, FILTER_VALIDATE_URL) !== FALSE;
    }

    /**
     * Trims the stored URL, decodes HTML entities and cuts it at the first
     * occurrence of each tracking marker, in order.
     *
     * Everything from a marker to the end of the URL is dropped, including
     * any unrelated parameters that follow it.
     */
    public function clean()
    {
        $url = html_entity_decode(trim($this->url));

        // Markers are applied one after another on the already shortened URL.
        foreach (array('&utm_source=', '?utm_source=', '#xtor=RSS-') as $marker) {
            $position = strpos($url, $marker);
            if ($position !== FALSE) {
                $url = substr($url, 0, $position);
            }
        }

        $this->url = $url;
    }

    /**
     * Downloads the page behind the URL and extracts its readable part.
     *
     * @return array|bool array with the keys 'title' and 'content' on success,
     *                    FALSE when the URL is invalid, nothing could be
     *                    downloaded or Readability could not process the page
     */
    public function fetchContent()
    {
        if (!$this->isCorrect()) {
            #$msg->add('e', _('error during url preparation : the link is not valid'));
            Tools::logm($this->getUrl() . ' is not a valid url');

            return FALSE;
        }

        $this->clean();
        $html = Encoding::toUTF8(Tools::getFile($this->getUrl()));

        # if Tools::getFile() was not able to retrieve any content and the URL
        # does not start with http:// or https://, retry with an http:// prefix
        if (!preg_match('!^https?://!i', $this->getUrl()) && !$this->hasContent($html)) {
            $this->setUrl('http://' . $this->getUrl());
            $html = Encoding::toUTF8(Tools::getFile($this->getUrl()));
        }

        // Nothing was downloaded: there is nothing to repair or to parse.
        if (!$this->hasContent($html)) {
            return FALSE;
        }

        $html = $this->repairMarkup($html);

        $readability = new Readability($html, $this->getUrl());
        $readability->convertLinksToFootnotes = CONVERT_LINKS_FOOTNOTES;
        $readability->revertForcedParagraphElements = REVERT_FORCED_PARAGRAPH_ELEMENTS;

        if (!$readability->init()) {
            return FALSE;
        }

        $content = $readability->articleContent->innerHTML;
        $title = $readability->articleTitle->innerHTML;

        $parameters = array();
        $parameters['title'] = ($title != '' ? $title : _('Untitled'));
        $parameters['content'] = $content;

        return $parameters;
    }

    /**
     * Tells whether a download result holds at least one byte.
     *
     * @param mixed $html value returned by the download step
     * @return bool
     */
    private function hasContent($html)
    {
        return isset($html) && strlen($html) > 0;
    }

    /**
     * Runs the markup through tidy when the extension is available.
     *
     * The original markup is returned whenever tidy is missing, fails to
     * parse, yields no body node or yields an empty body.
     *
     * @param string $html non-empty markup
     * @return string repaired markup, or $html unchanged
     */
    private function repairMarkup($html)
    {
        if (!function_exists('tidy_parse_string')) {
            return $html;
        }

        $tidy = tidy_parse_string($html, array(), 'UTF8');
        if (!is_object($tidy)) {
            // tidy_parse_string() reports failure with a non-object value.
            return $html;
        }
        $tidy->cleanRepair();

        //Warning: tidy might fail so, ensure there is still a content
        $body = $tidy->body();
        if ($body === null) {
            return $html;
        }

        //hasChildren does not seem to work, just check the string
        //returned (and do not forget to clean the white spaces)
        if (preg_replace('/\s+/', '', $body->value) !== "<body></body>") {
            return $tidy->value;
        }

        return $html;
    }
}