aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/poche/Url.class.php
diff options
context:
space:
mode:
authorNicolas Lœuillet <nicolas.loeuillet@gmail.com>2013-08-08 09:36:10 -0700
committerNicolas Lœuillet <nicolas.loeuillet@gmail.com>2013-08-08 09:36:10 -0700
commit9a8b4ff4edf84d7df60de1b6fd1e493b59f88273 (patch)
tree3c8ab8086fd8a2750270f8aeaee1f1ce016167cb /inc/poche/Url.class.php
parent85ebc80c7eaf88e4d57a52adb8e4c32d8cc34b64 (diff)
parent572e758bf2e76308a3fa3eda9a8d9e9be8b53ecc (diff)
downloadwallabag-9a8b4ff4edf84d7df60de1b6fd1e493b59f88273.tar.gz
wallabag-9a8b4ff4edf84d7df60de1b6fd1e493b59f88273.tar.zst
wallabag-9a8b4ff4edf84d7df60de1b6fd1e493b59f88273.zip
Merge pull request #109 from inthepoche/dev
merge dev into master
Diffstat (limited to 'inc/poche/Url.class.php')
-rw-r--r--inc/poche/Url.class.php94
1 files changed, 94 insertions, 0 deletions
diff --git a/inc/poche/Url.class.php b/inc/poche/Url.class.php
new file mode 100644
index 00000000..f4a8f99e
--- /dev/null
+++ b/inc/poche/Url.class.php
@@ -0,0 +1,94 @@
<?php
/**
 * poche, a read it later open source system
 *
 * @category   poche
 * @author     Nicolas Lœuillet <support@inthepoche.com>
 * @copyright  2013
 * @license    http://www.wtfpl.net/ see COPYING file
 */

/**
 * Wraps the URL of a page to save: validates it, strips tracking
 * parameters from it and downloads/extracts the readable article.
 */
class Url
{
    /**
     * Markers from which the rest of the URL is dropped by clean().
     * Kept in the order they are applied.
     */
    private static $trackingMarkers = array('&utm_source=', '?utm_source=', '#xtor=RSS-');

    /** @var string the decoded URL */
    public $url;

    /**
     * @param string $url the URL, base64-encoded
     */
    public function __construct($url)
    {
        $this->url = base64_decode($url);
    }

    /**
     * @return string the current URL
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * @param string $url the new URL (plain text, NOT base64-encoded)
     */
    public function setUrl($url)
    {
        $this->url = $url;
    }

    /**
     * Checks that the URL is an http(s) URL with a syntactically valid host.
     *
     * Accepted shape: scheme, dot-separated host labels made of letters,
     * digits and hyphens, optional port, then optionally a path, a query
     * string or a fragment.
     *
     * @return int|false 1 when the URL matches, 0 when it does not
     *                   (false if the regex engine reports an error)
     */
    public function isCorrect()
    {
        // The dot between host labels must be escaped: unescaped it matches
        // any character and lets hosts such as "exa mple.com" through.
        // The tail accepts "?" and "#" right after the host so that
        // "http://host?query" stays valid without relying on that wildcard.
        $pattern = '|^https?://[a-z0-9-]+(\.[a-z0-9-]+)*(:[0-9]+)?([/?#].*)?$|i';

        return preg_match($pattern, $this->url);
    }

    /**
     * Trims the URL, decodes HTML entities and cuts it at the first
     * tracking marker found (utm_source parameter or xtor RSS fragment).
     * Everything from the marker to the end of the URL is removed.
     */
    public function clean()
    {
        $url = html_entity_decode(trim($this->url));

        foreach (self::$trackingMarkers as $marker) {
            $position = strpos($url, $marker);
            if ($position !== false) {
                $url = substr($url, 0, $position);
            }
        }

        $this->url = $url;
    }

    /**
     * Downloads the page and extracts its readable content.
     *
     * The URL is validated, then cleaned, then fetched. When an https URL
     * yields nothing, the same URL is retried over plain http; if that retry
     * succeeds the stored URL is switched to the http variant.
     *
     * @return array|false array with the keys 'title' and 'content' on
     *                     success, FALSE when the URL is invalid, nothing
     *                     could be downloaded or Readability found no article
     */
    public function fetchContent()
    {
        if (!$this->isCorrect()) {
            Tools::logm($this->getUrl() . ' is not a valid url');

            return FALSE;
        }

        $this->clean();
        $html = $this->download($this->getUrl());

        // If Tools::getFile() is not able to retrieve HTTPS content, try the
        // same URL with the HTTP protocol. Only https URLs can reach this
        // branch: isCorrect() already guarantees an http(s) scheme.
        if (!self::hasContent($html) && preg_match('!^https://!i', $this->getUrl())) {
            $fallbackUrl = preg_replace('!^https://!i', 'http://', $this->getUrl());
            $html = $this->download($fallbackUrl);
            if (self::hasContent($html)) {
                $this->setUrl($fallbackUrl);
            }
        }

        if (!self::hasContent($html)) {
            return FALSE;
        }

        // Tidy is optional. It is only run on real content: fed an empty
        // string it may produce a skeleton document that would wrongly look
        // like a successful download.
        if (function_exists('tidy_parse_string')) {
            $tidy = tidy_parse_string($html, array(), 'UTF8');
            if ($tidy !== false) {
                $tidy->cleanRepair();
                if (self::hasContent($tidy->value)) {
                    $html = $tidy->value;
                }
            }
        }

        $readability = new Readability($html, $this->getUrl());
        $readability->convertLinksToFootnotes = CONVERT_LINKS_FOOTNOTES;
        $readability->revertForcedParagraphElements = REVERT_FORCED_PARAGRAPH_ELEMENTS;

        if (!$readability->init()) {
            return FALSE;
        }

        return array(
            'title'   => $readability->articleTitle->innerHTML,
            'content' => $readability->articleContent->innerHTML,
        );
    }

    /**
     * Fetches $url through Tools::getFile() and passes the result through
     * Encoding::toUTF8().
     *
     * @param string $url
     * @return mixed whatever Encoding::toUTF8() returns for the download
     */
    private function download($url)
    {
        return Encoding::toUTF8(Tools::getFile($url));
    }

    /**
     * @param mixed $html
     * @return bool TRUE when $html is a non-empty string
     */
    private static function hasContent($html)
    {
        return is_string($html) && $html !== '';
    }
}