git.immae.eu Git - github/wallabag/wallabag.git/blobdiff - inc/poche/Url.class.php
poche is dead, welcome wallabag
[github/wallabag/wallabag.git] / inc / poche / Url.class.php
index d7ee911fcbbf5c8287ff72a68edd2d98bdb74ec9..aba236fa5b57f24bf8d6b479fa532730ffef7dea 100644 (file)
@@ -1,9 +1,9 @@
 <?php
 /**
- * poche, a read it later open source system
+ * wallabag, self hostable application allowing you to not miss any content anymore
  *
- * @category   poche
- * @author     Nicolas Lœuillet <support@inthepoche.com>
+ * @category   wallabag
+ * @author     Nicolas Lœuillet <nicolas@loeuillet.org>
  * @copyright  2013
  * @license    http://www.wtfpl.net/ see COPYING file
  */
@@ -25,76 +25,7 @@ class Url
         $this->url = $url;
     }
 
-    public function isCorrect()
-    {
+    public function isCorrect() {
         return filter_var($this->url, FILTER_VALIDATE_URL) !== FALSE;
     }
-
-    public function clean()
-    {
-        $url = html_entity_decode(trim($this->url));
-
-        $stuff = strpos($url,'&utm_source=');
-        if ($stuff !== FALSE)
-            $url = substr($url, 0, $stuff);
-        $stuff = strpos($url,'?utm_source=');
-        if ($stuff !== FALSE)
-            $url = substr($url, 0, $stuff);
-        $stuff = strpos($url,'#xtor=RSS-');
-        if ($stuff !== FALSE)
-            $url = substr($url, 0, $stuff);
-
-        $this->url = $url;
-    }
-
-    public function fetchContent()
-    {
-        if ($this->isCorrect()) {
-            $this->clean();
-            $html = Encoding::toUTF8(Tools::getFile($this->getUrl()));
-
-            # if Tools::getFile() if not able to retrieve HTTPS content, try the same URL with HTTP protocol
-            if (!preg_match('!^https?://!i', $this->getUrl()) && (!isset($html) || strlen($html) <= 0)) {
-                $this->setUrl('http://' . $this->getUrl());
-                $html = Encoding::toUTF8(Tools::getFile($this->getUrl()));
-            }
-
-            if (function_exists('tidy_parse_string')) {
-                $tidy = tidy_parse_string($html, array(), 'UTF8');
-                $tidy->cleanRepair();
-
-                //Warning: tidy might fail so, ensure there is still a content
-                $body = $tidy->body();
-
-                //hasChildren does not seem to work, just check the string
-                //returned (and do not forget to clean the white spaces)
-                if (preg_replace('/\s+/', '', $body->value) !== "<body></body>") {
-                    $html = $tidy->value;
-                }
-            }
-
-            $parameters = array();
-            if (isset($html) and strlen($html) > 0)
-            {
-                $readability = new Readability($html, $this->getUrl());
-                $readability->convertLinksToFootnotes = CONVERT_LINKS_FOOTNOTES;
-                $readability->revertForcedParagraphElements = REVERT_FORCED_PARAGRAPH_ELEMENTS;
-
-                if($readability->init())
-                {
-                    $content = $readability->articleContent->innerHTML;
-                    $parameters['title'] = ($readability->articleTitle->innerHTML != '' ? $readability->articleTitle->innerHTML : _('Untitled'));
-                    $parameters['content'] = $content;
-
-                    return $parameters;
-                }
-            }
-        }
-        else {
-            #$msg->add('e', _('error during url preparation : the link is not valid'));
-            Tools::logm($this->getUrl() . ' is not a valid url');
-        }
-
-        return FALSE;
-    }
 }
\ No newline at end of file