]> git.immae.eu Git - github/wallabag/wallabag.git/blobdiff - inc/poche/Url.class.php
Merge pull request #170 from NumEricR/login-button
[github/wallabag/wallabag.git] / inc / poche / Url.class.php
index f4a8f99e6377dee187142db7495227f8a49860c1..d7ee911fcbbf5c8287ff72a68edd2d98bdb74ec9 100644 (file)
@@ -27,9 +27,7 @@ class Url
 
     public function isCorrect()
     {
-        $pattern = '|^http(s)?://[a-z0-9-]+(.[a-z0-9-]+)*(:[0-9]+)?(/.*)?$|i';
-
-        return preg_match($pattern, $this->url);
+        return filter_var($this->url, FILTER_VALIDATE_URL) !== FALSE;
     }
 
     public function clean()
@@ -64,7 +62,15 @@ class Url
             if (function_exists('tidy_parse_string')) {
                 $tidy = tidy_parse_string($html, array(), 'UTF8');
                 $tidy->cleanRepair();
-                $html = $tidy->value;
+
+                //Warning: tidy might fail, so ensure there is still some content
+                $body = $tidy->body();
+
+                //hasChildren does not seem to work, just check the string
+                //returned (and do not forget to clean the white spaces)
+                if (preg_replace('/\s+/', '', $body->value) !== "<body></body>") {
+                    $html = $tidy->value;
+                }
             }
 
             $parameters = array();
@@ -77,7 +83,7 @@ class Url
                 if($readability->init())
                 {
                     $content = $readability->articleContent->innerHTML;
-                    $parameters['title'] = $readability->articleTitle->innerHTML;
+                    $parameters['title'] = ($readability->articleTitle->innerHTML != '' ? $readability->articleTitle->innerHTML : _('Untitled'));
                     $parameters['content'] = $content;
 
                     return $parameters;