/**
 * Tells whether $this->url is a valid URL.
 *
 * This hunk replaces the hand-written regular expression check (the "-"
 * lines) with PHP's filter_var() using FILTER_VALIDATE_URL (the "+" line).
 *
 * NOTE(review): the removed pattern only matched URLs starting with
 * http:// or https://; FILTER_VALIDATE_URL is not restricted to those
 * schemes according to the PHP manual -- confirm whether callers still
 * need an explicit scheme check.
 * NOTE(review): the removed version returned the result of preg_match(),
 * the new one returns a strict boolean -- verify that no caller depends on
 * the old return value.
 *
 * @return bool TRUE when filter_var() accepts $this->url, FALSE otherwise
 */
public function isCorrect()
{
- $pattern = '|^http(s)?://[a-z0-9-]+(.[a-z0-9-]+)*(:[0-9]+)?(/.*)?$|i';
-
- return preg_match($pattern, $this->url);
+ return filter_var($this->url, FILTER_VALIDATE_URL) !== FALSE;
}
public function clean()
if (function_exists('tidy_parse_string')) {
$tidy = tidy_parse_string($html, array(), 'UTF8');
$tidy->cleanRepair();
- $html = $tidy->value;
+
+ //Warning: tidy might fail, so ensure there is still some content
+ $body = $tidy->body();
+
+ //hasChildren does not seem to work, so just check the returned string
+ //(and do not forget to strip the whitespace first)
+ if (preg_replace('/\s+/', '', $body->value) !== "<body></body>") {
+ $html = $tidy->value;
+ }
}
$parameters = array();
if($readability->init())
{
$content = $readability->articleContent->innerHTML;
- $parameters['title'] = $readability->articleTitle->innerHTML;
+ $parameters['title'] = ($readability->articleTitle->innerHTML != '' ? $readability->articleTitle->innerHTML : _('Untitled'));
$parameters['content'] = $content;
return $parameters;