/**
 * Tell whether the URL held in $this->url is considered valid.
 *
 * The previous implementation (the removed lines below) matched the URL
 * against a hand-written "scheme://host[:port]rest" regular expression and
 * returned the raw preg_match() result. The replacement delegates the check
 * to filter_var() with FILTER_VALIDATE_URL and compares the result strictly
 * against FALSE, so the method yields a real boolean.
 *
 * NOTE(review): per the PHP manual, FILTER_VALIDATE_URL only accepts ASCII
 * URLs, so internationalized domain names would presumably be rejected;
 * confirm this is acceptable for the callers of this method.
 *
 * @return bool TRUE when filter_var() accepts the URL, FALSE otherwise.
 */
public function isCorrect()
{
- $pattern = '|^(.*:)//([a-z\-.]+)(:[0-9]+)?(.*)$|i';
-
- return preg_match($pattern, $this->url);
+ return filter_var($this->url, FILTER_VALIDATE_URL) !== FALSE;
}
public function clean()
$tidy = tidy_parse_string($html, array(), 'UTF8');
$tidy->cleanRepair();
- //Warning: tidy might fail so, ensure there is still a content
- $body = $tidy->body();
+ //Warning: tidy might fail so, ensure there is still a content
+ $body = $tidy->body();
- //hasChildren does not seem to work, just check the string
- //returned (and do not forget to clean the white spaces)
- if (preg_replace('/\s+/', '', $body->value) !== "<body></body>")
- $html = $tidy->value;
- }
+ //hasChildren does not seem to work, just check the string
+ //returned (and do not forget to clean the white spaces)
+ if (preg_replace('/\s+/', '', $body->value) !== "<body></body>") {
+ $html = $tidy->value;
+ }
+ }
$parameters = array();
if (isset($html) and strlen($html) > 0)
if($readability->init())
{
$content = $readability->articleContent->innerHTML;
- $parameters['title'] = $readability->articleTitle->innerHTML;
+ $parameters['title'] = ($readability->articleTitle->innerHTML != '' ? $readability->articleTitle->innerHTML : _('Untitled'));
$parameters['content'] = $content;
return $parameters;