]>
git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/libraries/MOBIClass/OnlineArticle.php
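4 * Content provider that downloads an online article from a URL and extracts its text, images and metadata (title, author) for inclusion in a MOBI file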
8 class OnlineArticle
extends ContentProvider
{
11 private $metadata = array();
12 private $imgCounter = 0;
14 public function __construct($url) {
15 if (!preg_match('!^https?://!i', $url)) $url = 'http://'.$url;
17 $data = Http
::Request($url);
18 //$enc = mb_detect_encoding($str, "UTF-8,ISO-8859-1,ASCII");
19 $html = mb_convert_encoding($data, "UTF-8", "UTF-8,ISO-8859-1,ASCII");
20 //$html = utf8_encode($html);
21 $r = new Readability($html, $url);
23 if(!isset($this->metadata
["title"])){
24 $this->metadata
["title"] = CharacterEntities
::convert(strip_tags($r->getTitle()->innerHTML
));
26 if(!isset($this->metadata
["author"])){
27 $parts = parse_url($url);
28 $this->metadata
["author"] = $parts["host"];
31 $article = $r->getContent()->innerHTML
;
32 if(substr($article, 0, 5) == "<body"){
33 $article = "<html><head><meta http-equiv='Content-Type' content='text/html; charset=UTF-8'/></head>".$article."</html>";
35 $article = "<html><head><meta http-equiv='Content-Type' content='text/html; charset=UTF-8'/></head><body>".$article."</body></html>";
37 $doc = new DOMDocument();
38 @$doc->loadHTML($article) or die($article);
39 $doc->normalizeDocument();
41 $this->images
= $this->handleImages($doc, $url);
42 $this->text
= $doc->saveHTML();
46 * Get the text data to be integrated in the MOBI file
49 public function getTextData(){
53 * Get the images (an array containing the jpeg data). Array entry 0 will
54 * correspond to image record 0.
57 public function getImages(){
61 * Get the metadata in the form of a hashtable (for example, title or author).
64 public function getMetaData(){
65 return $this->metadata
;
69 * @param DOMDocument $dom
72 private function handleImages($dom, $url){
75 $parts = parse_url($url);
77 $savedImages = array();
79 $imgElements = $dom->getElementsByTagName('img');
80 foreach($imgElements as $img) {
81 $src = $img->getAttribute("src");
84 if(substr($src, 0, 1) == "/"){
88 $parsed = parse_url($src);
90 if(!isset($parsed["host"])){
92 $src = http_build_url($url, $parsed, HTTP_URL_REPLACE
);
94 $src = http_build_url($url, $parsed, HTTP_URL_JOIN_PATH
);
97 $img->setAttribute("src", "");
98 if(isset($savedImages[$src])){
99 $img->setAttribute("recindex", $savedImages[$src]);
101 $image = ImageHandler
::DownloadImage($src);
103 if($image !== false){
104 $images[$this->imgCounter
] = new FileRecord(new Record($image));
106 $img->setAttribute("recindex", $this->imgCounter
);
107 $savedImages[$src] = $this->imgCounter
;