From fb9df0c269f36703909b8b259abbdbed29881ecd Mon Sep 17 00:00:00 2001 From: tcit Date: Thu, 24 Jul 2014 21:56:04 +0200 Subject: use directly MOBIClass --- inc/3rdparty/libraries/MOBIClass/OnlineArticle.php | 116 +++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 inc/3rdparty/libraries/MOBIClass/OnlineArticle.php (limited to 'inc/3rdparty/libraries/MOBIClass/OnlineArticle.php') diff --git a/inc/3rdparty/libraries/MOBIClass/OnlineArticle.php b/inc/3rdparty/libraries/MOBIClass/OnlineArticle.php new file mode 100644 index 00000000..ec3182fe --- /dev/null +++ b/inc/3rdparty/libraries/MOBIClass/OnlineArticle.php @@ -0,0 +1,116 @@ +init(); + if(!isset($this->metadata["title"])){ + $this->metadata["title"] = CharacterEntities::convert(strip_tags($r->getTitle()->innerHTML)); + } + if(!isset($this->metadata["author"])){ + $parts = parse_url($url); + $this->metadata["author"] = $parts["host"]; + } + + $article = $r->getContent()->innerHTML; + if(substr($article, 0, 5) == ""; + }else{ + $article = "".$article.""; + } + $doc = new DOMDocument(); + @$doc->loadHTML($article) or die($article); + $doc->normalizeDocument(); + + $this->images = $this->handleImages($doc, $url); + $this->text = $doc->saveHTML(); + } + + /** + * Get the text data to be integrated in the MOBI file: the article + * HTML as serialized by DOMDocument::saveHTML() in the constructor. + * @return string + */ + public function getTextData(){ + return $this->text; + } + /** + * Get the images gathered by handleImages(). Each entry is a FileRecord + * wrapping the downloaded image data (NOTE(review): described as jpeg data + * in the original comment; confirm against ImageHandler::DownloadImage). + * The array key of an entry is the image record index that handleImages() + * writes to the recindex attribute of the corresponding img element. + * @return array + */ + public function getImages(){ + return $this->images; + } + /** + * Get the metadata in the form of a hashtable (for example, title or author). 
+     * @return array associative array keyed by field name (e.g. "title", "author")
+     */
+    public function getMetaData(){
+        return $this->metadata;
+    }
+
+    /**
+     * Download the images referenced by the document and rewrite its img
+     * elements so that they refer to image records instead of URLs.
+     *
+     * Every img element gets its src attribute emptied. When the image can
+     * be downloaded, the element additionally receives a recindex attribute
+     * holding the key of the matching entry in the returned array. An image
+     * URL that occurs several times is downloaded once and shares a single
+     * record. Elements whose URL is empty, cannot be parsed, or cannot be
+     * downloaded are left without a recindex attribute.
+     *
+     * @param DOMDocument $dom document to scan; its img elements are modified in place
+     * @param string $url URL of the article, used to resolve relative image URLs
+     * @return array FileRecord objects keyed by image record index
+     */
+    private function handleImages($dom, $url){
+        $images = array();
+        // Resolved image URL => record index, so duplicates are fetched once.
+        $savedImages = array();
+
+        // Scheme of the article, borrowed by protocol-relative image URLs.
+        $parts = parse_url($url);
+        $scheme = (is_array($parts) && isset($parts["scheme"])) ? $parts["scheme"] : "http";
+
+        foreach($dom->getElementsByTagName('img') as $img) {
+            $src = $img->getAttribute("src");
+
+            // The src is always cleared; downloaded images are referenced
+            // through the recindex attribute instead.
+            $img->setAttribute("src", "");
+
+            if($src === ""){
+                // Nothing to download; resolving "" would yield the article URL itself.
+                continue;
+            }
+
+            $parsed = parse_url($src);
+            if($parsed === false){
+                // Seriously malformed URL: parse_url() gives up on it.
+                continue;
+            }
+
+            if(!isset($parsed["host"])){
+                // Relative URL: a leading "/" replaces the article's path,
+                // anything else is joined onto it.
+                $flags = (substr($src, 0, 1) === "/") ? HTTP_URL_REPLACE : HTTP_URL_JOIN_PATH;
+                $src = http_build_url($url, $parsed, $flags);
+            }elseif(!isset($parsed["scheme"])){
+                // Protocol-relative URL ("//host/path"): reuse the article's scheme.
+                $src = $scheme.":".$src;
+            }
+
+            if(isset($savedImages[$src])){
+                $img->setAttribute("recindex", $savedImages[$src]);
+                continue;
+            }
+
+            $image = ImageHandler::DownloadImage($src);
+            if($image === false){
+                continue;
+            }
+
+            $images[$this->imgCounter] = new FileRecord(new Record($image));
+            $img->setAttribute("recindex", $this->imgCounter);
+            $savedImages[$src] = $this->imgCounter;
+            $this->imgCounter++;
+        }
+
+        return $images;
+    }
+}
+?>
-- cgit v1.2.3