<?php
/**
 * Splits one XHTML chapter into several smaller XHTML documents.
 *
 * The chapter is parsed with DOM and its <body> children are distributed over
 * document fragments. Every fragment is then wrapped in a copy of the original
 * <html> opening tag and <head>, so each part is a complete document.
 *
 * @copyright 2009-2014 A. Grandt
 * @license GNU LGPL 2.1
 * @link http://www.phpclasses.org/package/6115
 * @link https://github.com/Grandt/PHPePub
 * @version 3.20
 */
class EPubChapterSplitter {
    const VERSION = 3.20;

    /** Smallest split size accepted by setSplitSize(), in bytes. */
    const MIN_SPLIT_SIZE = 10240;

    private $splitDefaultSize = 250000;
    private $bookVersion = EPub::BOOK_VERSION_EPUB2;

    /**
     * Set the EPub version the generated parts are formatted for.
     *
     * Strings are stored trimmed; any non-string value selects
     * EPub::BOOK_VERSION_EPUB2.
     *
     * @param string $bookVersion one of the EPub::BOOK_VERSION_* values
     * @return void
     */
    public function setVersion($bookVersion) {
        $this->bookVersion = is_string($bookVersion) ? trim($bookVersion) : EPub::BOOK_VERSION_EPUB2;
    }

    /**
     * Set default chapter target size.
     * Default is 250000 bytes, and minimum is 10240 bytes.
     *
     * @param int $size segment size in bytes; values below the minimum
     *                  (including non-numeric input) are raised to it
     * @return void
     */
    public function setSplitSize($size) {
        // Clamp the *cast* value: comparing the raw argument let non-numeric
        // strings slip past the minimum check and end up as a size of 0.
        $this->splitDefaultSize = max(self::MIN_SPLIT_SIZE, (int)$size);
    }

    /**
     * Get the chapter target size.
     *
     * @return int target size in bytes
     */
    public function getSplitSize() {
        return $this->splitDefaultSize;
    }

    /**
     * Split $chapter into multiple parts.
     *
     * The search string can either be a regular string or a PCRE pattern as
     * defined here: http://www.php.net/manual/en/pcre.pattern.php
     * A regular string is matched against the serialized node directly after
     * its opening tag, i.e. "Chapter" matches "<h1>Chapter 1</h1>".
     *
     * Without $splitOnSearchString, a chapter no longer than the split size is
     * returned unchanged as a single-element array.
     *
     * NOTE(review): text nodes are never copied into the parts; only element
     * (and other non-"#text") nodes are. Text placed directly inside <body>,
     * or directly inside an element this method descends into, looks like it
     * is dropped - confirm before relying on mixed content.
     * NOTE(review): parts are keyed by the matched node's markup, so two
     * identical chapter markers overwrite each other in the result.
     *
     * @param String $chapter             complete XHTML document (needs <html>, <head> and <body>)
     * @param Bool   $splitOnSearchString Split on chapter boundaries. Splitting on search strings disables the split size check.
     * @param String $searchString        Chapter string to search for, fixed text or a regular expression pattern.
     *
     * @return array with 1 or more parts. Keys are the part index, or, when
     *               splitting on a search string, the trimmed markup of the
     *               node that opened the part (index for a part that was not
     *               opened by a match, e.g. content before the first chapter).
     */
    public function splitChapter($chapter, $splitOnSearchString = false, $searchString = '/^Chapter\\ /i') {
        $isSearchRegexp = $splitOnSearchString && (preg_match('#^(\D|\S|\W).+\1[imsxeADSUXJu]*$#m', $searchString) == 1);
        if ($splitOnSearchString && !$isSearchRegexp) {
            $searchString = '#^<.+?>' . preg_quote($searchString, '#') . "#";
        }

        if (!$splitOnSearchString && strlen($chapter) <= $this->splitDefaultSize) {
            return array($chapter);
        }

        // Nothing to parse; DOMDocument::loadHTML() rejects empty input.
        if (trim((string)$chapter) === '') {
            return array($chapter);
        }

        // Real-world XHTML regularly upsets the HTML parser; collect its
        // complaints internally instead of silencing everything with "@".
        $xmlDoc = new DOMDocument();
        $previousErrorMode = libxml_use_internal_errors(true);
        $xmlDoc->loadHTML($chapter);
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);

        $head = $xmlDoc->getElementsByTagName("head");
        $body = $xmlDoc->getElementsByTagName("body");

        // An absent or empty body has no nodes to distribute.
        if ($body->length === 0 || $body->item(0)->firstChild === null) {
            return array($chapter);
        }

        // Skeleton for every part: everything up to and including the opening
        // <html ...> tag of the source, closed again right away.
        $htmlPos = stripos($chapter, "<html");
        $htmlEndPos = stripos($chapter, ">", $htmlPos);
        $newXML = substr($chapter, 0, $htmlEndPos + 1) . "\n</html>";
        if (strpos(trim($newXML), "<?xml ") === false) {
            $newXML = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" . $newXML;
        }
        $headerLength = strlen($newXML);

        $files = array();          // one DOMDocumentFragment per part
        $chapterNames = array();   // part index => markup of the node that opened it
        $domDepth = 0;
        $domPath = array();        // original nodes we descended into
        $domClonedPath = array();  // shallow clones of $domPath, used to rebuild wrappers

        $curFile = $xmlDoc->createDocumentFragment();
        $files[] = $curFile;
        $curParent = $curFile;
        $curSize = 0;

        $bodyLen = strlen($xmlDoc->saveXML($body->item(0)));
        $headLen = strlen($xmlDoc->saveXML($head->item(0))) + $headerLength;

        // Every part repeats the header, so only the remainder is available
        // for body content. Spread the body evenly over the needed parts.
        $partSize = $this->splitDefaultSize - $headLen;

        if ($bodyLen > $partSize) {
            $parts = ceil($bodyLen / $partSize);
            $partSize = ($bodyLen / $parts) - $headLen;
        }

        $node = $body->item(0)->firstChild;

        do {
            $nodeData = $xmlDoc->saveXML($node);
            $nodeLen = strlen($nodeData);

            // A node too big for one part is opened up and its children are
            // distributed instead.
            // NOTE(review): after descending, the first child is still
            // measured and matched using the parent's $nodeData/$nodeLen.
            if ($nodeLen > $partSize && $node->hasChildNodes()) {
                $domPath[] = $node;
                $domClonedPath[] = $node->cloneNode(false);
                $domDepth++;

                $node = $node->firstChild;
            }

            $node2 = $node->nextSibling;

            if ($node != null && $node->nodeName != "#text") {
                $doSplit = false;
                if ($splitOnSearchString) {
                    $doSplit = preg_match($searchString, $nodeData) == 1;
                }

                if ($curSize > 0 && ($doSplit || (!$splitOnSearchString && $curSize + $nodeLen > $partSize))) {
                    $curFile = $xmlDoc->createDocumentFragment();
                    $files[] = $curFile;
                    $curParent = $curFile;

                    // Re-create the chain of enclosing elements in the new
                    // part so the node keeps its ancestors.
                    foreach ($domClonedPath as $pathNode) {
                        $newParent = $pathNode->cloneNode(false);
                        $curParent->appendChild($newParent);
                        $curParent = $newParent;
                    }
                    $curSize = strlen($xmlDoc->saveXML($curFile));
                }

                if ($doSplit) {
                    // Keyed by part index: a part holding content that comes
                    // before the first match must not shift the later names.
                    $chapterNames[count($files) - 1] = trim($nodeData);
                }

                $curParent->appendChild($node->cloneNode(true));
                $curSize += $nodeLen;
            }

            $node = $node2;
            while ($node == null && $domDepth > 0) {
                $domDepth--;
                $node = end($domPath)->nextSibling;
                array_pop($domPath);
                array_pop($domClonedPath);

                // If no split happened since the descent, $curParent is the
                // fragment itself and has no parent; stay on the fragment.
                $curParent = $curParent->parentNode;
                if ($curParent === null) {
                    $curParent = $curFile;
                }
            }
        } while ($node != null);

        $encoding = (string)$xmlDoc->xmlEncoding;

        $xml = new DOMDocument('1.0', $encoding);
        $xml->preserveWhiteSpace = false;
        $xml->formatOutput = true;

        $chapterData = array();
        $fileCount = count($files);

        for ($idx = 0; $idx < $fileCount; $idx++) {
            $xml2Doc = new DOMDocument('1.0', $encoding);
            $xml2Doc->loadXML($newXML);
            $html = $xml2Doc->getElementsByTagName("html")->item(0);
            $html->appendChild($xml2Doc->importNode($head->item(0), true));
            $partBody = $xml2Doc->createElement("body");
            $html->appendChild($partBody);
            $partBody->appendChild($xml2Doc->importNode($files[$idx], true));

            // force pretty printing and correct formatting, should not be needed, but it is.
            $xml->loadXML($xml2Doc->saveXML());

            $doc = $xml->saveXML();

            if ($this->bookVersion === EPub::BOOK_VERSION_EPUB3) {
                // EPUB3 parts are emitted without a DOCTYPE declaration.
                $doc = preg_replace('#^\s*<!DOCTYPE\ .+?>\s*#im', '', $doc);
            }

            $key = ($splitOnSearchString && isset($chapterNames[$idx])) ? $chapterNames[$idx] : $idx;
            $chapterData[$key] = $doc;
        }

        return $chapterData;
    }
}