1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
<?php
/**
* Description of FanFictionNet
*
* @author Sander
*/
/**
 * Downloads a story from fanfiction.net one chapter page at a time.
 *
 * Every chapter page is fetched with Http::Request(), parsed with DOMDocument,
 * and the inner HTML of its "storytext" element is passed to addPage() along
 * with the chapter title. Author and title metadata are read once, from the
 * first page that is downloaded.
 *
 * NOTE(review): addPage() and setMetadata() are not defined in this class;
 * presumably they come from MultipleFileHandler -- confirm against the parent.
 */
class FanFictionNet extends MultipleFileHandler {
    /** URL prefix of every story page; the numeric story id follows it. */
    private static $prefix = "http://www.fanfiction.net/s/";

    /** True once loadMetadata() has run for this story. */
    private $downloadedMetadata = false;

    /** Story id parsed from the URL (0 when no id could be parsed). */
    private $id = 0;

    /** Number of chapters; stays -1 until the first page has been inspected. */
    private $chapterCount = -1;

    /**
     * Parses the story id out of $url and downloads every chapter.
     *
     * @param string $url Story URL containing the fanfiction.net prefix,
     *                    e.g. "http://www.fanfiction.net/s/123/1/Some_Title".
     */
    public function __construct($url) {
        // Matches() accepts the prefix anywhere in the URL, so locate it
        // instead of assuming it starts at offset 0.
        $start = strpos($url, self::$prefix);
        if ($start === false) {
            $start = 0;
        }
        $ending = substr($url, $start + strlen(self::$prefix));

        // intval() reads the leading digits and stops at the first "/", so
        // this also works when the URL has nothing after the id.
        $this->id = intval($ending);

        // $this->chapterCount is filled in by the first addChapter() call, so
        // the loop bound is deliberately re-evaluated on every iteration.
        for ($i = 1; $i <= max(1, $this->chapterCount); $i++) {
            $this->addChapter($i);
        }
    }

    /**
     * Downloads chapter $n and registers its text and title via addPage().
     *
     * Terminates the script with die() when the page cannot be parsed, when
     * the story has more than 4 chapters, or when the page contains no
     * "storytext" element.
     *
     * @param int $n 1-based chapter number.
     */
    private function addChapter($n) {
        $file = Http::Request(self::$prefix . $this->id . "/" . $n . "/");
        $doc = $this->parseHtml($file);
        if ($doc === null) {
            die($file);
        }

        if (!$this->downloadedMetadata) {
            $this->loadMetadata($doc);
            $this->downloadedMetadata = true;
        }

        if ($this->chapterCount < 0) {
            $this->chapterCount = $this->getNumberChapters($doc);
            if ($this->chapterCount > 4) {
                die("Too many files to download, don't use php for this!");
            }
        }

        $textEl = $doc->getElementById("storytext");
        if ($textEl === null) {
            die("Error: " . $doc->saveHTML());
        }

        // Blank out the presentational attributes on every <hr> of the page
        // before the story text is serialised.
        foreach ($doc->getElementsByTagName('hr') as $rule) {
            $rule->setAttribute("size", "");
            $rule->setAttribute("noshade", "");
        }

        $text = $this->innerHtml($textEl);

        // Chapter selector options are labelled "<n>. <title>"; strip the
        // number prefix to obtain the title of this chapter.
        $title = "";
        $options = $this->findChapterOptions($doc);
        if ($options !== null) {
            $needle = $n . ". ";
            $needleLength = strlen($needle);
            foreach ($options as $option) {
                $label = $option->nodeValue;
                // strncmp() compares bytes; a loose == between two substrings
                // could treat numeric-looking labels as equal.
                if (strncmp($label, $needle, $needleLength) === 0) {
                    $title = substr($label, $needleLength);
                    break;
                }
            }
        }

        $this->addPage($text, $title);
    }

    /**
     * Parses an HTML string without emitting libxml warnings.
     *
     * @param mixed $html Raw response body.
     * @return DOMDocument|null The parsed document, or null when $html is not
     *                          a non-empty string or could not be parsed.
     */
    private function parseHtml($html) {
        // DOMDocument::loadHTML() rejects an empty source outright, so filter
        // that case before calling it.
        if (!is_string($html) || $html === '') {
            return null;
        }

        $doc = new DOMDocument();
        $previous = libxml_use_internal_errors(true);
        $loaded = $doc->loadHTML($html);
        if (!$previous) {
            // Only discard the buffered errors when buffering was switched on
            // here; otherwise the caller may still want to read them.
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);

        return $loaded ? $doc : null;
    }

    /**
     * Finds the <option> elements of the first <select name="chapter">.
     *
     * @param DOMDocument $doc Parsed chapter page.
     * @return DOMNodeList|null The options, or null when the page has no
     *                          chapter selector.
     */
    private function findChapterOptions($doc) {
        foreach ($doc->getElementsByTagName('select') as $select) {
            // getAttribute() yields "" for a missing attribute, so no separate
            // hasAttribute() check is needed.
            if ($select->getAttribute("name") === "chapter") {
                return $select->getElementsByTagName("option");
            }
        }
        return null;
    }

    /**
     * Counts the chapters listed in the page's chapter selector.
     *
     * @param DOMDocument $doc Parsed chapter page.
     * @return int Number of options in the selector, or 1 when the page has
     *             no chapter selector.
     */
    private function getNumberChapters($doc) {
        $options = $this->findChapterOptions($doc);
        return $options === null ? 1 : $options->length;
    }

    /**
     * Extracts author and title from a chapter page and stores them through
     * setMetadata().
     *
     * @param DOMDocument $doc Parsed chapter page.
     */
    private function loadMetadata($doc) {
        // Author: text of every link whose href starts with "/u/". Each match
        // is passed to setMetadata(), so the last one is the final call.
        foreach ($doc->getElementsByTagName('a') as $link) {
            if (strncmp($link->getAttribute("href"), "/u/", 3) === 0) {
                $this->setMetadata("author", $link->nodeValue);
            }
        }

        // Title: text of the first <b> element on the page.
        // TODO: Find a more reliable way to extract the title. An earlier
        // attempt took the last path segment of the <link rel="canonical">
        // href and replaced "_" with " ".
        $firstBold = $doc->getElementsByTagName("b")->item(0);
        $title = $firstBold !== null ? $firstBold->nodeValue : null;
        $this->setMetadata("title", $title);
    }

    /**
     * Serialises the children of $node (not $node itself) as HTML.
     *
     * @param DOMNode $node Element whose content is wanted.
     * @return string HTML produced by saveHTML() on a document holding deep
     *                copies of the child nodes.
     */
    private function innerHtml($node) {
        $fragment = new DOMDocument();
        foreach ($node->childNodes as $child) {
            $fragment->appendChild($fragment->importNode($child, true));
        }
        return $fragment->saveHTML();
    }

    /**
     * Tells whether $url refers to a fanfiction.net story.
     *
     * @param string $url URL to test.
     * @return bool True when $url contains the story URL prefix.
     */
    public static function Matches($url) {
        //TODO: Implement with regex
        return strpos($url, self::$prefix) !== false;
    }
}
?>
|