diff options
author | nicosomb <nicolas@loeuillet.org> | 2013-04-17 15:11:57 +0200 |
---|---|---|
committer | nicosomb <nicolas@loeuillet.org> | 2013-04-17 15:11:57 +0200 |
commit | 1c182b6c9d74875054dc8efe75564ceab1bc1abe (patch) | |
tree | 97ab86be512a518ec3fac8d629211bb666741004 /inc | |
parent | 4456315b217f4eb253fc4403e43cdcb0012356e6 (diff) | |
download | wallabag-1c182b6c9d74875054dc8efe75564ceab1bc1abe.tar.gz wallabag-1c182b6c9d74875054dc8efe75564ceab1bc1abe.tar.zst wallabag-1c182b6c9d74875054dc8efe75564ceab1bc1abe.zip |
téléchargement des images d'un article
Diffstat (limited to 'inc')
-rw-r--r-- | inc/config.php | 5 | ||||
-rw-r--r-- | inc/db.php | 7 | ||||
-rwxr-xr-x | inc/functions.php | 138 |
3 files changed, 134 insertions, 16 deletions
diff --git a/inc/config.php b/inc/config.php index d468228d..51a25d36 100644 --- a/inc/config.php +++ b/inc/config.php | |||
@@ -7,7 +7,10 @@ | |||
7 | * @copyright 2013 | 7 | * @copyright 2013 |
8 | * @license http://www.wtfpl.net/ see COPYING file | 8 | * @license http://www.wtfpl.net/ see COPYING file |
9 | */ | 9 | */ |
10 | |||
11 | if(!is_dir('db/')){mkdir('db/',0705);} | ||
10 | define ('DB_PATH', 'sqlite:./db/poche.sqlite'); | 12 | define ('DB_PATH', 'sqlite:./db/poche.sqlite'); |
13 | define ('ABS_PATH', 'archiveImg/'); | ||
11 | 14 | ||
12 | include 'db.php'; | 15 | include 'db.php'; |
13 | include 'functions.php'; | 16 | include 'functions.php'; |
@@ -44,4 +47,4 @@ $token = (isset ($_REQUEST['token'])) ? $_REQUEST['token'] : ''; | |||
44 | 47 | ||
45 | if ($action != '') { | 48 | if ($action != '') { |
46 | action_to_do($action, $id, $url, $token); | 49 | action_to_do($action, $id, $url, $token); |
47 | } \ No newline at end of file | 50 | } |
@@ -10,12 +10,15 @@ | |||
10 | 10 | ||
11 | class db { | 11 | class db { |
12 | var $handle; | 12 | var $handle; |
13 | function __construct($path) { | 13 | function __construct($path) |
14 | { | ||
14 | $this->handle = new PDO($path); | 15 | $this->handle = new PDO($path); |
16 | $this->handle->exec('CREATE TABLE IF NOT EXISTS "entries" ("id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL UNIQUE , "title" VARCHAR, "url" VARCHAR UNIQUE , "is_read" INTEGER DEFAULT 0, "is_fav" INTEGER DEFAULT 0, "content" BLOB)'); | ||
15 | $this->handle->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION); | 17 | $this->handle->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION); |
16 | } | 18 | } |
17 | 19 | ||
18 | public function getHandle() { | 20 | public function getHandle() |
21 | { | ||
19 | return $this->handle; | 22 | return $this->handle; |
20 | } | 23 | } |
21 | } \ No newline at end of file | 24 | } \ No newline at end of file |
diff --git a/inc/functions.php b/inc/functions.php index 02d2c9dc..37abafd4 100755 --- a/inc/functions.php +++ b/inc/functions.php | |||
@@ -16,8 +16,9 @@ function get_poche_url() | |||
16 | } | 16 | } |
17 | 17 | ||
18 | // function define to retrieve url content | 18 | // function define to retrieve url content |
19 | function get_external_file($url, $timeout) | 19 | function get_external_file($url) |
20 | { | 20 | { |
21 | $timeout = 15; | ||
21 | // spoofing FireFox 18.0 | 22 | // spoofing FireFox 18.0 |
22 | $useragent="Mozilla/5.0 (Windows NT 5.1; rv:18.0) Gecko/20100101 Firefox/18.0"; | 23 | $useragent="Mozilla/5.0 (Windows NT 5.1; rv:18.0) Gecko/20100101 Firefox/18.0"; |
23 | 24 | ||
@@ -45,10 +46,7 @@ function get_external_file($url, $timeout) | |||
45 | } else { | 46 | } else { |
46 | 47 | ||
47 | // create http context and add timeout and user-agent | 48 | // create http context and add timeout and user-agent |
48 | $context = stream_context_create(array('http'=>array('timeout' => $timeout, // Timeout : time until we stop waiting for the response. | 49 | $context = stream_context_create(array('http'=>array('timeout' => $timeout,'header'=> "User-Agent: ".$useragent,/*spoot Mozilla Firefox*/'follow_location' => true))); |
49 | 'header'=> "User-Agent: ".$useragent, // spoot Mozilla Firefox | ||
50 | 'follow_location' => true | ||
51 | ))); | ||
52 | 50 | ||
53 | // only download page lesser than 4MB | 51 | // only download page lesser than 4MB |
54 | $data = @file_get_contents($url, false, $context, -1, 4000000); // We download at most 4 MB from source. | 52 | $data = @file_get_contents($url, false, $context, -1, 4000000); // We download at most 4 MB from source. |
@@ -90,7 +88,7 @@ function get_external_file($url, $timeout) | |||
90 | /** | 88 | /** |
91 | * Préparation de l'URL avec récupération du contenu avant insertion en base | 89 | * Préparation de l'URL avec récupération du contenu avant insertion en base |
92 | */ | 90 | */ |
93 | function prepare_url($url) | 91 | function prepare_url($url, $id) |
94 | { | 92 | { |
95 | $parametres = array(); | 93 | $parametres = array(); |
96 | $url = html_entity_decode(trim($url)); | 94 | $url = html_entity_decode(trim($url)); |
@@ -112,19 +110,127 @@ function prepare_url($url) | |||
112 | $r->convertLinksToFootnotes = TRUE; | 110 | $r->convertLinksToFootnotes = TRUE; |
113 | if($r->init()) | 111 | if($r->init()) |
114 | { | 112 | { |
115 | $title = $r->articleTitle->innerHTML; | 113 | $content = $r->articleContent->innerHTML; |
114 | $parametres['title'] = $r->articleTitle->innerHTML; | ||
115 | $parametres['content'] = filtre_picture($content, $url, $id); | ||
116 | return $parametres; | ||
116 | } | 117 | } |
117 | } | 118 | } |
118 | 119 | ||
119 | $parametres['title'] = $title; | 120 | logm('error during url preparation'); |
120 | $parametres['content'] = $r->articleContent->innerHTML; | 121 | return FALSE; |
122 | } | ||
123 | |||
/**
 * Download the images referenced by an article and rewrite the article's
 * markup so that it points at the local copies.
 *
 * Only <img> tags whose src attribute is double-quoted are considered (that is
 * what the pattern matches). Relative and absolute image URLs are both
 * downloaded; inline "data:" URIs and empty src attributes are left untouched.
 *
 * @param string $content HTML of the article
 * @param string $url     URL of the article, used as base for relative image links
 * @param mixed  $id      identifier of the entry; names the per-article asset directory
 * @return string the HTML with every downloaded image reference replaced by its local path
 */
function filtre_picture($content, $url, $id)
{
    $matches = array();
    preg_match_all('#<\s*(img)[^>]+src="([^"]*)"[^>]*>#Si', $content, $matches, PREG_SET_ORDER);
    if (empty($matches)) {
        return $content;
    }

    // Created lazily so that an article without any downloadable image does
    // not leave an empty directory behind.
    $directory = null;
    // src values already processed: the same picture is often referenced twice.
    $handled = array();

    foreach ($matches as $link) {
        // $link[1] is the tag name, $link[2] the raw src attribute value.
        // (The previous scheme test was applied to the tag name and therefore
        // never filtered anything: every image, absolute or not, is fetched.)
        $src = $link[2];
        $clean_src = trim($src);

        if ($clean_src === '' || isset($handled[$src]) || stripos($clean_src, 'data:') === 0) {
            continue;
        }
        $handled[$src] = true;

        $absolute_path = get_absolute_link($clean_src, $url);
        $filename = basename((string) parse_url($absolute_path, PHP_URL_PATH));
        if ($filename === '') {
            // e.g. "http://host/" : nothing usable as a local file name
            continue;
        }

        if ($directory === null) {
            $directory = create_assets_directory($id);
        }
        $fullpath = $directory . '/' . $filename;
        download_pictures($absolute_path, $fullpath);

        // Replace the quoted attribute value only, so that a short src such as
        // "logo.png" cannot corrupt a longer URL that merely contains it.
        $content = str_replace('"' . $src . '"', '"' . $fullpath . '"', $content);
    }

    return $content;
}
148 | |||
/**
 * Resolve a link found in a document against the URL of that document.
 *
 * @param string $relative_link link as written in the document (may already be absolute)
 * @param string $url           absolute URL of the document containing the link
 * @return string the absolute URL; $relative_link is returned unchanged when it
 *                already has a scheme or when $url cannot be parsed into
 *                scheme and host
 */
function get_absolute_link($relative_link, $url)
{
    $relative_link = (string) $relative_link;

    /* an empty reference designates the base document itself */
    if ($relative_link === '') return $url;

    /* return if already absolute URL */
    if (parse_url($relative_link, PHP_URL_SCHEME) != '') return $relative_link;

    /* anchors replace the fragment of the base, queries replace query and fragment */
    if ($relative_link[0] === '#') return preg_replace('/#.*$/s', '', $url) . $relative_link;
    if ($relative_link[0] === '?') return preg_replace('/[?#].*$/s', '', $url) . $relative_link;

    /* parse base URL into an array instead of extract(): no local variable can
       be overwritten and missing components are handled explicitly */
    $parts = parse_url($url);
    if (!is_array($parts) || !isset($parts['scheme']) || !isset($parts['host'])) {
        return $relative_link;
    }
    $scheme = $parts['scheme'];

    /* protocol-relative link ("//cdn.example.org/a.png"): only the scheme is inherited */
    if (substr($relative_link, 0, 2) === '//') return $scheme . ':' . $relative_link;

    /* keep a non-default port, it is part of the authority */
    $authority = $parts['host'];
    if (isset($parts['port'])) $authority .= ':' . $parts['port'];

    /* remove non-directory element from path (a base without path counts as root) */
    $path = isset($parts['path']) ? $parts['path'] : '';
    $path = preg_replace('#/[^/]*$#', '', $path);

    /* destroy path if relative url points to root */
    if ($relative_link[0] === '/') $path = '';

    /* dirty absolute URL */
    $abs = $authority . $path . '/' . $relative_link;

    /* replace '//' or '/./' or '/foo/../' with '/' until nothing changes */
    $re = array('#(/\.?/)#', '#/(?!\.\.)[^/]+/\.\./#');
    do {
        $abs = preg_replace($re, '/', $abs, -1, $n);
    } while ($n > 0);

    /* absolute URL is ready! */
    return $scheme . '://' . $abs;
}
180 | |||
/**
 * Download a picture and store it on disk.
 *
 * An existing file at $fullpath is only replaced when the download produced
 * data; a failed download leaves the previous copy in place instead of
 * truncating it to an empty file.
 *
 * @param string $absolute_path absolute URL of the picture
 * @param string $fullpath      local path the picture is written to
 * @return bool TRUE when the picture was written completely, FALSE otherwise
 */
function download_pictures($absolute_path, $fullpath)
{
    $rawdata = get_external_file($absolute_path);

    // NOTE(review): get_external_file() is assumed to yield FALSE or an empty
    // string when the download fails - confirm against its implementation.
    if (!is_string($rawdata) || $rawdata === '') {
        return FALSE;
    }

    // file_put_contents() creates or overwrites the file and closes the handle
    // itself; LOCK_EX keeps a concurrent request from reading a partial picture.
    $written = file_put_contents($fullpath, $rawdata, LOCK_EX);

    return $written !== FALSE && $written === strlen($rawdata);
}
196 | |||
/**
 * Make sure the media directory of an article exists and return its path.
 *
 * The root asset directory (ABS_PATH) is created first when missing, then the
 * per-article directory named after the entry identifier. Both are created
 * with mode 0705. The result of mkdir() is not checked: the path is returned
 * even when a directory could not be created.
 *
 * @param mixed $id identifier of the entry
 * @return string ABS_PATH immediately followed by $id
 */
function create_assets_directory($id)
{
    $root = ABS_PATH;
    $target = $root . $id;

    // parent first, child second: mkdir() is used non-recursively
    foreach (array($root, $target) as $folder) {
        if (is_dir($folder)) {
            continue;
        }
        mkdir($folder, 0705);
    }

    return $target;
}
214 | |||
/**
 * Delete a directory together with everything it contains.
 *
 * Sub-directories are removed recursively. A symbolic link is unlinked rather
 * than followed, so nothing outside of $directory is ever deleted.
 *
 * @param string $directory path of the directory to delete
 * @return bool TRUE when the directory was removed, FALSE when $directory is
 *              not a directory, cannot be read, or could not be removed
 */
function remove_directory($directory)
{
    if (!is_dir($directory)) {
        return FALSE;
    }

    $entries = scandir($directory);
    if ($entries === FALSE) {
        return FALSE;
    }

    foreach (array_diff($entries, array('.', '..')) as $entry) {
        $path = "$directory/$entry";
        if (is_dir($path) && !is_link($path)) {
            // recurse with this very function (the previous code called an
            // undefined delTree() and died on the first nested directory)
            remove_directory($path);
        } else {
            unlink($path);
        }
    }

    // fails, and reports FALSE, if an entry above could not be deleted
    return rmdir($directory);
}
124 | 229 | ||
125 | /** | 230 | /** |
126 | * Appel d'une action (mark as fav, archive, delete) | 231 | * Appel d'une action (mark as fav, archive, delete) |
127 | */ | 232 | */ |
233 | |||
128 | function action_to_do($action, $id, $url, $token) | 234 | function action_to_do($action, $id, $url, $token) |
129 | { | 235 | { |
130 | global $db; | 236 | global $db; |
@@ -135,13 +241,19 @@ function action_to_do($action, $id, $url, $token) | |||
135 | if ($url == '') | 241 | if ($url == '') |
136 | continue; | 242 | continue; |
137 | 243 | ||
138 | $parametres_url = prepare_url($url); | 244 | $req = $db->getHandle()->query("SELECT id FROM entries ORDER BY id DESC"); |
139 | $sql_action = 'INSERT INTO entries ( url, title, content ) VALUES (?, ?, ?)'; | 245 | $id = $req->fetchColumn()+1; |
140 | $params_action = array($url, $parametres_url['title'], $parametres_url['content']); | 246 | |
247 | if($parametres_url = prepare_url($url, $id)) { | ||
248 | $sql_action = 'INSERT INTO entries ( id, url, title, content ) VALUES (?,?, ?, ?)'; | ||
249 | $params_action = array($id,$url, $parametres_url['title'], $parametres_url['content']); | ||
250 | } | ||
251 | |||
141 | logm('add link ' . $url); | 252 | logm('add link ' . $url); |
142 | break; | 253 | break; |
143 | case 'delete': | 254 | case 'delete': |
144 | if (verif_token($token)) { | 255 | if (verif_token($token)) { |
256 | remove_directory(ABS_PATH . $id); | ||
145 | $sql_action = "DELETE FROM entries WHERE id=?"; | 257 | $sql_action = "DELETE FROM entries WHERE id=?"; |
146 | $params_action = array($id); | 258 | $params_action = array($id); |
147 | logm('delete link #' . $id); | 259 | logm('delete link #' . $id); |