diff options
Diffstat (limited to 'src/Wallabag')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/ContentProxy.php | 79 |
1 files changed, 59 insertions, 20 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 1a2a330f..2dc436f8 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -332,32 +332,71 @@ class ContentProxy | |||
332 | $diff_keys = array_keys($diff); | 332 | $diff_keys = array_keys($diff); |
333 | sort($diff_keys); | 333 | sort($diff_keys); |
334 | 334 | ||
335 | switch ($diff_keys) { | 335 | if ($this->ignoreUrl($entry->getUrl())) { |
336 | case ['path']: | 336 | $entry->setUrl($url); |
337 | if (($parsed_entry_url['path'] . '/' === $parsed_content_url['path']) // diff is trailing slash, we only replace the url of the entry | 337 | } else { |
338 | || ($url === urldecode($entry->getUrl()))) { // we update entry url if new url is a decoded version of it, see EntryRepository#findByUrlAndUserId | 338 | switch ($diff_keys) { |
339 | case ['path']: | ||
340 | if (($parsed_entry_url['path'] . '/' === $parsed_content_url['path']) // diff is trailing slash, we only replace the url of the entry | ||
341 | || ($url === urldecode($entry->getUrl()))) { // we update entry url if new url is a decoded version of it, see EntryRepository#findByUrlAndUserId | ||
342 | $entry->setUrl($url); | ||
343 | } | ||
344 | break; | ||
345 | case ['scheme']: | ||
339 | $entry->setUrl($url); | 346 | $entry->setUrl($url); |
340 | } | 347 | break; |
341 | break; | 348 | case ['fragment']: |
342 | case ['scheme']: | 349 | case ['query']: |
343 | $entry->setUrl($url); | 350 | case ['fragment', 'query']: |
344 | break; | 351 | // noop |
345 | case ['fragment']: | 352 | break; |
346 | case ['query']: | 353 | default: |
347 | case ['fragment', 'query']: | 354 | if (empty($entry->getOriginUrl())) { |
348 | // noop | 355 | $entry->setOriginUrl($entry->getUrl()); |
349 | break; | 356 | } |
350 | default: | 357 | $entry->setUrl($url); |
351 | if (empty($entry->getOriginUrl())) { | 358 | break; |
352 | $entry->setOriginUrl($entry->getUrl()); | 359 | } |
353 | } | ||
354 | $entry->setUrl($url); | ||
355 | break; | ||
356 | } | 360 | } |
357 | } | 361 | } |
358 | } | 362 | } |
359 | 363 | ||
360 | /** | 364 | /** |
365 | * Check entry url against an ignore list to replace with content url. | ||
366 | * | ||
367 | * XXX: move the ignore list in the database to let users handle it | ||
368 | * | ||
369 | * @param string $url url to test | ||
370 | * | ||
371 | * @return bool true if url matches ignore list otherwise false | ||
372 | */ | ||
373 | private function ignoreUrl($url) | ||
374 | { | ||
375 | $ignored_hosts = ['feedproxy.google.com', 'feeds.reuters.com']; | ||
376 | $ignored_patterns = ['https?://www\.lemonde\.fr/tiny.*']; | ||
377 | |||
378 | $parsed_url = parse_url($url); | ||
379 | |||
380 | $filtered = array_filter($ignored_hosts, function ($var) use ($parsed_url) { | ||
381 | return $var === $parsed_url['host']; | ||
382 | }); | ||
383 | |||
384 | if ([] !== $filtered) { | ||
385 | return true; | ||
386 | } | ||
387 | |||
388 | $filtered = array_filter($ignored_patterns, function ($var) use ($url) { | ||
389 | return preg_match("`$var`i", $url); | ||
390 | }); | ||
391 | |||
392 | if ([] !== $filtered) { | ||
393 | return true; | ||
394 | } | ||
395 | |||
396 | return false; | ||
397 | } | ||
398 | |||
399 | /** | ||
361 | * Validate that the given content has at least a title, an html and a url. | 400 | * Validate that the given content has at least a title, an html and a url. |
362 | * | 401 | * |
363 | * @param array $content | 402 | * @param array $content |