aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/Wallabag
diff options
context:
space:
mode:
Diffstat (limited to 'src/Wallabag')
-rw-r--r--src/Wallabag/CoreBundle/Helper/ContentProxy.php79
1 files changed, 59 insertions, 20 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index 1a2a330f..2dc436f8 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -332,32 +332,71 @@ class ContentProxy
332 $diff_keys = array_keys($diff); 332 $diff_keys = array_keys($diff);
333 sort($diff_keys); 333 sort($diff_keys);
334 334
335 switch ($diff_keys) { 335 if ($this->ignoreUrl($entry->getUrl())) {
336 case ['path']: 336 $entry->setUrl($url);
337 if (($parsed_entry_url['path'] . '/' === $parsed_content_url['path']) // diff is trailing slash, we only replace the url of the entry 337 } else {
338 || ($url === urldecode($entry->getUrl()))) { // we update entry url if new url is a decoded version of it, see EntryRepository#findByUrlAndUserId 338 switch ($diff_keys) {
339 case ['path']:
340 if (($parsed_entry_url['path'] . '/' === $parsed_content_url['path']) // diff is trailing slash, we only replace the url of the entry
341 || ($url === urldecode($entry->getUrl()))) { // we update entry url if new url is a decoded version of it, see EntryRepository#findByUrlAndUserId
342 $entry->setUrl($url);
343 }
344 break;
345 case ['scheme']:
339 $entry->setUrl($url); 346 $entry->setUrl($url);
340 } 347 break;
341 break; 348 case ['fragment']:
342 case ['scheme']: 349 case ['query']:
343 $entry->setUrl($url); 350 case ['fragment', 'query']:
344 break; 351 // noop
345 case ['fragment']: 352 break;
346 case ['query']: 353 default:
347 case ['fragment', 'query']: 354 if (empty($entry->getOriginUrl())) {
348 // noop 355 $entry->setOriginUrl($entry->getUrl());
349 break; 356 }
350 default: 357 $entry->setUrl($url);
351 if (empty($entry->getOriginUrl())) { 358 break;
352 $entry->setOriginUrl($entry->getUrl()); 359 }
353 }
354 $entry->setUrl($url);
355 break;
356 } 360 }
357 } 361 }
358 } 362 }
359 363
/**
 * Check entry url against an ignore list to replace with content url.
 *
 * A url is ignored when its host is one of the known redirector hosts
 * (compared case-insensitively) or when the whole url matches one of the
 * ignored patterns.
 *
 * XXX: move the ignore list in the database to let users handle it
 *
 * @param string $url url to test
 *
 * @return bool true if url matches ignore list otherwise false
 */
private function ignoreUrl($url)
{
    $ignored_hosts = ['feedproxy.google.com', 'feeds.reuters.com'];
    $ignored_patterns = ['https?://www\.lemonde\.fr/tiny.*'];

    // Ask parse_url() for the host only: it yields null when the url has no
    // host (relative or scheme-less url) and false when the url is seriously
    // malformed, so we never index into a missing key or a boolean.
    $host = parse_url($url, PHP_URL_HOST);

    // Host names are case-insensitive, so normalise before the strict lookup.
    if (is_string($host) && in_array(strtolower($host), $ignored_hosts, true)) {
        return true;
    }

    // Patterns are applied to the full url, case-insensitively; stop at the first hit.
    foreach ($ignored_patterns as $pattern) {
        if (1 === preg_match("`$pattern`i", $url)) {
            return true;
        }
    }

    return false;
}
398
399 /**
361 * Validate that the given content has at least a title, an html and a url. 400 * Validate that the given content has at least a title, an html and a url.
362 * 401 *
363 * @param array $content 402 * @param array $content