*/
private function stockEntry(Entry $entry, array $content)
{
- // When a redirection occurs while fetching an entry
- // we move the original url in origin_url property if empty
- // and set the entry url with the final value
- if (!empty($content['url']) && $entry->getUrl() !== $content['url']) {
- if (empty($entry->getOriginUrl())) {
- $entry->setOriginUrl($entry->getUrl());
- }
- $entry->setUrl($content['url']);
- }
+ $this->updateOriginUrl($entry, $content['url']);
$this->setEntryDomainName($entry);
}
}
+ /**
+ * Update the origin_url field when a redirection occurs
+ * This field is set if it is empty and new url does not match ignore list.
+ *
+ * @param Entry $entry
+ * @param string $url
+ */
+ private function updateOriginUrl(Entry $entry, $url)
+ {
+ if (!empty($url) && $entry->getUrl() !== $url) {
+ $parsed_entry_url = parse_url($entry->getUrl());
+ $parsed_content_url = parse_url($url);
+
+ $diff_ec = array_diff_assoc($parsed_entry_url, $parsed_content_url);
+ $diff_ce = array_diff_assoc($parsed_content_url, $parsed_entry_url);
+
+ $diff = array_merge($diff_ec, $diff_ce);
+ $diff_keys = array_keys($diff);
+ sort($diff_keys);
+
+ switch ($diff_keys) {
+ case ['path']:
+ if (($parsed_entry_url['path'] . '/' === $parsed_content_url['path']) // diff is trailing slash, we only replace the url of the entry
+ || ($url === urldecode($entry->getUrl()))) { // we update entry url if new url is a decoded version of it, see EntryRepository#findByUrlAndUserId
+ $entry->setUrl($url);
+ }
+ break;
+ case ['scheme']:
+ $entry->setUrl($url);
+ break;
+ case ['fragment']:
+ case ['query']:
+ // noop
+ break;
+ default:
+ if (empty($entry->getOriginUrl())) {
+ $entry->setOriginUrl($entry->getUrl());
+ }
+ $entry->setUrl($url);
+ break;
+ }
+ }
+ }
+
/**
* Validate that the given content has at least a title, an html and a url.
*
$this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
}
+ /**
+ * Data provider for testWithChangedUrl.
+ *
+ * Arrays contain the following values:
+ * $entry_url
+ * $origin_url
+ * $content_url
+ * $expected_entry_url
+ * $expected_origin_url
+ * $expected_domain
+ */
+ public function dataForChangedUrl()
+ {
+ return [
+ 'normal' => [
+ 'http://0.0.0.0',
+ null,
+ 'http://1.1.1.1',
+ 'http://1.1.1.1',
+ 'http://0.0.0.0',
+ '1.1.1.1',
+ ],
+ 'origin already set' => [
+ 'http://0.0.0.0',
+ 'http://hello',
+ 'http://1.1.1.1',
+ 'http://1.1.1.1',
+ 'http://hello',
+ '1.1.1.1',
+ ],
+ 'trailing slash' => [
+ 'https://example.com/hello-world',
+ null,
+ 'https://example.com/hello-world/',
+ 'https://example.com/hello-world/',
+ null,
+ 'example.com',
+ ],
+ 'no query string in fetched content' => [
+ 'https://example.org/hello?world=1',
+ null,
+ 'https://example.org/hello',
+ 'https://example.org/hello?world=1',
+ null,
+ 'example.org',
+ ],
+ 'query string in fetched content' => [
+ 'https://example.org/hello',
+ null,
+ 'https://example.org/hello?world=1',
+ 'https://example.org/hello',
+ null,
+ 'example.org',
+ ],
+ 'fragment in fetched content' => [
+ 'https://example.org/hello',
+ null,
+ 'https://example.org/hello#world',
+ 'https://example.org/hello',
+ null,
+ 'example.org',
+ ],
+ ];
+ }
+
+ /**
+ * @dataProvider dataForChangedUrl
+ */
+ public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
+ {
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
+ $entry = new Entry(new User());
+ $entry->setOriginUrl($origin_url);
+ $proxy->updateEntry(
+ $entry,
+ $entry_url,
+ [
+ 'html' => false,
+ 'title' => '',
+ 'url' => $content_url,
+ 'content_type' => '',
+ 'language' => '',
+ ],
+ true
+ );
+
+ $this->assertSame($expected_entry_url, $entry->getUrl());
+ $this->assertSame($expected_domain, $entry->getDomainName());
+ $this->assertSame($expected_origin_url, $entry->getOriginUrl());
+ }
+
/**
* https://stackoverflow.com/a/18506801.
*
return $string;
}
- public function testWithChangedUrl()
- {
- $tagger = $this->getTaggerMock();
- $tagger->expects($this->once())
- ->method('tag');
-
- $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
- $entry = new Entry(new User());
- $proxy->updateEntry(
- $entry,
- 'http://0.0.0.0',
- [
- 'html' => false,
- 'title' => '',
- 'url' => 'http://1.1.1.1',
- 'content_type' => '',
- 'language' => '',
- ],
- true
- );
-
- $this->assertSame('http://1.1.1.1', $entry->getUrl());
- $this->assertSame('1.1.1.1', $entry->getDomainName());
- $this->assertSame('http://0.0.0.0', $entry->getOriginUrl());
- }
-
private function getTaggerMock()
{
return $this->getMockBuilder(RuleBasedTagger::class)