aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJeremy Benoist <jeremy.benoist@gmail.com>2019-05-29 14:18:04 +0200
committerJeremy Benoist <jeremy.benoist@gmail.com>2019-05-29 15:56:20 +0200
commitf3bfb875e94021a93e24a41fbc0f8d86d4dee378 (patch)
tree34f7efd0f3dc5326f68364e2fb2544619518c371
parentb7fa51ae7dd5fef2d9459100c88479413ddd3fb3 (diff)
downloadwallabag-f3bfb875e94021a93e24a41fbc0f8d86d4dee378.tar.gz
wallabag-f3bfb875e94021a93e24a41fbc0f8d86d4dee378.tar.zst
wallabag-f3bfb875e94021a93e24a41fbc0f8d86d4dee378.zip
Use hashed given url to avoid duplicates
By hashing the urls we can put an index on them so that lookups stay fast.
-rw-r--r--app/DoctrineMigrations/Version20170710125843.php38
-rw-r--r--app/DoctrineMigrations/Version20171218135243.php48
-rw-r--r--app/DoctrineMigrations/Version20190601125843.php74
-rw-r--r--src/Wallabag/CoreBundle/Entity/Entry.php99
-rw-r--r--src/Wallabag/CoreBundle/Helper/ContentProxy.php3
-rw-r--r--src/Wallabag/CoreBundle/Repository/EntryRepository.php2
6 files changed, 133 insertions, 131 deletions
diff --git a/app/DoctrineMigrations/Version20170710125843.php b/app/DoctrineMigrations/Version20170710125843.php
deleted file mode 100644
index 2cf8647a..00000000
--- a/app/DoctrineMigrations/Version20170710125843.php
+++ /dev/null
@@ -1,38 +0,0 @@
1<?php
2
3namespace Application\Migrations;
4
5use Doctrine\DBAL\Schema\Schema;
6use Wallabag\CoreBundle\Doctrine\WallabagMigration;
7
/**
 * Adds the nullable `given_url` text column to the entry table.
 */
class Version20170710125843 extends WallabagMigration
{
    /**
     * Create the `given_url` column; the migration is skipped when the
     * column is already present.
     *
     * @param Schema $schema
     */
    public function up(Schema $schema)
    {
        $table = $schema->getTable($this->getTable('entry'));
        $columnExists = $table->hasColumn('given_url');

        $this->skipIf($columnExists, 'It seems that you already played this migration.');

        $table->addColumn('given_url', 'text', ['notnull' => false]);
    }

    /**
     * Remove the `given_url` column; the migration is skipped when the
     * column is not present.
     *
     * @param Schema $schema
     */
    public function down(Schema $schema)
    {
        $table = $schema->getTable($this->getTable('entry'));
        $columnMissing = !$table->hasColumn('given_url');

        $this->skipIf($columnMissing, 'It seems that you already played this migration.');

        $table->dropColumn('given_url');
    }
}
diff --git a/app/DoctrineMigrations/Version20171218135243.php b/app/DoctrineMigrations/Version20171218135243.php
deleted file mode 100644
index 3060c920..00000000
--- a/app/DoctrineMigrations/Version20171218135243.php
+++ /dev/null
@@ -1,48 +0,0 @@
1<?php
2
3namespace Application\Migrations;
4
5use Doctrine\DBAL\Schema\Schema;
6use Wallabag\CoreBundle\Doctrine\WallabagMigration;
7
/**
 * Adds a unique composite index on the entry table covering
 * `url`, `given_url` and `user_id`.
 */
class Version20171218135243 extends WallabagMigration
{
    private $indexGivenUrl = 'IDX_entry_given_url';

    /**
     * Create the unique index with platform-specific SQL.
     *
     * The migration is skipped when the index already exists and aborted
     * when the database platform is not one of sqlite, mysql or postgresql.
     *
     * @param Schema $schema
     */
    public function up(Schema $schema)
    {
        $entryTable = $schema->getTable($this->getTable('entry'));
        $this->skipIf($entryTable->hasIndex($this->indexGivenUrl), 'It seems that you already played this migration.');

        $platform = $this->connection->getDatabasePlatform()->getName();

        switch ($platform) {
            case 'mysql':
                // The MySQL variant indexes a 255-character prefix of both url columns.
                $columns = 'url (255), given_url (255), user_id';
                break;
            case 'sqlite':
            case 'postgresql':
                $columns = 'url, given_url, user_id';
                break;
            default:
                // Without this branch the statement below would be built from an undefined variable.
                $this->abortIf(true, sprintf('Unsupported database platform "%s".', $platform));

                return;
        }

        $this->addSql('CREATE UNIQUE INDEX ' . $this->indexGivenUrl . ' ON ' . $this->getTable('entry') . ' (' . $columns . ');');
    }

    /**
     * Drop the unique index; the migration is skipped when the index
     * does not exist.
     *
     * @param Schema $schema
     */
    public function down(Schema $schema)
    {
        $entryTable = $schema->getTable($this->getTable('entry'));
        $this->skipIf(false === $entryTable->hasIndex($this->indexGivenUrl), 'It seems that you already played this migration.');

        $entryTable->dropIndex($this->indexGivenUrl);
    }
}
diff --git a/app/DoctrineMigrations/Version20190601125843.php b/app/DoctrineMigrations/Version20190601125843.php
new file mode 100644
index 00000000..341d64dc
--- /dev/null
+++ b/app/DoctrineMigrations/Version20190601125843.php
@@ -0,0 +1,74 @@
1<?php
2
3namespace Application\Migrations;
4
5use Doctrine\DBAL\Schema\Schema;
6use Wallabag\CoreBundle\Doctrine\WallabagMigration;
7
/**
 * Adds the `given_url` & `hashed_given_url` fields to the entry table and
 * replaces the `hashed_url_user_id` index with `hashed_urls_user_id`, which
 * also covers `hashed_given_url`.
 */
class Version20190601125843 extends WallabagMigration
{
    /**
     * Index on (user_id, hashed_url) that this migration replaces.
     *
     * @var string
     */
    private $legacyIndexName = 'hashed_url_user_id';

    /**
     * Index on (user_id, hashed_url, hashed_given_url) created by this migration.
     *
     * @var string
     */
    private $combinedIndexName = 'hashed_urls_user_id';

    /**
     * Add the missing columns and swap the legacy index for the combined one.
     *
     * Every step is guarded by an existence check so the migration can run
     * against a schema where part of the work has already been done.
     *
     * @param Schema $schema
     */
    public function up(Schema $schema)
    {
        $entryTable = $schema->getTable($this->getTable('entry'));

        if (!$entryTable->hasColumn('given_url')) {
            $entryTable->addColumn('given_url', 'text', [
                'notnull' => false,
            ]);
        }

        if (!$entryTable->hasColumn('hashed_given_url')) {
            $entryTable->addColumn('hashed_given_url', 'text', [
                'length' => 40,
                'notnull' => false,
            ]);
        }

        // dropIndex() throws when the index is missing, so check first.
        if ($entryTable->hasIndex($this->legacyIndexName)) {
            $entryTable->dropIndex($this->legacyIndexName);
        }

        // addIndex() throws when the name is already taken, so check first.
        if (!$entryTable->hasIndex($this->combinedIndexName)) {
            $entryTable->addIndex(
                [
                    'user_id',
                    'hashed_url',
                    'hashed_given_url',
                ],
                $this->combinedIndexName,
                [],
                [
                    // specify length for index which is required by MySQL on text field
                    'lengths' => [
                        // user_id
                        null,
                        // hashed_url
                        40,
                        // hashed_given_url
                        40,
                    ],
                ]
            );
        }
    }

    /**
     * Drop the combined index and both columns, then restore the legacy index.
     *
     * @param Schema $schema
     */
    public function down(Schema $schema)
    {
        $entryTable = $schema->getTable($this->getTable('entry'));

        // Remove the index before the column it references.
        if ($entryTable->hasIndex($this->combinedIndexName)) {
            $entryTable->dropIndex($this->combinedIndexName);
        }

        if ($entryTable->hasColumn('given_url')) {
            $entryTable->dropColumn('given_url');
        }

        if ($entryTable->hasColumn('hashed_given_url')) {
            $entryTable->dropColumn('hashed_given_url');
        }

        if (!$entryTable->hasIndex($this->legacyIndexName)) {
            $entryTable->addIndex(['user_id', 'hashed_url'], $this->legacyIndexName, [], ['lengths' => [null, 40]]);
        }
    }
}
diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php
index 62274136..304dd1b3 100644
--- a/src/Wallabag/CoreBundle/Entity/Entry.php
+++ b/src/Wallabag/CoreBundle/Entity/Entry.php
@@ -27,9 +27,8 @@ use Wallabag\UserBundle\Entity\User;
27 * indexes={ 27 * indexes={
28 * @ORM\Index(name="created_at", columns={"created_at"}), 28 * @ORM\Index(name="created_at", columns={"created_at"}),
29 * @ORM\Index(name="uid", columns={"uid"}), 29 * @ORM\Index(name="uid", columns={"uid"}),
30 * @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"}, options={"lengths"={null, 40}}) 30 * @ORM\Index(name="hashed_urls_user_id", columns={"user_id", "hashed_url", "hashed_given_url"}, options={"lengths"={null, 40, 40}})
31 * }, 31 * }
32 * uniqueConstraints={@ORM\UniqueConstraint(name="IDX_entry_given_url",columns={"url", "given_url", "user_id"})}
33 * ) 32 * )
34 * @ORM\HasLifecycleCallbacks() 33 * @ORM\HasLifecycleCallbacks()
35 * @Hateoas\Relation("self", href = "expr('/api/entries/' ~ object.getId())") 34 * @Hateoas\Relation("self", href = "expr('/api/entries/' ~ object.getId())")
@@ -69,30 +68,52 @@ class Entry
69 private $title; 68 private $title;
70 69
71 /** 70 /**
71 * Define the url fetched by wallabag (the final url after potential redirections).
72 *
72 * @var string 73 * @var string
73 * 74 *
74 * @ORM\Column(name="given_url", type="text", nullable=true) 75 * @Assert\NotBlank()
76 * @ORM\Column(name="url", type="text", nullable=true)
75 * 77 *
76 * @Groups({"entries_for_user", "export_all"}) 78 * @Groups({"entries_for_user", "export_all"})
77 */ 79 */
78 private $givenUrl; 80 private $url;
79 81
80 /** 82 /**
81 * @var string 83 * @var string
82 * 84 *
83 * @Assert\NotBlank() 85 * @ORM\Column(name="hashed_url", type="string", length=40, nullable=true)
84 * @ORM\Column(name="url", type="text", nullable=true) 86 */
87 private $hashedUrl;
88
89 /**
90 * Where the user retrieved/found the url from (another article, a tweet, or the given_url if none is provided).
91 *
92 * @var string
93 *
94 * @ORM\Column(name="origin_url", type="text", nullable=true)
85 * 95 *
86 * @Groups({"entries_for_user", "export_all"}) 96 * @Groups({"entries_for_user", "export_all"})
87 */ 97 */
88 private $url; 98 private $originUrl;
89 99
90 /** 100 /**
101 * Define the url entered by the user (without redirections).
102 *
91 * @var string 103 * @var string
92 * 104 *
93 * @ORM\Column(name="hashed_url", type="string", length=40, nullable=true) 105 * @ORM\Column(name="given_url", type="text", nullable=true)
106 *
107 * @Groups({"entries_for_user", "export_all"})
94 */ 108 */
95 private $hashedUrl; 109 private $givenUrl;
110
111 /**
112 * @var string
113 *
114 * @ORM\Column(name="hashed_given_url", type="string", length=40, nullable=true)
115 */
116 private $hashedGivenUrl;
96 117
97 /** 118 /**
98 * @var bool 119 * @var bool
@@ -273,15 +294,6 @@ class Entry
273 */ 294 */
274 private $tags; 295 private $tags;
275 296
276 /**
277 * @var string
278 *
279 * @ORM\Column(name="origin_url", type="text", nullable=true)
280 *
281 * @Groups({"entries_for_user", "export_all"})
282 */
283 private $originUrl;
284
285 /* 297 /*
286 * @param User $user 298 * @param User $user
287 */ 299 */
@@ -326,30 +338,6 @@ class Entry
326 } 338 }
327 339
328 /** 340 /**
329 * Set given url.
330 *
331 * @param string $givenUrl
332 *
333 * @return Entry
334 */
public function setGivenUrl($givenUrl)
{
    // Store the value as provided and return the entry for chaining.
    $this->givenUrl = $givenUrl;

    return $this;
}
341
342 /**
343 * Get given Url.
344 *
345 * @return string
346 */
public function getGivenUrl()
{
    // Plain accessor for the stored given url.
    return $this->givenUrl;
}
351
352 /**
353 * Set url. 341 * Set url.
354 * 342 *
355 * @param string $url 343 * @param string $url
@@ -957,6 +945,31 @@ class Entry
957 } 945 }
958 946
959 /** 947 /**
948 * Set given url.
949 *
950 * @param string $givenUrl
951 *
952 * @return Entry
953 */
public function setGivenUrl($givenUrl)
{
    $this->givenUrl = $givenUrl;
    // Refresh the stored hash whenever the given url changes.
    $this->hashedGivenUrl = UrlHasher::hashUrl($this->givenUrl);

    return $this;
}
961
962 /**
963 * Get given url.
964 *
965 * @return string
966 */
public function getGivenUrl()
{
    // Plain accessor for the stored given url.
    return $this->givenUrl;
}
971
972 /**
960 * @return string 973 * @return string
961 */ 974 */
962 public function getHashedUrl() 975 public function getHashedUrl()
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index 0d6a412d..5901df8b 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -76,9 +76,10 @@ class ContentProxy
76 // Not sure what are the other possible cases where this property is empty 76 // Not sure what are the other possible cases where this property is empty
77 if (empty($entry->getUrl()) && !empty($url)) { 77 if (empty($entry->getUrl()) && !empty($url)) {
78 $entry->setUrl($url); 78 $entry->setUrl($url);
79 $entry->setGivenUrl($url);
80 } 79 }
81 80
81 $entry->setGivenUrl($url);
82
82 $this->stockEntry($entry, $content); 83 $this->stockEntry($entry, $content);
83 } 84 }
84 85
diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php
index 299b0b27..8b29aad2 100644
--- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php
+++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php
@@ -368,7 +368,7 @@ class EntryRepository extends EntityRepository
368 { 368 {
369 $res = $this->createQueryBuilder('e') 369 $res = $this->createQueryBuilder('e')
370 ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl) 370 ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl)
371 // ->orWhere('e.givenUrl = :url')->setParameter('url', $url) 371 ->orWhere('e.hashedGivenUrl = :hashed_given_url')->setParameter('hashed_given_url', $hashedUrl)
372 ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) 372 ->andWhere('e.user = :user_id')->setParameter('user_id', $userId)
373 ->getQuery() 373 ->getQuery()
374 ->getResult(); 374 ->getResult();