diff options
author | Jeremy Benoist <jeremy.benoist@gmail.com> | 2019-05-29 14:18:04 +0200 |
---|---|---|
committer | Jeremy Benoist <jeremy.benoist@gmail.com> | 2019-05-29 15:56:20 +0200 |
commit | f3bfb875e94021a93e24a41fbc0f8d86d4dee378 (patch) | |
tree | 34f7efd0f3dc5326f68364e2fb2544619518c371 | |
parent | b7fa51ae7dd5fef2d9459100c88479413ddd3fb3 (diff) | |
download | wallabag-f3bfb875e94021a93e24a41fbc0f8d86d4dee378.tar.gz wallabag-f3bfb875e94021a93e24a41fbc0f8d86d4dee378.tar.zst wallabag-f3bfb875e94021a93e24a41fbc0f8d86d4dee378.zip |
Use hashed given url to avoid duplicates
Hashing the urls lets us put an index on them so that lookups stay fast.
-rw-r--r-- | app/DoctrineMigrations/Version20170710125843.php | 38 | ||||
-rw-r--r-- | app/DoctrineMigrations/Version20171218135243.php | 48 | ||||
-rw-r--r-- | app/DoctrineMigrations/Version20190601125843.php | 74 | ||||
-rw-r--r-- | src/Wallabag/CoreBundle/Entity/Entry.php | 99 | ||||
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/ContentProxy.php | 3 | ||||
-rw-r--r-- | src/Wallabag/CoreBundle/Repository/EntryRepository.php | 2 |
6 files changed, 133 insertions, 131 deletions
diff --git a/app/DoctrineMigrations/Version20170710125843.php b/app/DoctrineMigrations/Version20170710125843.php deleted file mode 100644 index 2cf8647a..00000000 --- a/app/DoctrineMigrations/Version20170710125843.php +++ /dev/null | |||
@@ -1,38 +0,0 @@ | |||
1 | <?php | ||
2 | |||
3 | namespace Application\Migrations; | ||
4 | |||
5 | use Doctrine\DBAL\Schema\Schema; | ||
6 | use Wallabag\CoreBundle\Doctrine\WallabagMigration; | ||
7 | |||
8 | /** | ||
9 | * Added `given_url` field in entry table. | ||
10 | */ | ||
11 | class Version20170710125843 extends WallabagMigration | ||
12 | { | ||
13 | /** | ||
14 | * @param Schema $schema | ||
15 | */ | ||
16 | public function up(Schema $schema) | ||
17 | { | ||
18 | $entryTable = $schema->getTable($this->getTable('entry')); | ||
19 | |||
20 | $this->skipIf($entryTable->hasColumn('given_url'), 'It seems that you already played this migration.'); | ||
21 | |||
22 | $entryTable->addColumn('given_url', 'text', [ | ||
23 | 'notnull' => false, | ||
24 | ]); | ||
25 | } | ||
26 | |||
27 | /** | ||
28 | * @param Schema $schema | ||
29 | */ | ||
30 | public function down(Schema $schema) | ||
31 | { | ||
32 | $entryTable = $schema->getTable($this->getTable('entry')); | ||
33 | |||
34 | $this->skipIf(!$entryTable->hasColumn('given_url'), 'It seems that you already played this migration.'); | ||
35 | |||
36 | $entryTable->dropColumn('given_url'); | ||
37 | } | ||
38 | } | ||
diff --git a/app/DoctrineMigrations/Version20171218135243.php b/app/DoctrineMigrations/Version20171218135243.php deleted file mode 100644 index 3060c920..00000000 --- a/app/DoctrineMigrations/Version20171218135243.php +++ /dev/null | |||
@@ -1,48 +0,0 @@ | |||
1 | <?php | ||
2 | |||
3 | namespace Application\Migrations; | ||
4 | |||
5 | use Doctrine\DBAL\Schema\Schema; | ||
6 | use Wallabag\CoreBundle\Doctrine\WallabagMigration; | ||
7 | |||
8 | /** | ||
9 | * Added indexes on wallabag_entry.url and wallabag_entry.given_url and wallabag_entry.user_id. | ||
10 | */ | ||
11 | class Version20171218135243 extends WallabagMigration | ||
12 | { | ||
13 | private $indexGivenUrl = 'IDX_entry_given_url'; | ||
14 | |||
15 | /** | ||
16 | * @param Schema $schema | ||
17 | */ | ||
18 | public function up(Schema $schema) | ||
19 | { | ||
20 | $entryTable = $schema->getTable($this->getTable('entry')); | ||
21 | $this->skipIf($entryTable->hasIndex($this->indexGivenUrl), 'It seems that you already played this migration.'); | ||
22 | |||
23 | switch ($this->connection->getDatabasePlatform()->getName()) { | ||
24 | case 'sqlite': | ||
25 | $sql = 'CREATE UNIQUE INDEX ' . $this->indexGivenUrl . ' ON ' . $this->getTable('entry') . ' (url, given_url, user_id);'; | ||
26 | break; | ||
27 | case 'mysql': | ||
28 | $sql = 'CREATE UNIQUE INDEX ' . $this->indexGivenUrl . ' ON ' . $this->getTable('entry') . ' (url (255), given_url (255), user_id);'; | ||
29 | break; | ||
30 | case 'postgresql': | ||
31 | $sql = 'CREATE UNIQUE INDEX ' . $this->indexGivenUrl . ' ON ' . $this->getTable('entry') . ' (url, given_url, user_id);'; | ||
32 | break; | ||
33 | } | ||
34 | |||
35 | $this->addSql($sql); | ||
36 | } | ||
37 | |||
38 | /** | ||
39 | * @param Schema $schema | ||
40 | */ | ||
41 | public function down(Schema $schema) | ||
42 | { | ||
43 | $entryTable = $schema->getTable($this->getTable('entry')); | ||
44 | $this->skipIf(false === $entryTable->hasIndex($this->indexGivenUrl), 'It seems that you already played this migration.'); | ||
45 | |||
46 | $entryTable->dropIndex($this->indexGivenUrl); | ||
47 | } | ||
48 | } | ||
diff --git a/app/DoctrineMigrations/Version20190601125843.php b/app/DoctrineMigrations/Version20190601125843.php new file mode 100644 index 00000000..341d64dc --- /dev/null +++ b/app/DoctrineMigrations/Version20190601125843.php | |||
@@ -0,0 +1,74 @@ | |||
1 | <?php | ||
2 | |||
3 | namespace Application\Migrations; | ||
4 | |||
5 | use Doctrine\DBAL\Schema\Schema; | ||
6 | use Wallabag\CoreBundle\Doctrine\WallabagMigration; | ||
7 | |||
8 | /** | ||
9 | * Added `given_url` & `hashed_given_url` fields in entry table and replaced the `hashed_url_user_id` index with `hashed_urls_user_id`. | ||
10 | */ | ||
11 | class Version20190601125843 extends WallabagMigration | ||
12 | { | ||
13 | /** | ||
14 | * @param Schema $schema | ||
15 | */ | ||
16 | public function up(Schema $schema) | ||
17 | { | ||
18 | $entryTable = $schema->getTable($this->getTable('entry')); | ||
19 | |||
20 | if (!$entryTable->hasColumn('given_url')) { | ||
21 | $entryTable->addColumn('given_url', 'text', [ | ||
22 | 'notnull' => false, | ||
23 | ]); | ||
24 | } | ||
25 | |||
26 | if (!$entryTable->hasColumn('hashed_given_url')) { | ||
27 | $entryTable->addColumn('hashed_given_url', 'text', [ | ||
28 | 'length' => 40, | ||
29 | 'notnull' => false, | ||
30 | ]); | ||
31 | } | ||
32 | |||
33 | $entryTable->dropIndex('hashed_url_user_id'); | ||
34 | $entryTable->addIndex( | ||
35 | [ | ||
36 | 'user_id', | ||
37 | 'hashed_url', | ||
38 | 'hashed_given_url', | ||
39 | ], | ||
40 | 'hashed_urls_user_id', | ||
41 | [], | ||
42 | [ | ||
43 | // specify length for index which is required by MySQL on text field | ||
44 | 'lengths' => [ | ||
45 | // user_id | ||
46 | null, | ||
47 | // hashed_url | ||
48 | 40, | ||
49 | // hashed_given_url | ||
50 | 40, | ||
51 | ], | ||
52 | ] | ||
53 | ); | ||
54 | } | ||
55 | |||
56 | /** | ||
57 | * @param Schema $schema | ||
58 | */ | ||
59 | public function down(Schema $schema) | ||
60 | { | ||
61 | $entryTable = $schema->getTable($this->getTable('entry')); | ||
62 | |||
63 | if ($entryTable->hasColumn('given_url')) { | ||
64 | $entryTable->dropColumn('given_url'); | ||
65 | } | ||
66 | |||
67 | if ($entryTable->hasColumn('hashed_given_url')) { | ||
68 | $entryTable->dropColumn('hashed_given_url'); | ||
69 | } | ||
70 | |||
71 | $entryTable->dropIndex('hashed_urls_user_id'); | ||
72 | $entryTable->addIndex(['user_id', 'hashed_url'], 'hashed_url_user_id', [], ['lengths' => [null, 40]]); | ||
73 | } | ||
74 | } | ||
diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index 62274136..304dd1b3 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php | |||
@@ -27,9 +27,8 @@ use Wallabag\UserBundle\Entity\User; | |||
27 | * indexes={ | 27 | * indexes={ |
28 | * @ORM\Index(name="created_at", columns={"created_at"}), | 28 | * @ORM\Index(name="created_at", columns={"created_at"}), |
29 | * @ORM\Index(name="uid", columns={"uid"}), | 29 | * @ORM\Index(name="uid", columns={"uid"}), |
30 | * @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"}, options={"lengths"={null, 40}}) | 30 | * @ORM\Index(name="hashed_urls_user_id", columns={"user_id", "hashed_url", "hashed_given_url"}, options={"lengths"={null, 40, 40}}) |
31 | * }, | 31 | * } |
32 | * uniqueConstraints={@ORM\UniqueConstraint(name="IDX_entry_given_url",columns={"url", "given_url", "user_id"})} | ||
33 | * ) | 32 | * ) |
34 | * @ORM\HasLifecycleCallbacks() | 33 | * @ORM\HasLifecycleCallbacks() |
35 | * @Hateoas\Relation("self", href = "expr('/api/entries/' ~ object.getId())") | 34 | * @Hateoas\Relation("self", href = "expr('/api/entries/' ~ object.getId())") |
@@ -69,30 +68,52 @@ class Entry | |||
69 | private $title; | 68 | private $title; |
70 | 69 | ||
71 | /** | 70 | /** |
71 | * Define the url fetched by wallabag (the final url after potential redirections). | ||
72 | * | ||
72 | * @var string | 73 | * @var string |
73 | * | 74 | * |
74 | * @ORM\Column(name="given_url", type="text", nullable=true) | 75 | * @Assert\NotBlank() |
76 | * @ORM\Column(name="url", type="text", nullable=true) | ||
75 | * | 77 | * |
76 | * @Groups({"entries_for_user", "export_all"}) | 78 | * @Groups({"entries_for_user", "export_all"}) |
77 | */ | 79 | */ |
78 | private $givenUrl; | 80 | private $url; |
79 | 81 | ||
80 | /** | 82 | /** |
81 | * @var string | 83 | * @var string |
82 | * | 84 | * |
83 | * @Assert\NotBlank() | 85 | * @ORM\Column(name="hashed_url", type="string", length=40, nullable=true) |
84 | * @ORM\Column(name="url", type="text", nullable=true) | 86 | */ |
87 | private $hashedUrl; | ||
88 | |||
89 | /** | ||
90 | * Where the user retrieved/found the url (another article, a tweet, or the given_url if none is provided). | ||
91 | * | ||
92 | * @var string | ||
93 | * | ||
94 | * @ORM\Column(name="origin_url", type="text", nullable=true) | ||
85 | * | 95 | * |
86 | * @Groups({"entries_for_user", "export_all"}) | 96 | * @Groups({"entries_for_user", "export_all"}) |
87 | */ | 97 | */ |
88 | private $url; | 98 | private $originUrl; |
89 | 99 | ||
90 | /** | 100 | /** |
101 | * Define the url entered by the user (without redirections). | ||
102 | * | ||
91 | * @var string | 103 | * @var string |
92 | * | 104 | * |
93 | * @ORM\Column(name="hashed_url", type="string", length=40, nullable=true) | 105 | * @ORM\Column(name="given_url", type="text", nullable=true) |
106 | * | ||
107 | * @Groups({"entries_for_user", "export_all"}) | ||
94 | */ | 108 | */ |
95 | private $hashedUrl; | 109 | private $givenUrl; |
110 | |||
111 | /** | ||
112 | * @var string | ||
113 | * | ||
114 | * @ORM\Column(name="hashed_given_url", type="string", length=40, nullable=true) | ||
115 | */ | ||
116 | private $hashedGivenUrl; | ||
96 | 117 | ||
97 | /** | 118 | /** |
98 | * @var bool | 119 | * @var bool |
@@ -273,15 +294,6 @@ class Entry | |||
273 | */ | 294 | */ |
274 | private $tags; | 295 | private $tags; |
275 | 296 | ||
276 | /** | ||
277 | * @var string | ||
278 | * | ||
279 | * @ORM\Column(name="origin_url", type="text", nullable=true) | ||
280 | * | ||
281 | * @Groups({"entries_for_user", "export_all"}) | ||
282 | */ | ||
283 | private $originUrl; | ||
284 | |||
285 | /* | 297 | /* |
286 | * @param User $user | 298 | * @param User $user |
287 | */ | 299 | */ |
@@ -326,30 +338,6 @@ class Entry | |||
326 | } | 338 | } |
327 | 339 | ||
328 | /** | 340 | /** |
329 | * Set given url. | ||
330 | * | ||
331 | * @param string $givenUrl | ||
332 | * | ||
333 | * @return Entry | ||
334 | */ | ||
335 | public function setGivenUrl($givenUrl) | ||
336 | { | ||
337 | $this->givenUrl = $givenUrl; | ||
338 | |||
339 | return $this; | ||
340 | } | ||
341 | |||
342 | /** | ||
343 | * Get given Url. | ||
344 | * | ||
345 | * @return string | ||
346 | */ | ||
347 | public function getGivenUrl() | ||
348 | { | ||
349 | return $this->givenUrl; | ||
350 | } | ||
351 | |||
352 | /** | ||
353 | * Set url. | 341 | * Set url. |
354 | * | 342 | * |
355 | * @param string $url | 343 | * @param string $url |
@@ -957,6 +945,31 @@ class Entry | |||
957 | } | 945 | } |
958 | 946 | ||
959 | /** | 947 | /** |
948 | * Set given url. | ||
949 | * | ||
950 | * @param string $givenUrl | ||
951 | * | ||
952 | * @return Entry | ||
953 | */ | ||
954 | public function setGivenUrl($givenUrl) | ||
955 | { | ||
956 | $this->givenUrl = $givenUrl; | ||
957 | $this->hashedGivenUrl = UrlHasher::hashUrl($givenUrl); | ||
958 | |||
959 | return $this; | ||
960 | } | ||
961 | |||
962 | /** | ||
963 | * Get given url. | ||
964 | * | ||
965 | * @return string | ||
966 | */ | ||
967 | public function getGivenUrl() | ||
968 | { | ||
969 | return $this->givenUrl; | ||
970 | } | ||
971 | |||
972 | /** | ||
960 | * @return string | 973 | * @return string |
961 | */ | 974 | */ |
962 | public function getHashedUrl() | 975 | public function getHashedUrl() |
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 0d6a412d..5901df8b 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -76,9 +76,10 @@ class ContentProxy | |||
76 | // Not sure what are the other possible cases where this property is empty | 76 | // Not sure what are the other possible cases where this property is empty |
77 | if (empty($entry->getUrl()) && !empty($url)) { | 77 | if (empty($entry->getUrl()) && !empty($url)) { |
78 | $entry->setUrl($url); | 78 | $entry->setUrl($url); |
79 | $entry->setGivenUrl($url); | ||
80 | } | 79 | } |
81 | 80 | ||
81 | $entry->setGivenUrl($url); | ||
82 | |||
82 | $this->stockEntry($entry, $content); | 83 | $this->stockEntry($entry, $content); |
83 | } | 84 | } |
84 | 85 | ||
diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php index 299b0b27..8b29aad2 100644 --- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php +++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php | |||
@@ -368,7 +368,7 @@ class EntryRepository extends EntityRepository | |||
368 | { | 368 | { |
369 | $res = $this->createQueryBuilder('e') | 369 | $res = $this->createQueryBuilder('e') |
370 | ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl) | 370 | ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl) |
371 | // ->orWhere('e.givenUrl = :url')->setParameter('url', $url) | 371 | ->orWhere('e.hashedGivenUrl = :hashed_given_url')->setParameter('hashed_given_url', $hashedUrl) |
372 | ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) | 372 | ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) |
373 | ->getQuery() | 373 | ->getQuery() |
374 | ->getResult(); | 374 | ->getResult(); |