diff options
author | Jeremy Benoist <jeremy.benoist@gmail.com> | 2019-05-29 14:18:04 +0200 |
---|---|---|
committer | Jeremy Benoist <jeremy.benoist@gmail.com> | 2019-05-29 15:56:20 +0200 |
commit | f3bfb875e94021a93e24a41fbc0f8d86d4dee378 (patch) | |
tree | 34f7efd0f3dc5326f68364e2fb2544619518c371 /src | |
parent | b7fa51ae7dd5fef2d9459100c88479413ddd3fb3 (diff) | |
download | wallabag-f3bfb875e94021a93e24a41fbc0f8d86d4dee378.tar.gz wallabag-f3bfb875e94021a93e24a41fbc0f8d86d4dee378.tar.zst wallabag-f3bfb875e94021a93e24a41fbc0f8d86d4dee378.zip |
Use the hashed given url to avoid duplicates
By hashing the urls we can put an index on them, which keeps the duplicate lookup fast.
Diffstat (limited to 'src')
-rw-r--r-- | src/Wallabag/CoreBundle/Entity/Entry.php | 99 | ||||
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/ContentProxy.php | 3 | ||||
-rw-r--r-- | src/Wallabag/CoreBundle/Repository/EntryRepository.php | 2 |
3 files changed, 59 insertions, 45 deletions
diff --git a/src/Wallabag/CoreBundle/Entity/Entry.php b/src/Wallabag/CoreBundle/Entity/Entry.php index 62274136..304dd1b3 100644 --- a/src/Wallabag/CoreBundle/Entity/Entry.php +++ b/src/Wallabag/CoreBundle/Entity/Entry.php | |||
@@ -27,9 +27,8 @@ use Wallabag\UserBundle\Entity\User; | |||
27 | * indexes={ | 27 | * indexes={ |
28 | * @ORM\Index(name="created_at", columns={"created_at"}), | 28 | * @ORM\Index(name="created_at", columns={"created_at"}), |
29 | * @ORM\Index(name="uid", columns={"uid"}), | 29 | * @ORM\Index(name="uid", columns={"uid"}), |
30 | * @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"}, options={"lengths"={null, 40}}) | 30 | * @ORM\Index(name="hashed_urls_user_id", columns={"user_id", "hashed_url", "hashed_given_url"}, options={"lengths"={null, 40, 40}}) |
31 | * }, | 31 | * } |
32 | * uniqueConstraints={@ORM\UniqueConstraint(name="IDX_entry_given_url",columns={"url", "given_url", "user_id"})} | ||
33 | * ) | 32 | * ) |
34 | * @ORM\HasLifecycleCallbacks() | 33 | * @ORM\HasLifecycleCallbacks() |
35 | * @Hateoas\Relation("self", href = "expr('/api/entries/' ~ object.getId())") | 34 | * @Hateoas\Relation("self", href = "expr('/api/entries/' ~ object.getId())") |
@@ -69,30 +68,52 @@ class Entry | |||
69 | private $title; | 68 | private $title; |
70 | 69 | ||
71 | /** | 70 | /** |
71 | * Define the url fetched by wallabag (the final url after potential redirections). | ||
72 | * | ||
72 | * @var string | 73 | * @var string |
73 | * | 74 | * |
74 | * @ORM\Column(name="given_url", type="text", nullable=true) | 75 | * @Assert\NotBlank() |
76 | * @ORM\Column(name="url", type="text", nullable=true) | ||
75 | * | 77 | * |
76 | * @Groups({"entries_for_user", "export_all"}) | 78 | * @Groups({"entries_for_user", "export_all"}) |
77 | */ | 79 | */ |
78 | private $givenUrl; | 80 | private $url; |
79 | 81 | ||
80 | /** | 82 | /** |
81 | * @var string | 83 | * @var string |
82 | * | 84 | * |
83 | * @Assert\NotBlank() | 85 | * @ORM\Column(name="hashed_url", type="string", length=40, nullable=true) |
84 | * @ORM\Column(name="url", type="text", nullable=true) | 86 | */ |
87 | private $hashedUrl; | ||
88 | |||
89 | /** | ||
90 | * Where the user retrieved/found the url from (another article, a tweet, or the given_url if none is provided). | ||
91 | * | ||
92 | * @var string | ||
93 | * | ||
94 | * @ORM\Column(name="origin_url", type="text", nullable=true) | ||
85 | * | 95 | * |
86 | * @Groups({"entries_for_user", "export_all"}) | 96 | * @Groups({"entries_for_user", "export_all"}) |
87 | */ | 97 | */ |
88 | private $url; | 98 | private $originUrl; |
89 | 99 | ||
90 | /** | 100 | /** |
101 | * Define the url entered by the user (without redirections). | ||
102 | * | ||
91 | * @var string | 103 | * @var string |
92 | * | 104 | * |
93 | * @ORM\Column(name="hashed_url", type="string", length=40, nullable=true) | 105 | * @ORM\Column(name="given_url", type="text", nullable=true) |
106 | * | ||
107 | * @Groups({"entries_for_user", "export_all"}) | ||
94 | */ | 108 | */ |
95 | private $hashedUrl; | 109 | private $givenUrl; |
110 | |||
111 | /** | ||
112 | * @var string | ||
113 | * | ||
114 | * @ORM\Column(name="hashed_given_url", type="string", length=40, nullable=true) | ||
115 | */ | ||
116 | private $hashedGivenUrl; | ||
96 | 117 | ||
97 | /** | 118 | /** |
98 | * @var bool | 119 | * @var bool |
@@ -273,15 +294,6 @@ class Entry | |||
273 | */ | 294 | */ |
274 | private $tags; | 295 | private $tags; |
275 | 296 | ||
276 | /** | ||
277 | * @var string | ||
278 | * | ||
279 | * @ORM\Column(name="origin_url", type="text", nullable=true) | ||
280 | * | ||
281 | * @Groups({"entries_for_user", "export_all"}) | ||
282 | */ | ||
283 | private $originUrl; | ||
284 | |||
285 | /* | 297 | /* |
286 | * @param User $user | 298 | * @param User $user |
287 | */ | 299 | */ |
@@ -326,30 +338,6 @@ class Entry | |||
326 | } | 338 | } |
327 | 339 | ||
328 | /** | 340 | /** |
329 | * Set given url. | ||
330 | * | ||
331 | * @param string $givenUrl | ||
332 | * | ||
333 | * @return Entry | ||
334 | */ | ||
335 | public function setGivenUrl($givenUrl) | ||
336 | { | ||
337 | $this->givenUrl = $givenUrl; | ||
338 | |||
339 | return $this; | ||
340 | } | ||
341 | |||
342 | /** | ||
343 | * Get given Url. | ||
344 | * | ||
345 | * @return string | ||
346 | */ | ||
347 | public function getGivenUrl() | ||
348 | { | ||
349 | return $this->givenUrl; | ||
350 | } | ||
351 | |||
352 | /** | ||
353 | * Set url. | 341 | * Set url. |
354 | * | 342 | * |
355 | * @param string $url | 343 | * @param string $url |
@@ -957,6 +945,31 @@ class Entry | |||
957 | } | 945 | } |
958 | 946 | ||
959 | /** | 947 | /** |
948 | * Set given url. | ||
949 | * | ||
950 | * @param string $givenUrl | ||
951 | * | ||
952 | * @return Entry | ||
953 | */ | ||
954 | public function setGivenUrl($givenUrl) | ||
955 | { | ||
956 | $this->givenUrl = $givenUrl; | ||
957 | $this->hashedGivenUrl = UrlHasher::hashUrl($givenUrl); | ||
958 | |||
959 | return $this; | ||
960 | } | ||
961 | |||
962 | /** | ||
963 | * Get given url. | ||
964 | * | ||
965 | * @return string | ||
966 | */ | ||
967 | public function getGivenUrl() | ||
968 | { | ||
969 | return $this->givenUrl; | ||
970 | } | ||
971 | |||
972 | /** | ||
960 | * @return string | 973 | * @return string |
961 | */ | 974 | */ |
962 | public function getHashedUrl() | 975 | public function getHashedUrl() |
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 0d6a412d..5901df8b 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -76,9 +76,10 @@ class ContentProxy | |||
76 | // Not sure what are the other possible cases where this property is empty | 76 | // Not sure what are the other possible cases where this property is empty |
77 | if (empty($entry->getUrl()) && !empty($url)) { | 77 | if (empty($entry->getUrl()) && !empty($url)) { |
78 | $entry->setUrl($url); | 78 | $entry->setUrl($url); |
79 | $entry->setGivenUrl($url); | ||
80 | } | 79 | } |
81 | 80 | ||
81 | $entry->setGivenUrl($url); | ||
82 | |||
82 | $this->stockEntry($entry, $content); | 83 | $this->stockEntry($entry, $content); |
83 | } | 84 | } |
84 | 85 | ||
diff --git a/src/Wallabag/CoreBundle/Repository/EntryRepository.php b/src/Wallabag/CoreBundle/Repository/EntryRepository.php index 299b0b27..8b29aad2 100644 --- a/src/Wallabag/CoreBundle/Repository/EntryRepository.php +++ b/src/Wallabag/CoreBundle/Repository/EntryRepository.php | |||
@@ -368,7 +368,7 @@ class EntryRepository extends EntityRepository | |||
368 | { | 368 | { |
369 | $res = $this->createQueryBuilder('e') | 369 | $res = $this->createQueryBuilder('e') |
370 | ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl) | 370 | ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl) |
371 | // ->orWhere('e.givenUrl = :url')->setParameter('url', $url) | 371 | ->orWhere('e.hashedGivenUrl = :hashed_given_url')->setParameter('hashed_given_url', $hashedUrl) |
372 | ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) | 372 | ->andWhere('e.user = :user_id')->setParameter('user_id', $userId) |
373 | ->getQuery() | 373 | ->getQuery() |
374 | ->getResult(); | 374 | ->getResult(); |