]> git.immae.eu Git - github/wallabag/wallabag.git/commitdiff
Use hash given url to avoid duplicate
authorJeremy Benoist <jeremy.benoist@gmail.com>
Wed, 29 May 2019 12:18:04 +0000 (14:18 +0200)
committerJeremy Benoist <jeremy.benoist@gmail.com>
Wed, 29 May 2019 13:56:20 +0000 (15:56 +0200)
Hashing the given url lets us put an index on it, which keeps the duplicate lookup fast.

app/DoctrineMigrations/Version20170710125843.php [deleted file]
app/DoctrineMigrations/Version20171218135243.php [deleted file]
app/DoctrineMigrations/Version20190601125843.php [new file with mode: 0644]
src/Wallabag/CoreBundle/Entity/Entry.php
src/Wallabag/CoreBundle/Helper/ContentProxy.php
src/Wallabag/CoreBundle/Repository/EntryRepository.php

diff --git a/app/DoctrineMigrations/Version20170710125843.php b/app/DoctrineMigrations/Version20170710125843.php
deleted file mode 100644 (file)
index 2cf8647..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-<?php
-
-namespace Application\Migrations;
-
-use Doctrine\DBAL\Schema\Schema;
-use Wallabag\CoreBundle\Doctrine\WallabagMigration;
-
-/**
- * Added `given_url` field in entry table.
- */
-class Version20170710125843 extends WallabagMigration
-{
-    /**
-     * @param Schema $schema
-     */
-    public function up(Schema $schema)
-    {
-        $entryTable = $schema->getTable($this->getTable('entry'));
-
-        $this->skipIf($entryTable->hasColumn('given_url'), 'It seems that you already played this migration.');
-
-        $entryTable->addColumn('given_url', 'text', [
-            'notnull' => false,
-        ]);
-    }
-
-    /**
-     * @param Schema $schema
-     */
-    public function down(Schema $schema)
-    {
-        $entryTable = $schema->getTable($this->getTable('entry'));
-
-        $this->skipIf(!$entryTable->hasColumn('given_url'), 'It seems that you already played this migration.');
-
-        $entryTable->dropColumn('given_url');
-    }
-}
diff --git a/app/DoctrineMigrations/Version20171218135243.php b/app/DoctrineMigrations/Version20171218135243.php
deleted file mode 100644 (file)
index 3060c92..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-<?php
-
-namespace Application\Migrations;
-
-use Doctrine\DBAL\Schema\Schema;
-use Wallabag\CoreBundle\Doctrine\WallabagMigration;
-
-/**
- * Added indexes on wallabag_entry.url and wallabag_entry.given_url and wallabag_entry.user_id.
- */
-class Version20171218135243 extends WallabagMigration
-{
-    private $indexGivenUrl = 'IDX_entry_given_url';
-
-    /**
-     * @param Schema $schema
-     */
-    public function up(Schema $schema)
-    {
-        $entryTable = $schema->getTable($this->getTable('entry'));
-        $this->skipIf($entryTable->hasIndex($this->indexGivenUrl), 'It seems that you already played this migration.');
-
-        switch ($this->connection->getDatabasePlatform()->getName()) {
-            case 'sqlite':
-                $sql = 'CREATE UNIQUE INDEX ' . $this->indexGivenUrl . ' ON ' . $this->getTable('entry') . ' (url, given_url, user_id);';
-                break;
-            case 'mysql':
-                $sql = 'CREATE UNIQUE INDEX ' . $this->indexGivenUrl . ' ON ' . $this->getTable('entry') . ' (url (255), given_url (255), user_id);';
-                break;
-            case 'postgresql':
-                $sql = 'CREATE UNIQUE INDEX ' . $this->indexGivenUrl . ' ON ' . $this->getTable('entry') . ' (url, given_url, user_id);';
-                break;
-        }
-
-        $this->addSql($sql);
-    }
-
-    /**
-     * @param Schema $schema
-     */
-    public function down(Schema $schema)
-    {
-        $entryTable = $schema->getTable($this->getTable('entry'));
-        $this->skipIf(false === $entryTable->hasIndex($this->indexGivenUrl), 'It seems that you already played this migration.');
-
-        $entryTable->dropIndex($this->indexGivenUrl);
-    }
-}
diff --git a/app/DoctrineMigrations/Version20190601125843.php b/app/DoctrineMigrations/Version20190601125843.php
new file mode 100644 (file)
index 0000000..341d64d
--- /dev/null
@@ -0,0 +1,93 @@
+<?php
+
+namespace Application\Migrations;
+
+use Doctrine\DBAL\Schema\Schema;
+use Wallabag\CoreBundle\Doctrine\WallabagMigration;
+
+/**
+ * Added `given_url` & `hashed_given_url` fields in entry table and replaced the
+ * `hashed_url_user_id` index by `hashed_urls_user_id`, which also covers `hashed_given_url`.
+ */
+class Version20190601125843 extends WallabagMigration
+{
+    /**
+     * Add the missing columns and swap the index; every step is skipped when already applied.
+     *
+     * @param Schema $schema
+     */
+    public function up(Schema $schema)
+    {
+        $entryTable = $schema->getTable($this->getTable('entry'));
+
+        if (!$entryTable->hasColumn('given_url')) {
+            $entryTable->addColumn('given_url', 'text', [
+                'notnull' => false,
+            ]);
+        }
+
+        if (!$entryTable->hasColumn('hashed_given_url')) {
+            $entryTable->addColumn('hashed_given_url', 'text', [
+                'length' => 40,
+                'notnull' => false,
+            ]);
+        }
+
+        // dropIndex() throws when the index is missing, so guard it like the columns above
+        if ($entryTable->hasIndex('hashed_url_user_id')) {
+            $entryTable->dropIndex('hashed_url_user_id');
+        }
+
+        // addIndex() throws when the name is already taken (e.g. migration replayed)
+        if (!$entryTable->hasIndex('hashed_urls_user_id')) {
+            $entryTable->addIndex(
+                [
+                    'user_id',
+                    'hashed_url',
+                    'hashed_given_url',
+                ],
+                'hashed_urls_user_id',
+                [],
+                [
+                    // specify length for index which is required by MySQL on text field
+                    'lengths' => [
+                        // user_id
+                        null,
+                        // hashed_url
+                        40,
+                        // hashed_given_url
+                        40,
+                    ],
+                ]
+            );
+        }
+    }
+
+    /**
+     * Revert up(): restore the two-column index and drop the `given_url` & `hashed_given_url` columns.
+     *
+     * @param Schema $schema
+     */
+    public function down(Schema $schema)
+    {
+        $entryTable = $schema->getTable($this->getTable('entry'));
+
+        // remove the composite index before the column it covers
+        if ($entryTable->hasIndex('hashed_urls_user_id')) {
+            $entryTable->dropIndex('hashed_urls_user_id');
+        }
+
+        if ($entryTable->hasColumn('given_url')) {
+            $entryTable->dropColumn('given_url');
+        }
+
+        if ($entryTable->hasColumn('hashed_given_url')) {
+            $entryTable->dropColumn('hashed_given_url');
+        }
+
+        // restore the index that up() replaced
+        if (!$entryTable->hasIndex('hashed_url_user_id')) {
+            $entryTable->addIndex(['user_id', 'hashed_url'], 'hashed_url_user_id', [], ['lengths' => [null, 40]]);
+        }
+    }
+}
index 6227413682be4ddf4ab934b378bb1d237cae4c3d..304dd1b3bbe06413cbe9f550c16c43d14633cd33 100644 (file)
@@ -27,9 +27,8 @@ use Wallabag\UserBundle\Entity\User;
  *     indexes={
  *         @ORM\Index(name="created_at", columns={"created_at"}),
  *         @ORM\Index(name="uid", columns={"uid"}),
- *         @ORM\Index(name="hashed_url_user_id", columns={"user_id", "hashed_url"}, options={"lengths"={null, 40}})
- *     },
- *     uniqueConstraints={@ORM\UniqueConstraint(name="IDX_entry_given_url",columns={"url", "given_url", "user_id"})}
+ *         @ORM\Index(name="hashed_urls_user_id", columns={"user_id", "hashed_url", "hashed_given_url"}, options={"lengths"={null, 40, 40}})
+ *     }
  * )
  * @ORM\HasLifecycleCallbacks()
  * @Hateoas\Relation("self", href = "expr('/api/entries/' ~ object.getId())")
@@ -69,30 +68,52 @@ class Entry
     private $title;
 
     /**
+     * Define the url fetched by wallabag (the final url after potential redirections).
+     *
      * @var string
      *
-     * @ORM\Column(name="given_url", type="text", nullable=true)
+     * @Assert\NotBlank()
+     * @ORM\Column(name="url", type="text", nullable=true)
      *
      * @Groups({"entries_for_user", "export_all"})
      */
-    private $givenUrl;
+    private $url;
 
     /**
      * @var string
      *
-     * @Assert\NotBlank()
-     * @ORM\Column(name="url", type="text", nullable=true)
+     * @ORM\Column(name="hashed_url", type="string", length=40, nullable=true)
+     */
+    private $hashedUrl;
+
+    /**
+     * From where user retrieved/found the url (another article, a tweet, or the given_url if none is provided).
+     *
+     * @var string
+     *
+     * @ORM\Column(name="origin_url", type="text", nullable=true)
      *
      * @Groups({"entries_for_user", "export_all"})
      */
-    private $url;
+    private $originUrl;
 
     /**
+     * Define the url entered by the user (without redirections).
+     *
      * @var string
      *
-     * @ORM\Column(name="hashed_url", type="string", length=40, nullable=true)
+     * @ORM\Column(name="given_url", type="text", nullable=true)
+     *
+     * @Groups({"entries_for_user", "export_all"})
      */
-    private $hashedUrl;
+    private $givenUrl;
+
+    /**
+     * @var string
+     *
+     * @ORM\Column(name="hashed_given_url", type="string", length=40, nullable=true)
+     */
+    private $hashedGivenUrl;
 
     /**
      * @var bool
@@ -273,15 +294,6 @@ class Entry
      */
     private $tags;
 
-    /**
-     * @var string
-     *
-     * @ORM\Column(name="origin_url", type="text", nullable=true)
-     *
-     * @Groups({"entries_for_user", "export_all"})
-     */
-    private $originUrl;
-
     /*
      * @param User     $user
      */
@@ -325,30 +337,6 @@ class Entry
         return $this->title;
     }
 
-    /**
-     * Set given url.
-     *
-     * @param string $givenUrl
-     *
-     * @return Entry
-     */
-    public function setGivenUrl($givenUrl)
-    {
-        $this->givenUrl = $givenUrl;
-
-        return $this;
-    }
-
-    /**
-     * Get given Url.
-     *
-     * @return string
-     */
-    public function getGivenUrl()
-    {
-        return $this->givenUrl;
-    }
-
     /**
      * Set url.
      *
@@ -956,6 +944,31 @@ class Entry
         return $this->originUrl;
     }
 
+    /**
+     * Set given url (also refreshes the hashed given url).
+     *
+     * @param string $givenUrl
+     *
+     * @return Entry
+     */
+    public function setGivenUrl($givenUrl)
+    {
+        $this->givenUrl = $givenUrl;
+        $this->hashedGivenUrl = UrlHasher::hashUrl($givenUrl);
+
+        return $this;
+    }
+
+    /**
+     * Get given url.
+     *
+     * @return string
+     */
+    public function getGivenUrl()
+    {
+        return $this->givenUrl;
+    }
+
     /**
      * @return string
      */
index 0d6a412d5bc835032db09e4354a36146a7ddb1cb..5901df8bdc0a456d8cf6c5ae1ac5c605cacb0027 100644 (file)
@@ -76,9 +76,10 @@ class ContentProxy
         // Not sure what are the other possible cases where this property is empty
         if (empty($entry->getUrl()) && !empty($url)) {
             $entry->setUrl($url);
-            $entry->setGivenUrl($url);
         }
 
+        $entry->setGivenUrl($url);
+
         $this->stockEntry($entry, $content);
     }
 
index 299b0b27612a476164452e4d64f1d49a5de7d3d2..8b29aad2d2e81823239ac98e9c91454cc8802c56 100644 (file)
@@ -368,7 +368,7 @@ class EntryRepository extends EntityRepository
     {
         $res = $this->createQueryBuilder('e')
             ->where('e.hashedUrl = :hashed_url')->setParameter('hashed_url', $hashedUrl)
-            // ->orWhere('e.givenUrl = :url')->setParameter('url', $url)
+            ->orWhere('e.hashedGivenUrl = :hashed_given_url')->setParameter('hashed_given_url', $hashedUrl)
             ->andWhere('e.user = :user_id')->setParameter('user_id', $userId)
             ->getQuery()
             ->getResult();