From f3bfb875e94021a93e24a41fbc0f8d86d4dee378 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Wed, 29 May 2019 14:18:04 +0200 Subject: Use hash of given url to avoid duplicates Using hashed urls, we can put an index on them to ensure lookups are fast. --- app/DoctrineMigrations/Version20190601125843.php | 74 ++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 app/DoctrineMigrations/Version20190601125843.php (limited to 'app/DoctrineMigrations/Version20190601125843.php') diff --git a/app/DoctrineMigrations/Version20190601125843.php b/app/DoctrineMigrations/Version20190601125843.php new file mode 100644 index 00000000..341d64dc --- /dev/null +++ b/app/DoctrineMigrations/Version20190601125843.php @@ -0,0 +1,74 @@ +getTable($this->getTable('entry')); + + if (!$entryTable->hasColumn('given_url')) { + $entryTable->addColumn('given_url', 'text', [ + 'notnull' => false, + ]); + } + + if (!$entryTable->hasColumn('hashed_given_url')) { + $entryTable->addColumn('hashed_given_url', 'text', [ + 'length' => 40, + 'notnull' => false, + ]); + } + + $entryTable->dropIndex('hashed_url_user_id'); + $entryTable->addIndex( + [ + 'user_id', + 'hashed_url', + 'hashed_given_url', + ], + 'hashed_urls_user_id', + [], + [ + // specify length for index which is required by MySQL on text field + 'lengths' => [ + // user_id + null, + // hashed_url + 40, + // hashed_given_url + 40, + ], + ] + ); + } + + /** + * @param Schema $schema + */ + public function down(Schema $schema) + { + $entryTable = $schema->getTable($this->getTable('entry')); + + if ($entryTable->hasColumn('given_url')) { + $entryTable->dropColumn('given_url'); + } + + if ($entryTable->hasColumn('hashed_given_url')) { + $entryTable->dropColumn('hashed_given_url'); + } + + $entryTable->dropIndex('hashed_urls_user_id'); + $entryTable->addIndex(['user_id', 'hashed_url'], 'hashed_url_user_id', [], ['lengths' => [null, 40]]); + } +} -- cgit v1.2.3 From 70df4c335965a9562cc24d3ccea0a6ed1a23b7b1 Mon Sep 17 00:00:00 2001 
From: Jeremy Benoist Date: Wed, 5 Jun 2019 10:51:06 +0200 Subject: Use two indexes instead of one for hashed urls When using `OR` in a where clause, a composite index can't be used. We would need a `UNION` to take advantage of it. Instead, create 2 indexes, one on each hashed url, and make 2 queries to find a url. It'll be faster than the previous solution. --- app/DoctrineMigrations/Version20190601125843.php | 26 +++--------------------- 1 file changed, 3 insertions(+), 23 deletions(-) (limited to 'app/DoctrineMigrations/Version20190601125843.php') diff --git a/app/DoctrineMigrations/Version20190601125843.php b/app/DoctrineMigrations/Version20190601125843.php index 341d64dc..0e97606e 100644 --- a/app/DoctrineMigrations/Version20190601125843.php +++ b/app/DoctrineMigrations/Version20190601125843.php @@ -30,27 +30,8 @@ class Version20190601125843 extends WallabagMigration ]); } - $entryTable->dropIndex('hashed_url_user_id'); - $entryTable->addIndex( - [ - 'user_id', - 'hashed_url', - 'hashed_given_url', - ], - 'hashed_urls_user_id', - [], - [ - // specify length for index which is required by MySQL on text field - 'lengths' => [ - // user_id - null, - // hashed_url - 40, - // hashed_given_url - 40, - ], - ] - ); + // 40 = length of sha1 field hashed_given_url + $entryTable->addIndex(['user_id', 'hashed_given_url'], 'hashed_given_url_user_id', [], ['lengths' => [null, 40]]); } /** @@ -68,7 +49,6 @@ class Version20190601125843 extends WallabagMigration $entryTable->dropColumn('hashed_given_url'); } - $entryTable->dropIndex('hashed_urls_user_id'); - $entryTable->addIndex(['user_id', 'hashed_url'], 'hashed_url_user_id', [], ['lengths' => [null, 40]]); + $entryTable->dropIndex('hashed_given_url_user_id'); } } -- cgit v1.2.3