git.immae.eu Git - github/wallabag/wallabag.git/commitdiff
Merge pull request #3192 from wallabag/validate-content-fields
author: Jérémy Benoist <j0k3r@users.noreply.github.com>
Fri, 9 Jun 2017 11:51:26 +0000 (13:51 +0200)
committer: GitHub <noreply@github.com>
Fri, 9 Jun 2017 11:51:26 +0000 (13:51 +0200)
Validate language & preview picture fields

src/Wallabag/CoreBundle/Helper/ContentProxy.php
src/Wallabag/CoreBundle/Resources/config/services.yml
tests/Wallabag/ApiBundle/Controller/EntryRestControllerTest.php
tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php
tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php

index d5820e6633e2250f1a53c7554046f5fc238a781f..0c971863bcd670957a7f4c0f6d172d98bebdf4f0 100644 (file)
@@ -7,6 +7,9 @@ use Psr\Log\LoggerInterface;
 use Wallabag\CoreBundle\Entity\Entry;
 use Wallabag\CoreBundle\Tools\Utils;
 use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
+use Symfony\Component\Validator\Constraints\Locale as LocaleConstraint;
+use Symfony\Component\Validator\Constraints\Url as UrlConstraint;
+use Symfony\Component\Validator\Validator\ValidatorInterface;
 
 /**
  * This kind of proxy class take care of getting the content from an url
@@ -16,15 +19,17 @@ class ContentProxy
 {
     protected $graby;
     protected $tagger;
+    protected $validator;
     protected $logger;
     protected $mimeGuesser;
     protected $fetchingErrorMessage;
     protected $eventDispatcher;
 
-    public function __construct(Graby $graby, RuleBasedTagger $tagger, LoggerInterface $logger, $fetchingErrorMessage)
+    public function __construct(Graby $graby, RuleBasedTagger $tagger, ValidatorInterface $validator, LoggerInterface $logger, $fetchingErrorMessage)
     {
         $this->graby = $graby;
         $this->tagger = $tagger;
+        $this->validator = $validator;
         $this->logger = $logger;
         $this->mimeGuesser = new MimeTypeExtensionGuesser();
         $this->fetchingErrorMessage = $fetchingErrorMessage;
@@ -113,7 +118,24 @@ class ContentProxy
             $entry->setHeaders($content['all_headers']);
         }
 
-        $entry->setLanguage(isset($content['language']) ? $content['language'] : '');
+        $this->validateAndSetLanguage(
+            $entry,
+            isset($content['language']) ? $content['language'] : ''
+        );
+
+        $this->validateAndSetPreviewPicture(
+            $entry,
+            isset($content['open_graph']['og_image']) ? $content['open_graph']['og_image'] : ''
+        );
+
+        // if content is an image, define it as a preview too
+        if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
+            $this->validateAndSetPreviewPicture(
+                $entry,
+                $content['url']
+            );
+        }
+
         $entry->setMimetype(isset($content['content_type']) ? $content['content_type'] : '');
         $entry->setReadingTime(Utils::getReadingTime($html));
 
@@ -122,15 +144,6 @@ class ContentProxy
             $entry->setDomainName($domainName);
         }
 
-        if (!empty($content['open_graph']['og_image'])) {
-            $entry->setPreviewPicture($content['open_graph']['og_image']);
-        }
-
-        // if content is an image define as a preview too
-        if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
-            $entry->setPreviewPicture($content['url']);
-        }
-
         try {
             $this->tagger->tag($entry);
         } catch (\Exception $e) {
@@ -152,4 +165,52 @@ class ContentProxy
     {
         return !empty($content['title']) && !empty($content['html']) && !empty($content['url']);
     }
+
+    /**
+     * Validate a language code with Symfony's Locale constraint and, when it
+     * is valid, store it on the entry.
+     *
+     * Hyphens are replaced by underscores before validation (fr-FR becomes
+     * fr_FR), and it is this normalised value that gets stored. When the
+     * validator reports violations, the entry's language is left untouched
+     * and a warning containing the violations is logged instead.
+     *
+     * @param Entry  $entry Entry whose language is set when validation succeeds
+     * @param string $value Language to validate
+     */
+    private function validateAndSetLanguage($entry, $value)
+    {
+        // Some languages are declared as fr-FR or es-ES;
+        // replacing - with _ might increase language support.
+        $value = str_replace('-', '_', $value);
+
+        $errors = $this->validator->validate(
+            $value,
+            (new LocaleConstraint())
+        );
+
+        if (0 === count($errors)) {
+            $entry->setLanguage($value);
+
+            return;
+        }
+
+        $this->logger->warning('Language validation failed. '.(string) $errors);
+    }
+
+    /**
+     * Validate a preview picture URL with Symfony's Url constraint and, when
+     * it is valid, store it on the entry.
+     *
+     * When the validator reports violations, the entry's preview picture is
+     * left untouched and a warning containing the violations is logged instead.
+     *
+     * NOTE(review): the caller passes '' when no og_image is available; if the
+     * Url constraint accepts empty values, '' is stored as the preview
+     * picture - confirm this is intended.
+     *
+     * @param Entry  $entry Entry whose preview picture is set when validation succeeds
+     * @param string $value URL to validate
+     */
+    private function validateAndSetPreviewPicture($entry, $value)
+    {
+        $errors = $this->validator->validate(
+            $value,
+            (new UrlConstraint())
+        );
+
+        if (0 === count($errors)) {
+            $entry->setPreviewPicture($value);
+
+            return;
+        }
+
+        $this->logger->warning('PreviewPicture validation failed. '.(string) $errors);
+    }
 }
index a9b0d2d564a5dc7fcdcd55d99105ae19b5694295..2ae5d27f2fbd2166ab9d5913b5e7a79163e8250e 100644 (file)
@@ -90,6 +90,7 @@ services:
         arguments:
             - "@wallabag_core.graby"
             - "@wallabag_core.rule_based_tagger"
+            - "@validator"
             - "@logger"
             - '%wallabag_core.fetching_error_message%'
 
index 74ec34b17bcbba81c2525c3941e038502b2ffa9b..4aa60e90d265592e73b9745c7143d6d73d6117a1 100644 (file)
@@ -345,7 +345,7 @@ class EntryRestControllerTest extends WallabagApiTestCase
             'tags' => 'google',
             'title' => 'New title for my article',
             'content' => 'my content',
-            'language' => 'de_DE',
+            'language' => 'de',
             'published_at' => '2016-09-08T11:55:58+0200',
             'authors' => 'bob,helen',
         ]);
@@ -362,7 +362,7 @@ class EntryRestControllerTest extends WallabagApiTestCase
         $this->assertEquals(1, $content['user_id']);
         $this->assertCount(2, $content['tags']);
         $this->assertSame('my content', $content['content']);
-        $this->assertSame('de_DE', $content['language']);
+        $this->assertSame('de', $content['language']);
         $this->assertSame('2016-09-08T11:55:58+0200', $content['published_at']);
         $this->assertCount(2, $content['published_by']);
         $this->assertContains('bob', $content['published_by']);
@@ -477,7 +477,7 @@ class EntryRestControllerTest extends WallabagApiTestCase
             'tags' => 'new tag '.uniqid(),
             'starred' => '1',
             'archive' => '0',
-            'language' => 'de_DE',
+            'language' => 'de_AT',
             'preview_picture' => 'http://preview.io/picture.jpg',
             'authors' => 'bob,sponge',
             'content' => 'awesome',
@@ -492,7 +492,7 @@ class EntryRestControllerTest extends WallabagApiTestCase
         $this->assertEquals('New awesome title', $content['title']);
         $this->assertGreaterThan($nbTags, count($content['tags']));
         $this->assertEquals(1, $content['user_id']);
-        $this->assertEquals('de_DE', $content['language']);
+        $this->assertEquals('de_AT', $content['language']);
         $this->assertEquals('http://preview.io/picture.jpg', $content['preview_picture']);
         $this->assertContains('sponge', $content['published_by']);
         $this->assertContains('bob', $content['published_by']);
index cc7b3672c73e6ddc98f6d7ce33a73c7cbdae52ee..3babbaca0654dfecf6bb1c06422af0a5a7738f93 100644 (file)
@@ -158,6 +158,7 @@ class EntryControllerTest extends WallabagCoreTestCase
         $this->assertInstanceOf('Wallabag\CoreBundle\Entity\Entry', $content);
         $this->assertEquals($this->url, $content->getUrl());
         $this->assertContains('Google', $content->getTitle());
+        $this->assertEquals('fr', $content->getLanguage());
         $this->assertEquals('2015-03-28 15:37:39', $content->getPublishedAt()->format('Y-m-d H:i:s'));
         $this->assertEquals('Morgane Tual', $author[0]);
         $this->assertArrayHasKey('x-varnish1', $content->getHeaders());
@@ -190,6 +191,7 @@ class EntryControllerTest extends WallabagCoreTestCase
 
         $authors = $content->getPublishedBy();
         $this->assertEquals('2017-04-05 19:26:13', $content->getPublishedAt()->format('Y-m-d H:i:s'));
+        $this->assertEquals('fr', $content->getLanguage());
         $this->assertEquals('Raphaël Balenieri, correspondant à Pékin', $authors[0]);
         $this->assertEquals('Frédéric Autran, correspondant à New York', $authors[1]);
     }
@@ -254,15 +256,6 @@ class EntryControllerTest extends WallabagCoreTestCase
 
         $this->assertEquals(302, $client->getResponse()->getStatusCode());
         $this->assertContains('/view/', $client->getResponse()->getTargetUrl());
-
-        $em = $client->getContainer()
-            ->get('doctrine.orm.entity_manager');
-        $entry = $em
-            ->getRepository('WallabagCoreBundle:Entry')
-            ->findOneByUrl(urldecode($url));
-
-        $em->remove($entry);
-        $em->flush();
     }
 
     /**
@@ -297,6 +290,7 @@ class EntryControllerTest extends WallabagCoreTestCase
 
         $this->assertCount(2, $tags);
         $this->assertContains('wallabag', $tags);
+        $this->assertEquals('en', $entry->getLanguage());
 
         $em->remove($entry);
         $em->flush();
@@ -392,8 +386,6 @@ class EntryControllerTest extends WallabagCoreTestCase
     }
 
     /**
-     * @depends testPostNewOk
-     *
      * This test will require an internet connection.
      */
     public function testReload()
@@ -420,9 +412,6 @@ class EntryControllerTest extends WallabagCoreTestCase
         $this->assertNotEmpty($entry->getContent());
     }
 
-    /**
-     * @depends testPostNewOk
-     */
     public function testReloadWithFetchingFailed()
     {
         $this->logInAs('admin');
@@ -1254,4 +1243,82 @@ class EntryControllerTest extends WallabagCoreTestCase
 
         $this->assertCount(1, $crawler->filter('div[class=entry]'));
     }
+
+    public function dataForLanguage()
+    {
+        return [
+            'ru' => [
+                'https://www.pravda.ru/world/09-06-2017/1337283-qatar-0/',
+                'ru',
+            ],
+            'fr-FR' => [
+                'http://www.zataz.com/90-des-dossiers-medicaux-des-coreens-du-sud-vendus-a-des-entreprises-privees/',
+                'fr_FR',
+            ],
+            'de' => [
+                'http://www.bild.de/politik/ausland/theresa-may/wahlbeben-grossbritannien-analyse-52108924.bild.html',
+                'de',
+            ],
+            'it' => [
+                'http://www.ansa.it/sito/notizie/mondo/europa/2017/06/08/voto-gb-seggi-aperti-misure-sicurezza-rafforzate_0cb71f7f-e23b-4d5f-95ca-bc12296419f0.html',
+                'it',
+            ],
+            'zh_CN' => [
+                'http://www.hao123.com/shequ?__noscript__-=1',
+                'zh_CN',
+            ],
+            'de_AT' => [
+                'https://buy.garmin.com/de-AT/AT/catalog/product/compareResult.ep?compareProduct=112885&compareProduct=36728',
+                'de_AT',
+            ],
+            'ru_RU' => [
+                'http://netler.ru/ikt/windows-error-reporting.htm',
+                'ru_RU',
+            ],
+            'pt_BR' => [
+                'http://precodoscombustiveis.com.br/postos/cidade/4121/pr/maringa',
+                'pt_BR',
+            ],
+            'fucked_list_of_languages' => [
+                'http://geocatalog.webservice-energy.org/geonetwork/srv/eng/main.home',
+                '',
+            ],
+            'es-ES' => [
+                'http://www.muylinux.com/2015/04/17/odf-reino-unido-microsoft-google',
+                'es_ES',
+            ],
+        ];
+    }
+
+    /**
+     * Submit each provided URL through the /new entry form as the admin user,
+     * then check the language stored on the entry saved for that URL.
+     *
+     * NOTE(review): the provider URLs point at external sites, so this test
+     * presumably requires an internet connection - confirm.
+     *
+     * @dataProvider dataForLanguage
+     *
+     * @param string $url              URL submitted in the entry form
+     * @param string $expectedLanguage Language expected on the saved entry
+     */
+    public function testLanguageValidation($url, $expectedLanguage)
+    {
+        $this->logInAs('admin');
+        $client = $this->getClient();
+
+        $crawler = $client->request('GET', '/new');
+
+        $this->assertEquals(200, $client->getResponse()->getStatusCode());
+
+        $form = $crawler->filter('form[name=entry]')->form();
+
+        $data = [
+            'entry[url]' => $url,
+        ];
+
+        $client->submit($form, $data);
+
+        $this->assertEquals(302, $client->getResponse()->getStatusCode());
+
+        $content = $client->getContainer()
+            ->get('doctrine.orm.entity_manager')
+            ->getRepository('WallabagCoreBundle:Entry')
+            ->findByUrlAndUserId($url, $this->getLoggedInUserId());
+
+        $this->assertInstanceOf('Wallabag\CoreBundle\Entity\Entry', $content);
+        $this->assertEquals($url, $content->getUrl());
+        $this->assertEquals($expectedLanguage, $content->getLanguage());
+    }
 }
index a3570125e11e39fefcd11a53aac334a9f7d9956b..95dd75ba8c3ebe9629f92ac06defe38b2a1a4bf9 100644 (file)
@@ -11,6 +11,9 @@ use Wallabag\CoreBundle\Entity\Tag;
 use Wallabag\UserBundle\Entity\User;
 use Wallabag\CoreBundle\Helper\RuleBasedTagger;
 use Graby\Graby;
+use Symfony\Component\Validator\Validator\RecursiveValidator;
+use Symfony\Component\Validator\ConstraintViolationList;
+use Symfony\Component\Validator\ConstraintViolation;
 
 class ContentProxyTest extends \PHPUnit_Framework_TestCase
 {
@@ -37,7 +40,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
                 'language' => '',
             ]);
 
-        $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
+        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
         $entry = new Entry(new User());
         $proxy->updateEntry($entry, 'http://user@:80');
 
@@ -72,7 +75,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
                 'language' => '',
             ]);
 
-        $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
+        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
         $entry = new Entry(new User());
         $proxy->updateEntry($entry, 'http://0.0.0.0');
 
@@ -112,7 +115,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
                 ],
             ]);
 
-        $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
+        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
         $entry = new Entry(new User());
         $proxy->updateEntry($entry, 'http://domain.io');
 
@@ -154,7 +157,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
                 ],
             ]);
 
-        $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
+        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
         $entry = new Entry(new User());
         $proxy->updateEntry($entry, 'http://0.0.0.0');
 
@@ -192,18 +195,112 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
                 'open_graph' => [
                     'og_title' => 'my OG title',
                     'og_description' => 'OG desc',
-                    'og_image' => false,
+                    'og_image' => null,
                 ],
             ]);
 
-        $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
+        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
         $entry = new Entry(new User());
         $proxy->updateEntry($entry, 'http://0.0.0.0');
 
         $this->assertEquals('http://1.1.1.1', $entry->getUrl());
         $this->assertEquals('this is my title', $entry->getTitle());
         $this->assertContains('this is my content', $entry->getContent());
-        $this->assertNull($entry->getPreviewPicture());
+        $this->assertEmpty($entry->getPreviewPicture());
+        $this->assertEquals('text/html', $entry->getMimetype());
+        $this->assertEquals('fr', $entry->getLanguage());
+        $this->assertEquals('200', $entry->getHttpStatus());
+        $this->assertEquals(4.0, $entry->getReadingTime());
+        $this->assertEquals('1.1.1.1', $entry->getDomainName());
+    }
+
+    public function testWithContentAndBadLanguage()
+    {
+        $tagger = $this->getTaggerMock();
+        $tagger->expects($this->once())
+            ->method('tag');
+
+        $validator = $this->getValidator();
+        $validator->expects($this->exactly(2))
+            ->method('validate')
+            ->will($this->onConsecutiveCalls(
+                new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist')]),
+                new ConstraintViolationList()
+            ));
+
+        $graby = $this->getMockBuilder('Graby\Graby')
+            ->setMethods(['fetchContent'])
+            ->disableOriginalConstructor()
+            ->getMock();
+
+        $graby->expects($this->any())
+            ->method('fetchContent')
+            ->willReturn([
+                'html' => str_repeat('this is my content', 325),
+                'title' => 'this is my title',
+                'url' => 'http://1.1.1.1',
+                'content_type' => 'text/html',
+                'language' => 'dontexist',
+                'status' => '200',
+            ]);
+
+        $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage);
+        $entry = new Entry(new User());
+        $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+        $this->assertEquals('http://1.1.1.1', $entry->getUrl());
+        $this->assertEquals('this is my title', $entry->getTitle());
+        $this->assertContains('this is my content', $entry->getContent());
+        $this->assertEquals('text/html', $entry->getMimetype());
+        $this->assertEmpty($entry->getLanguage());
+        $this->assertEquals('200', $entry->getHttpStatus());
+        $this->assertEquals(4.0, $entry->getReadingTime());
+        $this->assertEquals('1.1.1.1', $entry->getDomainName());
+    }
+
+    public function testWithContentAndBadOgImage()
+    {
+        $tagger = $this->getTaggerMock();
+        $tagger->expects($this->once())
+            ->method('tag');
+
+        $validator = $this->getValidator();
+        $validator->expects($this->exactly(2))
+            ->method('validate')
+            ->will($this->onConsecutiveCalls(
+                new ConstraintViolationList(),
+                new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://')])
+            ));
+
+        $graby = $this->getMockBuilder('Graby\Graby')
+            ->setMethods(['fetchContent'])
+            ->disableOriginalConstructor()
+            ->getMock();
+
+        $graby->expects($this->any())
+            ->method('fetchContent')
+            ->willReturn([
+                'html' => str_repeat('this is my content', 325),
+                'title' => 'this is my title',
+                'url' => 'http://1.1.1.1',
+                'content_type' => 'text/html',
+                'language' => 'fr',
+                'status' => '200',
+                'open_graph' => [
+                    'og_title' => 'my OG title',
+                    'og_description' => 'OG desc',
+                    'og_image' => 'https://',
+                ],
+            ]);
+
+        $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage);
+        $entry = new Entry(new User());
+        $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+        $this->assertEquals('http://1.1.1.1', $entry->getUrl());
+        $this->assertEquals('this is my title', $entry->getTitle());
+        $this->assertContains('this is my content', $entry->getContent());
+        $this->assertEmpty($entry->getPreviewPicture());
         $this->assertEquals('text/html', $entry->getMimetype());
         $this->assertEquals('fr', $entry->getLanguage());
         $this->assertEquals('200', $entry->getHttpStatus());
@@ -217,7 +314,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
         $tagger->expects($this->once())
             ->method('tag');
 
-        $proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
+        $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
         $entry = new Entry(new User());
         $proxy->updateEntry(
             $entry,
@@ -259,7 +356,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
         $logHandler = new TestHandler();
         $logger = new Logger('test', [$logHandler]);
 
-        $proxy = new ContentProxy((new Graby()), $tagger, $logger, $this->fetchingErrorMessage);
+        $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage);
         $entry = new Entry(new User());
         $proxy->updateEntry(
             $entry,
@@ -294,7 +391,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
         $handler = new TestHandler();
         $logger->pushHandler($handler);
 
-        $proxy = new ContentProxy((new Graby()), $tagger, $logger, $this->fetchingErrorMessage);
+        $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage);
         $entry = new Entry(new User());
         $proxy->updateEntry(
             $entry,
@@ -331,7 +428,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
             ->method('tag')
             ->will($this->throwException(new \Exception()));
 
-        $proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
+        $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
         $entry = new Entry(new User());
         $proxy->updateEntry(
             $entry,
@@ -371,7 +468,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
         $tagger->expects($this->once())
             ->method('tag');
 
-        $proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
+        $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
         $entry = new Entry(new User());
         $proxy->updateEntry(
             $entry,
@@ -413,4 +510,12 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
     {
         return new NullLogger();
     }
+
+    private function getValidator()
+    {
+        return $this->getMockBuilder(RecursiveValidator::class)
+            ->setMethods(['validate'])
+            ->disableOriginalConstructor()
+            ->getMock();
+    }
 }