diff options
author | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2014-02-21 15:43:14 +0100 |
---|---|---|
committer | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2014-02-21 15:43:14 +0100 |
commit | d4949327efa15b492cab1bef3fe074290a328a17 (patch) | |
tree | e89e0322bb1f1b06d663fd10fdded21bac867e5d /inc/3rdparty/htmlpurifier/HTMLPurifier/URIParser.php | |
parent | c9bd17a1007bb78e5de0775efca01df0fb515031 (diff) | |
download | wallabag-d4949327efa15b492cab1bef3fe074290a328a17.tar.gz wallabag-d4949327efa15b492cab1bef3fe074290a328a17.tar.zst wallabag-d4949327efa15b492cab1bef3fe074290a328a17.zip |
[add] HTML Purifier added to clean code
Diffstat (limited to 'inc/3rdparty/htmlpurifier/HTMLPurifier/URIParser.php')
-rw-r--r-- | inc/3rdparty/htmlpurifier/HTMLPurifier/URIParser.php | 71 |
1 files changed, 71 insertions, 0 deletions
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/URIParser.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/URIParser.php new file mode 100644 index 00000000..699978dc --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/URIParser.php | |||
@@ -0,0 +1,71 @@ | |||
1 | <?php | ||
2 | |||
/**
 * Parses a URI into the components and fragment identifier as specified
 * by RFC 3986.
 */
class HTMLPurifier_URIParser
{

    /**
     * Instance of HTMLPurifier_PercentEncoder to do normalization with.
     */
    protected $percentEncoder;

    public function __construct()
    {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI.
     *
     * The input is first passed through the percent encoder's normalize()
     * and then split into scheme, authority, path, query and fragment.
     * When an authority is present it is further split into userinfo,
     * host and port.
     *
     * @param $uri string URI to parse
     * @return HTMLPurifier_URI representation of URI. This representation has
     *         not been validated yet and may not conform to RFC. Returns
     *         false if the URI could not be matched at all.
     */
    public function parse($uri)
    {
        $uri = $this->percentEncoder->normalize($uri);

        // Regexp is as per Appendix B.
        // Note that ["<>] are an addition to the RFC's recommended
        // characters, because they represent external delimiters.
        $r_URI = '!'.
            '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
            '([^?#"<>]*)'.       // 5. Path
            '(\?([^#"<>]*))?'.   // 7. Query
            '(#([^"<>]*))?'.     // 8. Fragment
            '!';

        $matches = array();
        $result = preg_match($r_URI, $uri, $matches);

        if (!$result) return false; // *really* invalid URI

        // separate out parts
        // The outer groups (odd indexes) include the delimiter itself
        // (":", "//", "?", "#"), so a non-empty outer group means the
        // component was present even when its value is an empty string.
        $scheme     = !empty($matches[1]) ? $matches[2] : null;
        $authority  = !empty($matches[3]) ? $matches[4] : null;
        $path       = $matches[5]; // always present, can be empty
        $query      = !empty($matches[6]) ? $matches[7] : null;
        $fragment   = !empty($matches[8]) ? $matches[9] : null;

        // further parse authority
        if ($authority !== null) {
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $matches = array();
            preg_match($r_authority, $authority, $matches);
            $userinfo   = !empty($matches[1]) ? $matches[2] : null;
            // Do not use empty() here: the host capture carries no delimiter,
            // and empty("0") is true, which would silently drop the host "0".
            $host       = isset($matches[3]) ? $matches[3] : '';
            $port       = !empty($matches[4]) ? (int) $matches[5] : null;
        } else {
            $port = $host = $userinfo = null;
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

}
70 | |||
71 | // vim: et sw=4 sts=4 | ||