aboutsummaryrefslogtreecommitdiffhomepage
path: root/inc/3rdparty/htmlpurifier/HTMLPurifier/URIParser.php
diff options
context:
space:
mode:
authorNicolas Lœuillet <nicolas.loeuillet@gmail.com>2014-02-21 15:43:14 +0100
committerNicolas Lœuillet <nicolas.loeuillet@gmail.com>2014-02-21 15:43:14 +0100
commitd4949327efa15b492cab1bef3fe074290a328a17 (patch)
treee89e0322bb1f1b06d663fd10fdded21bac867e5d /inc/3rdparty/htmlpurifier/HTMLPurifier/URIParser.php
parentc9bd17a1007bb78e5de0775efca01df0fb515031 (diff)
downloadwallabag-d4949327efa15b492cab1bef3fe074290a328a17.tar.gz
wallabag-d4949327efa15b492cab1bef3fe074290a328a17.tar.zst
wallabag-d4949327efa15b492cab1bef3fe074290a328a17.zip
[add] HTML Purifier added to clean code
Diffstat (limited to 'inc/3rdparty/htmlpurifier/HTMLPurifier/URIParser.php')
-rw-r--r--inc/3rdparty/htmlpurifier/HTMLPurifier/URIParser.php71
1 files changed, 71 insertions, 0 deletions
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/URIParser.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/URIParser.php
new file mode 100644
index 00000000..699978dc
--- /dev/null
+++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/URIParser.php
@@ -0,0 +1,71 @@
1<?php
2
3/**
4 * Parses a URI into the components and fragment identifier as specified
5 * by RFC 3986.
6 */
class HTMLPurifier_URIParser
{

    /**
     * Instance of HTMLPurifier_PercentEncoder to do normalization with.
     */
    protected $percentEncoder;

    /**
     * Creates the parser together with the percent-encoder used by parse().
     */
    public function __construct()
    {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI.
     *
     * The URI is first passed through the percent-encoder's normalize(),
     * then split into scheme, authority, path, query and fragment; the
     * authority, when present, is further split into userinfo, host and
     * port.
     *
     * @param $uri string URI to parse
     * @return HTMLPurifier_URI|false representation of URI, or false when
     *         the URI regexp does not match. This representation has
     *         not been validated yet and may not conform to RFC.
     */
    public function parse($uri)
    {
        $uri = $this->percentEncoder->normalize($uri);

        // Regexp is as per Appendix B.
        // Note that ["<>] are an addition to the RFC's recommended
        // characters, because they represent external delimiters.
        $r_URI = '!'.
            '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
            '([^?#"<>]*)'. // 5. Path
            '(\?([^#"<>]*))?'. // 7. Query
            '(#([^"<>]*))?'. // 8. Fragment
            '!';

        $matches = array();
        $result = preg_match($r_URI, $uri, $matches);

        if (!$result) {
            return false; // *really* invalid URI
        }

        // Separate out parts. Each odd-numbered group includes its
        // delimiter ("scheme:", "//authority", "?query", "#fragment"), so a
        // non-empty outer group means the component was present even when
        // the inner value is the empty string. preg_match() drops trailing
        // unmatched groups, hence empty() rather than a direct index.
        $scheme = !empty($matches[1]) ? $matches[2] : null;
        $authority = !empty($matches[3]) ? $matches[4] : null;
        $path = $matches[5]; // always present, can be empty
        $query = !empty($matches[6]) ? $matches[7] : null;
        $fragment = !empty($matches[8]) ? $matches[9] : null;

        // further parse authority
        if ($authority !== null) {
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $matches = array();
            preg_match($r_authority, $authority, $matches);
            $userinfo = !empty($matches[1]) ? $matches[2] : null;
            // Do not use empty() here: it treats the string "0" as empty,
            // which would silently discard a legitimate host of "0".
            $host = isset($matches[3]) ? $matches[3] : '';
            // Group 4 carries the ':' delimiter; a bare "host:" therefore
            // yields an empty digit string, which the cast turns into 0.
            $port = !empty($matches[4]) ? (int) $matches[5] : null;
        } else {
            $port = $host = $userinfo = null;
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

}
70
71// vim: et sw=4 sts=4