]>
Commit | Line | Data |
---|---|---|
d4949327 NL |
1 | <?php\r |
2 | \r | |
/**
 * Parses a URI into the components and fragment identifier as specified
 * by RFC 3986.
 */
class HTMLPurifier_URIParser
{

    /**
     * Instance of HTMLPurifier_PercentEncoder to do normalization with.
     */
    protected $percentEncoder;

    /**
     * Creates the percent encoder used by parse().
     */
    public function __construct()
    {
        $this->percentEncoder = new HTMLPurifier_PercentEncoder();
    }

    /**
     * Parses a URI.
     * @param $uri string URI to parse
     * @return HTMLPurifier_URI representation of URI. This representation has
     *         not been validated yet and may not conform to RFC. Returns
     *         false if the URI could not be matched at all.
     */
    public function parse($uri)
    {
        // Run the percent encoder's normalization before splitting.
        $uri = $this->percentEncoder->normalize($uri);

        // Regexp is as per Appendix B.
        // Note that ["<>] are an addition to the RFC's recommended
        // characters, because they represent external delimiters.
        $r_URI = '!'.
            '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
            '(//([^/?#"<>]*))?'. // 4. Authority
            '([^?#"<>]*)'.       // 5. Path
            '(\?([^#"<>]*))?'.   // 7. Query
            '(#([^"<>]*))?'.     // 8. Fragment
            '!';

        $matches = array();
        $result = preg_match($r_URI, $uri, $matches);

        if (!$result) {
            return false; // *really* invalid URI
        }

        // Separate out parts. The odd-numbered groups include the
        // delimiter (e.g. "?q"), so testing them with empty() tells
        // "component absent" apart from "component present but empty".
        $scheme     = !empty($matches[1]) ? $matches[2] : null;
        $authority  = !empty($matches[3]) ? $matches[4] : null;
        $path       = $matches[5]; // always present, can be empty
        $query      = !empty($matches[6]) ? $matches[7] : null;
        $fragment   = !empty($matches[8]) ? $matches[9] : null;

        // further parse authority
        if ($authority !== null) {
            $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/";
            $matches = array();
            preg_match($r_authority, $authority, $matches);
            $userinfo   = !empty($matches[1]) ? $matches[2] : null;
            // Use isset() rather than empty() here: empty("0") is true,
            // which would silently turn the host "0" into an empty host.
            $host       = isset($matches[3]) ? $matches[3] : '';
            // Note: an authority ending in a bare ":" gives (int) '' === 0.
            $port       = !empty($matches[4]) ? (int) $matches[5] : null;
        } else {
            $port = $host = $userinfo = null;
        }

        return new HTMLPurifier_URI(
            $scheme, $userinfo, $host, $port, $path, $query, $fragment);
    }

}
70 | \r | |
71 | // vim: et sw=4 sts=4\r |