diff options
Diffstat (limited to 'application/http')
-rw-r--r-- | application/http/Url.php | 217 |
1 files changed, 217 insertions, 0 deletions
diff --git a/application/http/Url.php b/application/http/Url.php new file mode 100644 index 00000000..260231c6 --- /dev/null +++ b/application/http/Url.php | |||
@@ -0,0 +1,217 @@ | |||
1 | <?php | ||
2 | |||
3 | namespace Shaarli\Http; | ||
4 | |||
5 | /** | ||
6 | * URL representation and cleanup utilities | ||
7 | * | ||
8 | * Form | ||
9 | * scheme://[username:password@]host[:port][/path][?query][#fragment] | ||
10 | * | ||
11 | * Examples | ||
12 | * http://username:password@hostname:9090/path?arg1=value1&arg2=value2#anchor | ||
13 | * https://host.name.tld | ||
14 | * https://h2.g2/faq/?vendor=hitchhiker&item=guide&dest=galaxy#answer | ||
15 | * | ||
16 | * @see http://www.faqs.org/rfcs/rfc3986.html | ||
17 | */ | ||
class Url
{
    /**
     * Prefixes of query parameters that cleanupQuery() strips from the URL.
     *
     * A parameter is removed when it starts with one of these strings, so an
     * entry ending in '=' matches one exact parameter name while the others
     * match a whole family of names (e.g. 'utm_').
     *
     * @var array
     */
    private static $annoyingQueryParams = array(

        'action_object_map=',
        'action_ref_map=',
        'action_type_map=',
        'fb_',
        'fb=',
        'PHPSESSID=',

        // Scoop.it
        '__scoop',

        // Google Analytics & FeedProxy
        'utm_',

        // ATInternet
        'xtor=',

        // Other
        'campaign_'
    );

    /**
     * Prefixes of fragments that cleanupFragment() strips from the URL.
     *
     * @var array
     */
    private static $annoyingFragments = array(
        // ATInternet
        'xtor=RSS-',

        // Misc.
        'tk.rss_all'
    );

    /**
     * URL parts represented as an array
     *
     * @var array
     *
     * @see http://php.net/parse_url
     */
    protected $parts;

    /**
     * Parses a string containing a URL
     *
     * The URL is trimmed and cleaned up before being parsed. When a non-empty
     * URL has no scheme, 'http' is assumed.
     *
     * @param string $url a string containing a URL
     */
    public function __construct($url)
    {
        $url = self::cleanupUnparsedUrl(trim((string) $url));

        // parse_url() returns false on seriously malformed URLs; always keep
        // an array so the accessors below never index into a boolean.
        $parsed = parse_url($url);
        $this->parts = is_array($parsed) ? $parsed : array();

        if (!empty($url) && empty($this->parts['scheme'])) {
            $this->parts['scheme'] = 'http';
        }
    }

    /**
     * Clean up URL before it's parsed.
     * ie. handle urlencode, url prefixes, etc.
     *
     * @param string $url URL to clean.
     *
     * @return string cleaned URL.
     */
    protected static function cleanupUnparsedUrl($url)
    {
        return self::removeFirefoxAboutReader($url);
    }

    /**
     * Remove Firefox Reader prefix if it's present.
     *
     * When the prefix is found, the remainder is URL-decoded before being
     * returned; otherwise the input is returned untouched.
     *
     * @param string $input url
     *
     * @return string cleaned url
     */
    protected static function removeFirefoxAboutReader($input)
    {
        $firefoxPrefix = 'about://reader?url=';
        if (startsWith($input, $firefoxPrefix)) {
            // Cut the prefix off by length. ltrim() must not be used here: its
            // second argument is a character mask, so it would also eat the
            // first characters of the wrapped URL whenever they appear in the
            // prefix (e.g. the leading 'e' of 'example.com').
            return urldecode((string) substr($input, strlen($firefoxPrefix)));
        }
        return $input;
    }

    /**
     * Returns a string representation of this URL
     *
     * @return string the URL rebuilt from its parsed parts
     */
    public function toString()
    {
        return unparse_url($this->parts);
    }

    /**
     * Removes undesired query parameters
     *
     * Every parameter starting with one of self::$annoyingQueryParams is
     * dropped; the order of the remaining parameters is preserved. The query
     * part is removed entirely when nothing is left.
     */
    protected function cleanupQuery()
    {
        if (!isset($this->parts['query'])) {
            return;
        }

        $kept = array();
        foreach (explode('&', $this->parts['query']) as $param) {
            foreach (self::$annoyingQueryParams as $annoying) {
                if (startsWith($param, $annoying)) {
                    // Tracking parameter: skip it and move on to the next one.
                    continue 2;
                }
            }
            $kept[] = $param;
        }

        if (count($kept) == 0) {
            unset($this->parts['query']);
            return;
        }

        $this->parts['query'] = implode('&', $kept);
    }

    /**
     * Removes undesired fragments
     *
     * The fragment is dropped when it starts with one of
     * self::$annoyingFragments.
     */
    protected function cleanupFragment()
    {
        if (!isset($this->parts['fragment'])) {
            return;
        }

        foreach (self::$annoyingFragments as $annoying) {
            if (startsWith($this->parts['fragment'], $annoying)) {
                unset($this->parts['fragment']);
                break;
            }
        }
    }

    /**
     * Removes undesired query parameters and fragments
     *
     * @return string the string representation of this URL after cleanup
     */
    public function cleanup()
    {
        $this->cleanupQuery();
        $this->cleanupFragment();
        return $this->toString();
    }

    /**
     * Converts an URL with an International Domain Name host to a ASCII one.
     * This requires PHP-intl. If it's not available, just returns this->cleanup().
     *
     * The cleaned-up URL is also returned unchanged when there is no host or
     * when the host cannot be converted.
     *
     * @return string converted cleaned up URL.
     */
    public function idnToAscii()
    {
        $out = $this->cleanup();
        if (!function_exists('idn_to_ascii') || empty($this->parts['host'])) {
            return $out;
        }

        $asciiHost = idn_to_ascii($this->parts['host'], 0, INTL_IDNA_VARIANT_UTS46);
        if ($asciiHost === false || $asciiHost === '') {
            // Conversion failed: keep the original host rather than erasing it.
            return $out;
        }

        // Swap the host in a copy of the parts instead of str_replace()-ing
        // the whole string, which would also rewrite occurrences of the host
        // name inside the path, query or fragment.
        $asciiParts = $this->parts;
        $asciiParts['host'] = $asciiHost;
        return unparse_url($asciiParts);
    }

    /**
     * Get URL scheme.
     *
     * @return string|bool the URL scheme or false if none is provided.
     */
    public function getScheme()
    {
        if (!isset($this->parts['scheme'])) {
            return false;
        }
        return $this->parts['scheme'];
    }

    /**
     * Get URL host.
     *
     * @return string|bool the URL host or false if none is provided.
     */
    public function getHost()
    {
        if (empty($this->parts['host'])) {
            return false;
        }
        return $this->parts['host'];
    }

    /**
     * Test if the Url is an HTTP one.
     *
     * The check is case-insensitive and matches any scheme containing 'http',
     * which covers both 'http' and 'https'.
     *
     * @return bool true if the URL is HTTP, false otherwise (including when
     *              the URL has no scheme).
     */
    public function isHttp()
    {
        $scheme = $this->getScheme();
        if ($scheme === false) {
            return false;
        }
        return strpos(strtolower($scheme), 'http') !== false;
    }
}