]>
Commit | Line | Data |
---|---|---|
1 | <?php | |
2 | /** | |
3 | * Part of Text_LanguageDetect | |
4 | * | |
5 | * PHP version 5 | |
6 | * | |
7 | * @category Text | |
8 | * @package Text_LanguageDetect | |
9 | * @author Christian Weiske <cweiske@php.net> | |
10 | * @copyright 2011 Christian Weiske <cweiske@php.net> | |
11 | * @license http://www.debian.org/misc/bsd.license BSD | |
12 | * @version SVN: $Id$ | |
13 | * @link http://pear.php.net/package/Text_LanguageDetect/ | |
14 | */ | |
15 | ||
16 | /** | |
17 | * Provides a mapping between the languages from lang.dat and the | |
18 | * ISO 639-1 and ISO 639-2 codes. | |
19 | * | |
20 | * Note that this class contains only languages that exist in lang.dat. | |
21 | * | |
22 | * @category Text | |
23 | * @package Text_LanguageDetect | |
24 | * @author Christian Weiske <cweiske@php.net> | |
25 | * @copyright 2011 Christian Weiske <cweiske@php.net> | |
26 | * @license http://www.debian.org/misc/bsd.license BSD | |
27 | * @link http://www.loc.gov/standards/iso639-2/php/code_list.php | |
28 | */ | |
class Text_LanguageDetect_ISO639
{
    /**
     * Maps every language name from the language database to its
     * ISO 639-1 2-letter language code.
     *
     * NULL indicates that the language has no 2-letter code.
     *
     * @var array
     */
    public static $nameToCode2 = array(
        'albanian'   => 'sq',
        'arabic'     => 'ar',
        'azeri'      => 'az',
        'bengali'    => 'bn',
        'bulgarian'  => 'bg',
        'cebuano'    => null,
        'croatian'   => 'hr',
        'czech'      => 'cs',
        'danish'     => 'da',
        'dutch'      => 'nl',
        'english'    => 'en',
        'estonian'   => 'et',
        'farsi'      => 'fa',
        'finnish'    => 'fi',
        'french'     => 'fr',
        'german'     => 'de',
        'hausa'      => 'ha',
        'hawaiian'   => null,
        'hindi'      => 'hi',
        'hungarian'  => 'hu',
        'icelandic'  => 'is',
        'indonesian' => 'id',
        'italian'    => 'it',
        'kazakh'     => 'kk',
        'kyrgyz'     => 'ky',
        'latin'      => 'la',
        'latvian'    => 'lv',
        'lithuanian' => 'lt',
        'macedonian' => 'mk',
        'mongolian'  => 'mn',
        'nepali'     => 'ne',
        'norwegian'  => 'no',
        'pashto'     => 'ps',
        'pidgin'     => null,
        'polish'     => 'pl',
        'portuguese' => 'pt',
        'romanian'   => 'ro',
        'russian'    => 'ru',
        'serbian'    => 'sr',
        'slovak'     => 'sk',
        'slovene'    => 'sl',
        'somali'     => 'so',
        'spanish'    => 'es',
        'swahili'    => 'sw',
        'swedish'    => 'sv',
        'tagalog'    => 'tl',
        'turkish'    => 'tr',
        'ukrainian'  => 'uk',
        'urdu'       => 'ur',
        'uzbek'      => 'uz',
        'vietnamese' => 'vi',
        'welsh'      => 'cy',
    );

    /**
     * Maps every language name from the language database to its
     * ISO 639-2 3-letter language code.
     *
     * @var array
     */
    public static $nameToCode3 = array(
        'albanian'   => 'sqi',
        'arabic'     => 'ara',
        'azeri'      => 'aze',
        'bengali'    => 'ben',
        'bulgarian'  => 'bul',
        'cebuano'    => 'ceb',
        'croatian'   => 'hrv',
        'czech'      => 'ces',
        'danish'     => 'dan',
        'dutch'      => 'nld',
        'english'    => 'eng',
        'estonian'   => 'est',
        'farsi'      => 'fas',
        'finnish'    => 'fin',
        'french'     => 'fra',
        'german'     => 'deu',
        'hausa'      => 'hau',
        'hawaiian'   => 'haw',
        'hindi'      => 'hin',
        'hungarian'  => 'hun',
        'icelandic'  => 'isl',
        'indonesian' => 'ind',
        'italian'    => 'ita',
        'kazakh'     => 'kaz',
        'kyrgyz'     => 'kir',
        'latin'      => 'lat',
        'latvian'    => 'lav',
        'lithuanian' => 'lit',
        'macedonian' => 'mkd',
        'mongolian'  => 'mon',
        'nepali'     => 'nep',
        'norwegian'  => 'nor',
        'pashto'     => 'pus',
        'pidgin'     => 'crp',
        'polish'     => 'pol',
        'portuguese' => 'por',
        'romanian'   => 'ron',
        'russian'    => 'rus',
        'serbian'    => 'srp',
        'slovak'     => 'slk',
        'slovene'    => 'slv',
        'somali'     => 'som',
        'spanish'    => 'spa',
        'swahili'    => 'swa',
        'swedish'    => 'swe',
        'tagalog'    => 'tgl',
        'turkish'    => 'tur',
        'ukrainian'  => 'ukr',
        'urdu'       => 'urd',
        'uzbek'      => 'uzb',
        'vietnamese' => 'vie',
        'welsh'      => 'cym',
    );

    /**
     * Maps ISO 639-1 2-letter language codes to the language names
     * in the language database.
     *
     * Not all languages have a 2-letter code, so some are missing.
     *
     * @var array
     */
    public static $code2ToName = array(
        'ar' => 'arabic',
        'az' => 'azeri',
        'bg' => 'bulgarian',
        'bn' => 'bengali',
        'cs' => 'czech',
        'cy' => 'welsh',
        'da' => 'danish',
        'de' => 'german',
        'en' => 'english',
        'es' => 'spanish',
        'et' => 'estonian',
        'fa' => 'farsi',
        'fi' => 'finnish',
        'fr' => 'french',
        'ha' => 'hausa',
        'hi' => 'hindi',
        'hr' => 'croatian',
        'hu' => 'hungarian',
        'id' => 'indonesian',
        'is' => 'icelandic',
        'it' => 'italian',
        'kk' => 'kazakh',
        'ky' => 'kyrgyz',
        'la' => 'latin',
        'lt' => 'lithuanian',
        'lv' => 'latvian',
        'mk' => 'macedonian',
        'mn' => 'mongolian',
        'ne' => 'nepali',
        'nl' => 'dutch',
        'no' => 'norwegian',
        'pl' => 'polish',
        'ps' => 'pashto',
        'pt' => 'portuguese',
        'ro' => 'romanian',
        'ru' => 'russian',
        'sk' => 'slovak',
        'sl' => 'slovene',
        'so' => 'somali',
        'sq' => 'albanian',
        'sr' => 'serbian',
        'sv' => 'swedish',
        'sw' => 'swahili',
        'tl' => 'tagalog',
        'tr' => 'turkish',
        'uk' => 'ukrainian',
        'ur' => 'urdu',
        'uz' => 'uzbek',
        'vi' => 'vietnamese',
    );

    /**
     * Maps ISO 639-2 3-letter language codes to the language names
     * in the language database.
     *
     * This is the exact inverse of $nameToCode3.
     *
     * @var array
     */
    public static $code3ToName = array(
        'ara' => 'arabic',
        'aze' => 'azeri',
        'ben' => 'bengali',
        'bul' => 'bulgarian',
        'ceb' => 'cebuano',
        'ces' => 'czech',
        'crp' => 'pidgin',
        'cym' => 'welsh',
        'dan' => 'danish',
        'deu' => 'german',
        'eng' => 'english',
        'est' => 'estonian',
        'fas' => 'farsi',
        'fin' => 'finnish',
        'fra' => 'french',
        'hau' => 'hausa',
        'haw' => 'hawaiian',
        'hin' => 'hindi',
        'hrv' => 'croatian',
        'hun' => 'hungarian',
        'ind' => 'indonesian',
        'isl' => 'icelandic',
        'ita' => 'italian',
        'kaz' => 'kazakh',
        'kir' => 'kyrgyz',
        'lat' => 'latin',
        'lav' => 'latvian',
        'lit' => 'lithuanian',
        'mkd' => 'macedonian',
        'mon' => 'mongolian',
        'nep' => 'nepali',
        'nld' => 'dutch',
        'nor' => 'norwegian',
        'pol' => 'polish',
        'por' => 'portuguese',
        'pus' => 'pashto',
        // "ron" is Romanian; "rom" would be Romany, which is not in the database.
        'ron' => 'romanian',
        'rus' => 'russian',
        'slk' => 'slovak',
        'slv' => 'slovene',
        'som' => 'somali',
        'spa' => 'spanish',
        'sqi' => 'albanian',
        'srp' => 'serbian',
        'swa' => 'swahili',
        'swe' => 'swedish',
        'tgl' => 'tagalog',
        'tur' => 'turkish',
        'ukr' => 'ukrainian',
        'urd' => 'urdu',
        'uzb' => 'uzbek',
        'vie' => 'vietnamese',
    );

    /**
     * Returns the 2-letter ISO 639-1 code for the given language name.
     *
     * The lookup is case-insensitive.
     *
     * @param string $lang English language name like "swedish"
     *
     * @return string|null Two-letter language code (e.g. "sv"), or NULL if
     *                     the name is unknown or has no 2-letter code
     */
    public static function nameToCode2($lang)
    {
        return self::lookup(self::$nameToCode2, $lang);
    }

    /**
     * Returns the 3-letter ISO 639-2 code for the given language name.
     *
     * The lookup is case-insensitive.
     *
     * @param string $lang English language name like "swedish"
     *
     * @return string|null Three-letter language code (e.g. "swe"), or NULL
     *                     if the name is unknown
     */
    public static function nameToCode3($lang)
    {
        return self::lookup(self::$nameToCode3, $lang);
    }

    /**
     * Returns the language name for the given 2-letter ISO 639-1 code.
     *
     * The lookup is case-insensitive, so "SV" and "sv" are equivalent.
     *
     * @param string $code Two-letter language code (e.g. "sv")
     *
     * @return string|null English language name like "swedish", or NULL if
     *                     the code is unknown
     */
    public static function code2ToName($code)
    {
        return self::lookup(self::$code2ToName, $code);
    }

    /**
     * Returns the language name for the given 3-letter ISO 639-2 code.
     *
     * The lookup is case-insensitive, so "SWE" and "swe" are equivalent.
     *
     * @param string $code Three-letter language code (e.g. "swe")
     *
     * @return string|null English language name like "swedish", or NULL if
     *                     the code is unknown
     */
    public static function code3ToName($code)
    {
        return self::lookup(self::$code3ToName, $code);
    }

    /**
     * Case-insensitive lookup shared by all four public mapping methods.
     *
     * @param array  $map Mapping table whose keys are all lowercase
     * @param string $key Language name or code in any letter case
     *
     * @return string|null Mapped value, or NULL if the key is missing or
     *                     explicitly mapped to NULL
     */
    private static function lookup(array $map, $key)
    {
        // All table keys are lowercase, so normalise the key before using it.
        $key = strtolower($key);

        // isset() is false for NULL values too, which yields the documented
        // NULL result for languages that have no 2-letter code.
        return isset($map[$key]) ? $map[$key] : null;
    }
}