about | summary | refs | log | tree | commit | diff | homepage
path: root/index.php
diff options
context:
space:
mode:
author: Sebastien SAUVAGE <sebsauvage@sebsauvage.net> 2013-08-03 22:10:04 +0200
committer: Sebastien SAUVAGE <sebsauvage@sebsauvage.net> 2013-08-03 22:10:04 +0200
commit: 002ef0e5c8ed2bab38e205a5d54617780f25c3a9 (patch)
tree: 3d138b5f5a30db4fbe16d3bd6ae09182eb591994 /index.php
parent: f6a6ca0aec6cc09ee76d827cc07d3c0ed66c8eb0 (diff)
download: Shaarli-002ef0e5c8ed2bab38e205a5d54617780f25c3a9.tar.gz
Shaarli-002ef0e5c8ed2bab38e205a5d54617780f25c3a9.tar.zst
Shaarli-002ef0e5c8ed2bab38e205a5d54617780f25c3a9.zip
Better encoding handling in title parsing
Thanks to a patch from Le Hollandais Volant.
Diffstat (limited to 'index.php')
-rw-r--r-- index.php 25
1 file changed, 23 insertions, 2 deletions
diff --git a/index.php b/index.php
index 8b5c912a..d88f471c 100644
--- a/index.php
+++ b/index.php
@@ -1545,8 +1545,29 @@ function renderPage()
1545 { 1545 {
1546 list($status,$headers,$data) = getHTTP($url,4); // Short timeout to keep the application responsive. 1546 list($status,$headers,$data) = getHTTP($url,4); // Short timeout to keep the application responsive.
1547 // FIXME: Decode charset according to specified in either 1) HTTP response headers or 2) <head> in html 1547 // FIXME: Decode charset according to specified in either 1) HTTP response headers or 2) <head> in html
1548 if (strpos($status,'200 OK')!==false) $title=html_entity_decode(html_extract_title($data),ENT_QUOTES,'UTF-8'); 1548 if (strpos($status,'200 OK')!==false)
1549 1549 {
1550 // Look for charset in html header.
1551 preg_match('#<meta .*charset=.*>#Usi', $data, $meta);
1552
1553 // If found, extract encoding.
1554 if (!empty($meta[0]))
1555 {
1556 // Get encoding specified in header.
1557 preg_match('#charset="?(.*)"#si', $meta[0], $enc);
1558 // If charset not found, use utf-8.
1559 $html_charset = (!empty($enc[1])) ? strtolower($enc[1]) : 'utf-8';
1560 }
1561 else { $html_charset = 'utf-8'; }
1562
1563 // Extract title
1564 $title = html_extract_title($data);
1565 if (!empty($title))
1566 {
1567 // Re-encode title in utf-8 if necessary.
1568 $title = ($html_charset == 'iso-8859-1') ? utf8_encode($title) : $title;
1569 }
1570 }
1550 } 1571 }
1551 if ($url=='') $url='?'.smallHash($linkdate); // In case of empty URL, this is just a text (with a link that point to itself) 1572 if ($url=='') $url='?'.smallHash($linkdate); // In case of empty URL, this is just a text (with a link that point to itself)
1552 $link = array('linkdate'=>$linkdate,'title'=>$title,'url'=>$url,'description'=>$description,'tags'=>$tags,'private'=>0); 1573 $link = array('linkdate'=>$linkdate,'title'=>$title,'url'=>$url,'description'=>$description,'tags'=>$tags,'private'=>0);