Universal Encoding Detector
http://chardet.feedparser.org/
It provides the following sample code snippet:
>>> import urllib
>>> urlread = lambda url: urllib.urlopen(url).read()
>>> import chardet
>>> chardet.detect(urlread("http://google.cn/"))
{'encoding': 'GB2312', 'confidence': 0.99}
>>> chardet.detect(urlread("http://yahoo.co.jp/"))
{'encoding': 'EUC-JP', 'confidence': 0.99}
>>> chardet.detect(urlread("http://amazon.co.jp/"))
{'encoding': 'SHIFT_JIS', 'confidence': 1}
>>> chardet.detect(urlread("http://pravda.ru/"))
{'encoding': 'windows-1251', 'confidence': 0.9355}
>>> chardet.detect(urlread("http://auction.co.kr/"))
{'encoding': 'EUC-KR', 'confidence': 0.99}
>>> chardet.detect(urlread("http://haaretz.co.il/"))
{'encoding': 'windows-1255', 'confidence': 0.99}
>>> chardet.detect(urlread("http://www.nectec.or.th/tindex.html"))
{'encoding': 'TIS-620', 'confidence': 0.7675}
>>> chardet.detect(urlread("http://feedparser.org/docs"))
{'encoding': 'utf-8', 'confidence': 0.99}