準備:
1、prototype.js,http://prototype.conio.net/,用於以 Ajax 載入漢字詞庫
2、漢字詞庫:http://cn.minidx.com/index.php?option=com_docman&task=doc_download&gid=47
----------------------------------------------------------------------------------------------------------------------------------------------
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>中英文混合分詞</title>
<script type="text/javascript" src="prototype.js"></script>
<script type="text/javascript">
//<![CDATA[
/*
 * Mixed Chinese/English word segmentation demo.
 *
 * Runs of \w characters (letters, digits, underscore) are emitted as single
 * tokens. Any other text is segmented by forward maximum matching against a
 * word dictionary loaded via Prototype's Ajax.Request; a character that
 * starts no dictionary word becomes a token on its own.
 */

// Dictionary text: one word per line, every entry delimited by "\r\n" on both
// sides. Stays empty until the Ajax request succeeds, in which case every
// non-\w character is emitted as its own token.
var dict = "";

// Not read by the segmenter; retained so existing references keep working.
var lastword = "";

// Tokens produced by the most recent divide() call.
var rs = [];

// Load the dictionary. onSuccess (rather than onComplete) is used so that the
// body of a failed request, e.g. a 404 page, never ends up as the dictionary.
new Ajax.Request('js/dict1.txt', {
  onSuccess: function (response) {
    dict = normalizeDict(response.responseText);
  }
});

/**
 * Brings raw dictionary text into the layout the matcher searches for:
 * "\r\n" line endings plus a leading and a trailing line break. Files that
 * use "\n" or "\r" only, or lack the surrounding line breaks, therefore work.
 *
 * @param {string} raw Dictionary file content, one word per line.
 * @returns {string} Normalised dictionary text.
 */
function normalizeDict(raw) {
  var body = raw == null ? "" : String(raw);
  return "\r\n" + body.replace(/\r\n|\r|\n/g, "\r\n") + "\r\n";
}

/**
 * Returns the first character of a non-empty string, keeping a UTF-16
 * surrogate pair together so that it is not split into two halves.
 *
 * @param {string} text Non-empty string.
 * @returns {string} One or two UTF-16 code units.
 */
function leadingChar(text) {
  var hi = text.charCodeAt(0);
  if (hi >= 0xD800 && hi <= 0xDBFF && text.length > 1) {
    var lo = text.charCodeAt(1);
    if (lo >= 0xDC00 && lo <= 0xDFFF) {
      return text.substring(0, 2);
    }
  }
  return text.substring(0, 1);
}

/**
 * Finds the longest dictionary word that is a prefix of `text`.
 *
 * @param {string} text Remaining text to segment.
 * @param {string} head First character of `text`; only entries that start
 *   with it are inspected.
 * @returns {string} The longest matching word, or "" when none matches.
 */
function longestDictWord(text, head) {
  var needle = "\r\n" + head;
  var best = "";
  var end = 0;
  var start;
  while ((start = dict.indexOf(needle, end)) !== -1) {
    // The entry runs up to the next line break; a final entry without one
    // runs to the end of the dictionary.
    end = dict.indexOf("\r\n", start + 1);
    if (end === -1) {
      end = dict.length;
    }
    var entry = dict.substring(start, end).replace(/\s/g, "");
    // The word must match at the START of the text: accepting a match
    // further along would emit tokens out of order.
    if (entry.length > best.length && text.substring(0, entry.length) === entry) {
      best = entry;
    }
    if (end >= dict.length) {
      break;
    }
  }
  return best;
}

/**
 * Segments `text` and appends the tokens to the global `rs` array.
 * Whitespace-only tokens are dropped. Iterative, so long inputs cannot
 * exhaust the call stack.
 *
 * @param {string} text Text to segment.
 * @returns {boolean} Always true.
 */
function divide(text) {
  var rest = text == null ? "" : String(text);
  while (rest.length > 0) {
    var head = leadingChar(rest);

    // English (or any \w run): take the whole run, then skip the whitespace
    // that follows it.
    if (/\w/.test(head)) {
      var run = /^\w+/.exec(rest)[0];
      rs.push(run);
      rest = rest.substring(run.length).replace(/^\s+/, "");
      continue;
    }

    // Words missing from the dictionary fall back to a single character.
    var token = longestDictWord(rest, head) || head;
    if (token.replace(/\s/g, "") !== "") {
      rs.push(token);
    }
    rest = rest.substring(token.length);
  }
  return true;
}

/**
 * Event handler: segments the content of the #word input and shows the
 * comma-separated tokens in #dividresult.
 */
function dodivde() {
  rs = [];
  divide($('word').value);

  // Write the result as a text node so that user input is never parsed as HTML.
  var output = $('dividresult');
  while (output.firstChild) {
    output.removeChild(output.firstChild);
  }
  output.appendChild(document.createTextNode(rs.join(",")));
}
//]]>
</script>
</head>
<body>
<input type="text" name="word" id="word" value="我Welcome歡迎to Mozilla Firefox Help" onblur="dodivde();" />
<input name="do" type="button" value="DO IT" onclick="dodivde();" />
<span id="dividresult"></span>
</body>
</html>
-------------------------------------------------------------------------------------------------------------------
自建詞庫注意事項:詞庫檔的開頭和結尾都要有換行,換行符必須是 \r\n,編碼必須是 UTF-8。
關鍵是詞庫。
我的QQ群:
PHPer&Webgame&移動開發,群號:95303036