Performance is quite good for this use case, but I cannot guarantee the same efficiency if you adapt the code for other purposes.
/* vim: set expandtab tabstop=4 shiftwidth=4: */
// +------------------------------------------------------------------------
// Name: weight calculation
// Description: With slight modification, it can also be used for word segmentation, word-frequency statistics, full-text search, and spam detection.
// Date: 2013/12/16 08:51
// Authors: latel
// +------------------------------------------------------------------------
//
/* External call example */
/*
$aItems = array(
    'chinaisbig',
    'whichisnot',
    'totalyrightforme',
);
$aTable = array(
    'china,is|small',
    'china,big|me',
    'china,is|big,which|not,me',
    'totaly|right,for,me',
);
$oWeight = new weight;
$oWeight->newItems($aItems);
$oWeight->newTable($aTable);
$aResult = $oWeight->getShow();
*/
/**
 * Keyword weighting backed by a byte-wise trie.
 *
 * Usage: load the strings to classify with newItems(), load the keyword
 * table with newTable(), then call getShow() to learn, for every item,
 * which table rows matched it most often.
 *
 * Matching is byte-wise (str_split / string offsets) and stops at the
 * shortest dictionary word found at each position.
 */
class weight {
    /**
     * Trie stored as a flat list: node id => array(byte => child node id).
     * Node 0 is the root. A node that ends a word also carries the key
     * 'ACC ' holding the list of table keys that contain that word.
     */
    protected $aDict = array();
    /** Items to classify, as passed to newItems(). */
    protected $aItems = array();
    /** Normalized rule used for the cached $aShow, or null when no cache exists. */
    protected $sLastRule;
    /** Per item (in iteration order): table key => number of matches. */
    protected $aMatchs = array();
    /** Cached result of genShow(): item key => list of winning table keys. */
    protected $aShow = array();

    /**
     * Discard cached match counts and results.
     *
     * The properties are reset rather than unset() so they stay declared.
     */
    private function init() {
        $this->aShow     = array();
        $this->aMatchs   = array();
        $this->sLastRule = null;
    }

    /**
     * Replace the items to classify.
     *
     * @param mixed $mItems A single item or an array of items.
     */
    public function newItems($mItems) {
        $this->aItems = is_array($mItems) ? $mItems : array($mItems);
        $this->init();
    }

    /**
     * Add the words of a keyword table to the dictionary.
     *
     * Each table line is a list of words separated by ',' or '|'. Every word
     * is registered under the array key of its line. Words are added to the
     * existing dictionary; it is not cleared first.
     *
     * @param array $aTable Table key => line of separated words.
     */
    public function newTable(array $aTable) {
        foreach ($aTable as $iTableKey => $sTableLine) {
            // Treat '|' as just another separator before splitting on ','.
            $aWords = explode(',', str_replace('|', ',', $sTableLine));
            foreach ($aWords as $sWord) {
                $this->genDict($sWord, $iTableKey);
            }
        }
        $this->init();
    }

    /**
     * Return the result for the given rule, using the cache when possible.
     *
     * @param string $sRule Selection rule; only 'Max' is implemented.
     *                      Surrounding whitespace is ignored.
     * @return array Item key => list of table keys; empty array when no
     *               items or no dictionary have been loaded.
     */
    public function getShow($sRule = 'Max ') {
        if (empty($this->aItems) || empty($this->aDict)) {
            return array();
        }
        if (empty($this->aShow) || trim($sRule) !== $this->sLastRule) {
            return $this->genShow($sRule);
        }
        return $this->aShow;
    }

    /**
     * Match every item against the dictionary and rebuild the cached result.
     *
     * Rule 'Max' keeps, for each item, the table keys with the highest match
     * count (all of them on a tie). An item without any match gets an empty
     * list. Unknown rules produce an empty result.
     *
     * @param string $sRule Selection rule; surrounding whitespace is ignored.
     * @return array Item key => list of table keys.
     */
    public function genShow($sRule) {
        $sRule   = trim($sRule);
        $aShow   = array();
        $aMatchs = array();
        foreach ($this->aItems as $mItemKey => $sItem) {
            $aCounts   = array_count_values($this->matchWord($sItem));
            $aMatchs[] = $aCounts;
            switch ($sRule) {
                case 'Max':
                    // max() rejects an empty array, so handle "no match" explicitly.
                    $aShow[$mItemKey] = empty($aCounts)
                        ? array()
                        : array_keys($aCounts, max($aCounts));
                    break;
            }
        }
        $this->aShow     = $aShow;
        $this->aMatchs   = $aMatchs;
        $this->sLastRule = $sRule; // remember the rule so getShow() can reuse the cache
        return $aShow;
    }

    /**
     * Insert one word into the trie and tag its final node with a table key.
     *
     * @param string $mWord Word to insert; empty words are ignored.
     * @param mixed  $iKey  Table key recorded on the word's final node.
     */
    private function genDict($mWord, $iKey = '') {
        $mWord = (string) $mWord;
        if ($mWord === '') {
            return;
        }
        // The root must exist before node ids are handed out; otherwise the
        // first new node would receive id 0 and become a self-loop on the root.
        if (!isset($this->aDict[0])) {
            $this->aDict[0] = array();
        }
        $iInsertPoint = count($this->aDict); // next free node id
        $iCur = 0;                           // current node id
        foreach (str_split($mWord) as $sChar) {
            if (isset($this->aDict[$iCur][$sChar])) {
                $iCur = $this->aDict[$iCur][$sChar];
                continue;
            }
            $this->aDict[$iInsertPoint] = array();
            $this->aDict[$iCur][$sChar] = $iInsertPoint;
            $iCur = $iInsertPoint;
            $iInsertPoint++;
        }
        // 'ACC ' is longer than one byte, so it cannot collide with an edge key.
        $this->aDict[$iCur]['ACC '][] = $iKey;
    }

    /**
     * Scan a string and collect the table keys of every dictionary word found.
     *
     * The scan restarts from the root right after each complete word. On a
     * mismatch it resumes one byte after the position where the failed
     * attempt started.
     *
     * @param string $sLine Text to scan.
     * @return array List of table keys, one entry per (word, table) hit.
     */
    public function matchWord($sLine) {
        $iCur = $iOffset = $iPosition = 0;
        // The NUL sentinel forces a final mismatch so that a partial match
        // at the end of the line is still backtracked.
        $sLine .= "\0";
        $iLen = strlen($sLine);
        $aReturn = array();
        while ($iOffset < $iLen) {
            $sChar = $sLine[$iOffset];
            if (isset($this->aDict[$iCur][$sChar])) {
                $iCur = $this->aDict[$iCur][$sChar];
                if (isset($this->aDict[$iCur]['ACC '])) {
                    foreach ($this->aDict[$iCur]['ACC '] as $mKey) {
                        $aReturn[] = $mKey;
                    }
                    $iPosition = $iOffset + 1; // next attempt starts after this word
                    $iCur = 0;
                }
            } else {
                $iCur = 0;
                $iOffset = $iPosition;         // rewind to where this attempt began
                $iPosition = $iOffset + 1;
            }
            ++$iOffset;
        }
        return $aReturn;
    }
}
?>