The efficiency is quite good for this use case; if you adapt the code to other uses, I can't promise the same efficiency.
/* vim: set expandtab tabstop=4 shiftwidth=4: */
// +------------------------------------------------------------------------
// Name: Weight Calculation
// Description: With slight modification, this can also be used for word segmentation, word-frequency statistics, full-text search and spam (garbage) detection
// Date: 2013/12/16 08:51
// Authors: latel
// +------------------------------------------------------------------------
//
/* External invocation example */
/*
$aItems = array(
    'Chinaisbig',
    'Whichisnot',
    'Totalyrightforme',
);
$aTable = array(
    'China,is|small',
    'China,big|me',
    'China,is|big,which|not,me',
    'Totaly|right,for,me',
);
$oWeight = new Weight;
$oWeight->newItems($aItems);
$oWeight->newTable($aTable);
$aResult = $oWeight->getShow();
*/
/**
 * Scores items against a table of keyword rows using a byte-wise trie.
 *
 * Each table row is a list of keywords separated by ',' or '|'. Every keyword
 * is stored in a trie together with the key of the row it came from. An item
 * (a plain string) is scanned for keywords; each hit votes for the rows that
 * contain the keyword, and the 'max' rule reports the row key(s) with the most
 * votes for each item.
 *
 * Matching is byte-wise and case-sensitive.
 */
class Weight {
    /**
     * Trie nodes indexed by node number; node 0 is the root.
     * A node maps a single character to the number of its child node, and may
     * hold an 'acc' entry listing the table keys of the words ending there.
     * Characters are single bytes, so they can never collide with 'acc'.
     */
    protected $aDict = array(array());

    /** Items to be scored, keyed as supplied to newItems(). */
    protected $aItems = array();

    /** Normalized rule name used for the cached $aShow, or null if no cache. */
    protected $sLastRule;

    /** Per-item vote counts (table key => number of hits) from the last genShow(). */
    protected $aMatchs = array();

    /** Cached result of the last genShow(). */
    protected $aShow = array();

    /**
     * Drops every cached result so the next getShow() recomputes.
     */
    private function init() {
        // Reset to empty arrays instead of unset(): the declared properties
        // stay defined for subclasses that read them.
        $this->aShow = array();
        $this->aMatchs = array();
        $this->sLastRule = null;
    }

    /**
     * Canonical form of a rule name: trimmed and lower-cased, so 'max',
     * 'Max' and ' Max ' all select the same rule.
     */
    private function normalizeRule($sRule) {
        return strtolower(trim((string) $sRule));
    }

    /**
     * Replaces the items to be scored.
     *
     * @param mixed $mItems A single item or an array of items; array keys are
     *                      preserved in the result of getShow().
     */
    public function newItems($mItems) {
        $this->aItems = is_array($mItems) ? $mItems : array($mItems);
        $this->init();
    }

    /**
     * Adds the keywords of a table to the dictionary.
     *
     * Words are added to the existing dictionary; the dictionary is not
     * cleared first. Surrounding whitespace of each keyword is ignored and
     * empty keywords are skipped.
     *
     * @param array $aTable Rows of keywords separated by ',' or '|'; the array
     *                      key of a row is the key reported by getShow().
     */
    public function newTable(array $aTable) {
        foreach ($aTable as $iTableKey => $sTableLine) {
            // ',' and '|' are equivalent separators.
            $aWords = explode(',', str_replace('|', ',', (string) $sTableLine));
            foreach ($aWords as $sWord) {
                $sWord = trim($sWord);
                if ($sWord !== '') {
                    $this->genDict($sWord, $iTableKey);
                }
            }
        }
        $this->init();
    }

    /**
     * Returns the result for every item, using the cache when it is valid.
     *
     * @param string $sRule Selection rule; only 'max' is implemented.
     * @return array Item key => list of winning table keys. Empty array when
     *               there are no items or the dictionary holds no words.
     */
    public function getShow($sRule = 'max') {
        // The root node always exists, so "no dictionary" means that the root
        // has no children.
        if (empty($this->aItems) || empty($this->aDict[0])) {
            return array();
        }
        if (empty($this->aShow) || $this->normalizeRule($sRule) !== $this->sLastRule) {
            return $this->genShow($sRule);
        }
        return $this->aShow;
    }

    /**
     * Computes the result for every item and stores it in the cache.
     *
     * @param string $sRule Selection rule; only 'max' is implemented. For any
     *                      other rule no per-item entry is produced.
     * @return array Item key => list of table keys with the highest hit count
     *               (an empty list for an item without any hit).
     */
    public function genShow($sRule) {
        $sRuleKey = $this->normalizeRule($sRule);
        $aShow = array();
        $aMatchs = array();
        foreach ($this->aItems as $mItemKey => $sItem) {
            $aCounts = array_count_values($this->matchWord($sItem));
            $aMatchs[] = $aCounts;
            switch ($sRuleKey) {
                case 'max':
                    // max() must not be called on an empty array.
                    $aShow[$mItemKey] = empty($aCounts)
                        ? array()
                        : array_keys($aCounts, max($aCounts));
                    break;
            }
        }
        $this->aShow = $aShow;
        $this->aMatchs = $aMatchs;
        // Remember the rule so getShow() can reuse the cached result.
        $this->sLastRule = $sRuleKey;
        return $aShow;
    }

    /**
     * Inserts one word into the trie and tags its final node with $iKey.
     *
     * @param string $mWord Word to insert; an empty word is ignored.
     * @param mixed  $iKey  Table key recorded for the word.
     */
    private function genDict($mWord, $iKey = '') {
        $mWord = (string) $mWord;
        if ($mWord === '') {
            return;
        }
        $iCur = 0; // current node number
        $iLen = strlen($mWord);
        for ($i = 0; $i < $iLen; $i++) {
            $sChar = $mWord[$i];
            if (!isset($this->aDict[$iCur][$sChar])) {
                // Node numbers are contiguous, so the node count is the next
                // free node number.
                $iNext = count($this->aDict);
                $this->aDict[$iNext] = array();
                $this->aDict[$iCur][$sChar] = $iNext;
            }
            $iCur = $this->aDict[$iCur][$sChar];
        }
        $this->aDict[$iCur]['acc'][] = $iKey;
    }

    /**
     * Scans a string for dictionary words.
     *
     * From each start position the trie is followed until a word ends (the
     * shortest word wins; scanning resumes right after it) or no transition
     * exists (scanning resumes one character after the start position). An
     * attempt that reaches the end of the string without completing a word is
     * treated as a failure, so no sentinel character has to be appended.
     *
     * @param string $sLine Text to scan.
     * @return array Table keys of every word found, in order of occurrence
     *               (a key appears once per hit).
     */
    public function matchWord($sLine) {
        $sLine = (string) $sLine;
        $iLen = strlen($sLine);
        $aReturn = array();
        $iStart = 0;
        while ($iStart < $iLen) {
            $iCur = 0;
            $iNextStart = $iStart + 1;
            for ($iOffset = $iStart; $iOffset < $iLen; $iOffset++) {
                $sChar = $sLine[$iOffset];
                if (!isset($this->aDict[$iCur][$sChar])) {
                    break;
                }
                $iCur = $this->aDict[$iCur][$sChar];
                if (isset($this->aDict[$iCur]['acc'])) {
                    foreach ($this->aDict[$iCur]['acc'] as $mKey) {
                        $aReturn[] = $mKey;
                    }
                    $iNextStart = $iOffset + 1;
                    break;
                }
            }
            $iStart = $iNextStart;
        }
        return $aReturn;
    }
}
- ?>
Copy Code |