Weight calculation in PHP; with slight modification it can also be used for word segmentation, word frequency statistics, full-text search, spam detection, etc.

Source: Internet
Author: User
Weight calculation; with slight modification it can also be used for word segmentation, word frequency statistics, full-text search, spam detection, etc.
The efficiency is quite respectable as written. I cannot guarantee the efficiency if you adapt it for other purposes.

  1. /* Vim: set expandtab tabstop = 4 shiftwidth = 4 :*/
  2. // + ------------------------------------------------------------------------
  3. // Name: weight calculation
  4. // Description: slightly modified. It can also be used for word segmentation, word frequency statistics, full-text search, and spam detection.
  5. // Date: 2013/12/16 08:51
  6. // Authors: latel
  7. // + ------------------------------------------------------------------------
  8. //
  9. /* External call example */
  10. /*
  11. $ AItems = array (
  12. 'Chinaisbig ',
  13. 'Whichisnot ',
  14. 'Totalyrightforme ',
  15. );
  16. $ ATable = array (
  17. 'China, is | small ',
  18. 'China, big | me ',
  19. 'China, is | big, which | not, Me ',
  20. 'Totaly | right, for, Me ',
  21. );
  22. $ OWeight = new ttrie;
  23. $ OWeight-> newItems ($ aItems );
  24. $ AResult = $ oWeight-> newTable ($ aTable );
  25. */
  26. Class weight {
  27. Protected $ aDict = array ());
  28. Protected $ aItems = array ();
  29. Protected $ sLastRule;
  30. Protected $ aMatchs = array ();
  31. Protected $ aShow = array ();
  32. Private function init (){
  33. // Clear the matching table and output result of the record
  34. Unset ($ this-> aShow );
  35. }
  36. Public function newItems ($ mItems ){
  37. // Import a new project
  38. $ This-> aItems = (is_array ($ mItems ))? $ MItems: array ($ mItems );
  39. $ This-> init ();
  40. }
  41. Public function newTable (array $ aTable ){
  42. // Import a new table and generate a dictionary
  43. Foreach ($ aTable as $ iTableKey => $ sTableLine ){
  44. $ ATableLine = explode (',', str_replace ('|', $ sTableLine ));
  45. $ Setter = function ($ v, $ k, $ paraMeter ){
  46. $ K1 = $ paraMeter [0]; $ oWeight = $ paraMeter [1];
  47. $ OWeight-> genDict ($ v, $ k1 );
  48. };
  49. Array_walk ($ aTableLine, $ setter, array ($ iTableKey, $ this ));
  50. }
  51. $ This-> init ();
  52. }
  53. Public function getShow ($ sRule = 'Max '){
  54. // Obtain the final display result
  55. If (empty ($ this-> aItems) | empty ($ this-> aDict ))
  56. Return array ();
  57. If (empty ($ this-> aShow) | $ sRule! = $ This-> sLastRule)
  58. Return $ this-> genShow ($ sRule );
  59. Return $ this-> aShow;
  60. }
  61. Public function genShow ($ sRule ){
  62. $ AShow = array ();
  63. $ AMatchs = array ();
  64. $ Getter = function ($ v, $ k, $ oWeight) use (& $ aShow, & $ aMatchs, $ sRule ){
  65. $ T = array_count_values ($ oWeight-> matchWord ($ v ));
  66. $ AMatchs [] = $ t;
  67. Switch ($ sRule ){
  68. Case 'Max ':
  69. $ AShow [$ k] = array_keys ($ t, max ($ t ));
  70. Break;
  71. }
  72. };
  73. Array_walk ($ this-> aItems, $ getter, $ this );
  74. $ This-> aShow = $ aShow;
  75. $ This-> aMatchs = $ aMatchs;
  76. Return $ aShow;
  77. }
  78. Private function genDict ($ mWord, $ iKey = ''){
  79. $ IInsertPonit = count ($ this-> aDict );
  80. $ ICur = 0; // current node number
  81. Foreach (str_split ($ mWord) as $ iChar ){
  82. If (isset ($ this-> aDict [$ iCur] [$ iChar]) {
  83. $ ICur = $ this-> aDict [$ iCur] [$ iChar];
  84. Continue;
  85. }
  86. $ This-> aDict [$ iInsertPonit] = array ();
  87. $ This-> aDict [$ iCur] [$ iChar] = $ iInsertPonit;
  88. $ ICur = $ iInsertPonit;
  89. $ IInsertPonit ++;
  90. }
  91. $ This-> aDict [$ iCur] ['ACC '] [] = $ iKey;
  92. }
  93. Function matchWord ($ sLine ){
  94. $ ICur = $ iOffset = $ iPosition = 0;
  95. $ SLine. = "\ 0 ";
  96. $ ILen = strlen ($ sLine );
  97. $ AReturn = array ();
  98. While ($ iOffset <$ iLen ){
  99. $ SChar = $ sLine {$ iOffset };
  100. If (isset ($ this-> aDict [$ iCur] [$ sChar]) {
  101. $ ICur = $ this-> aDict [$ iCur] [$ sChar];
  102. If (isset ($ this-> aDict [$ iCur] ['ACC ']) {
  103. $ AReturn = array_merge ($ aReturn, $ this-> aDict [$ iCur] ['ACC ']);
  104. $ IPosition = $ iOffset + 1;
  105. $ ICur = 0;
  106. }
  107. } Else {
  108. $ ICur = 0;
  109. $ IOffset = $ iPosition;
  110. $ IPosition = $ iOffset + 1;
  111. }
  112. ++ $ IOffset;
  113. }
  114. Return $ aReturn;
  115. }
  116. }
  117. ?>

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.