/**
 * Forbidden-word filter.
 *
 * Banned words are read from the database in fixed-size id ranges ("pages").
 * Each page is compiled into a character trie, the trie is cached through
 * Rds, and the input text is scanned against the trie of every page.
 *
 * Execution efficiency reported by the original author: about 0.05 seconds
 * per piece of text.
 *
 * NOTE(review): this class assumes that Rds::factory() returns a client whose
 * get() yields false on a cache miss, whose setex() takes (key, ttl, value)
 * like phpredis, and which transparently serializes arrays - confirm against
 * the Rds wrapper.
 *
 * @author Liuxu
 */
class Logic_blackword
{
    // Application identifiers. They are not referenced inside this class.
    const APP_FORUM = 1;
    const APP_BLOG = 2;
    const APP_VOTE = 3;

    /** Number of banned-word ids covered by one trie page. */
    const PAGE_SIZE = 1000;

    /**
     * Lifetime, in seconds, of the cached max id and of each cached trie.
     * NOTE(review): 300 is an assumed value - tune to how quickly edits to
     * the banned-word table must take effect.
     */
    const CACHE_TTL = 300;

    /**
     * Find every banned word contained in a text.
     *
     * @param string $txt Raw text; HTML tags are stripped before scanning.
     * @return array Map of word type => list of matched words (strings).
     *               Empty when nothing matched.
     */
    public function getHitList($txt)
    {
        // Normalising is independent of the page, so do it once up front.
        $chars = $this->toChars($txt);
        if (!$chars) {
            return array();
        }

        $typeByWord = array();
        $max = (int) $this->getMax();
        if ($max > 0) {
            $size = self::PAGE_SIZE;
            $last = (int) ceil($max / $size);
            for ($page = 1; $page <= $last; $page++) {
                // Assign key by key rather than array_merge(): array_merge()
                // renumbers integer keys, and PHP stores a purely numeric
                // word such as "110" under an integer key.
                foreach ($this->getHitListByPage($chars, $page, $size) as $hit => $type) {
                    $typeByWord[$hit] = $type;
                }
            }
        }

        // Regroup word => type into type => [words].
        $wordsByType = array();
        foreach ($typeByWord as $hit => $type) {
            $wordsByType[$type][] = (string) $hit;
        }
        return $wordsByType;
    }

    /**
     * Return the highest banned-word id, which bounds the number of pages.
     *
     * The value is read from the cache first and recomputed from the
     * database (then cached) on a miss.
     *
     * @return int Highest id, or 0 when the query returns no usable row.
     */
    private function getMax()
    {
        $redis = Rds::factory();
        $memKey = 'Blackword_max';
        $max = $redis->get($memKey);
        if ($max === false) {
            $max = 0;
            $blackWord = new Model_blackword_blackword();
            $para = array('field' => 'MAX(id) AS max_id');
            $result = $blackWord->search($para);
            if (isset($result[0]['max_id'])) {
                $max = (int) $result[0]['max_id'];
            }
            $redis->setex($memKey, self::CACHE_TTL, $max);
        }
        return (int) $max;
    }

    /**
     * Scan the text against the banned words of a single page.
     *
     * @param array $chars Normalised text as a list of UTF-8 characters.
     * @param int   $page  1-based page number.
     * @param int   $size  Number of ids per page.
     * @return array Map of matched word => word type.
     */
    private function getHitListByPage(array $chars, $page = 1, $size = self::PAGE_SIZE)
    {
        $hitList = array();

        $wordTree = $this->getWordTreeByPage($page, $size);
        if (!is_array($wordTree) || !$wordTree) {
            return $hitList;
        }

        $len = count($chars);
        for ($i = 0; $i < $len; $i++) {
            // Only positions whose character starts some banned word can match.
            if (!isset($wordTree[$chars[$i]])) {
                continue;
            }
            foreach ($this->getHitListByTree($chars, $i, $wordTree) as $hit => $type) {
                $hitList[$hit] = $type;
            }
        }
        return $hitList;
    }

    /**
     * Collect every banned word that starts at a given position.
     *
     * Walks the trie one character at a time and stops at the first
     * character that has no child node. Every node carrying a 'type' entry
     * on the way marks the end of a banned word, so nested words
     * (e.g. "ab" inside "abc") are all reported.
     *
     * @param array $chars    Normalised text as a list of UTF-8 characters.
     * @param int   $offset   Index in $chars at which matching starts.
     * @param array $wordTree Trie as built by getWordTreeByPage().
     * @return array Map of matched word => word type.
     */
    private function getHitListByTree(array $chars, $offset, array $wordTree)
    {
        $hitList = array();
        $hit = '';
        $node = $wordTree;
        for ($i = $offset, $len = count($chars); $i < $len; $i++) {
            $char = $chars[$i];
            if (!isset($node[$char])) {
                break;
            }
            $hit .= $char;
            // Plain assignment is enough: the trie is only read here, so
            // copy-on-write never duplicates it.
            $node = $node[$char];
            if (isset($node['type'])) {
                $hitList[$hit] = $node['type'];
            }
        }
        return $hitList;
    }

    /**
     * Build (or fetch from the cache) the trie for one page of banned words.
     *
     * A page covers the rows with status=1 and id in ($start, $end], where
     * $start = ($page - 1) * $size and $end = $start + $size. Each trie node
     * is an array keyed by the next character; a node that ends a word also
     * holds that word's type under the key 'type'.
     *
     * @param int $page 1-based page number.
     * @param int $size Number of ids per page.
     * @return array The trie; empty when the page holds no usable word.
     */
    private function getWordTreeByPage($page = 1, $size = self::PAGE_SIZE)
    {
        // Both values end up in a cache key and in SQL text: force integers.
        $page = (int) $page;
        $size = (int) $size;

        $redis = Rds::factory();
        $memKey = 'blackword_tree_' . $page . '_' . $size;
        $wordTree = $redis->get($memKey);
        if ($wordTree === false) {
            $wordTree = array();
            $blackWord = new Model_blackword_blackword();
            $start = ($page - 1) * $size;
            $end = $start + $size;
            $para = array('where' => 'status=1 AND id>' . $start . ' AND id<=' . $end);
            $result = $blackWord->search($para);
            if ($result) {
                foreach ($result as $value) {
                    // A row without a type could never be reported as a hit.
                    if (!isset($value['word'], $value['type']) || $value['word'] === '') {
                        continue;
                    }
                    $wordChars = preg_split('//u', $value['word'], -1, PREG_SPLIT_NO_EMPTY);
                    if (!$wordChars) {
                        continue;
                    }
                    // Descend by reference so missing nodes are created on the way.
                    $point = &$wordTree;
                    foreach ($wordChars as $char) {
                        $point = &$point[$char];
                    }
                    $point['type'] = $value['type'];
                    // Drop the reference so it cannot leak into the next word.
                    unset($point);
                }
            }
            $redis->setex($memKey, self::CACHE_TTL, $wordTree);
        }
        return $wordTree;
    }

    /**
     * Normalise a text and split it into UTF-8 characters.
     *
     * HTML tags are stripped and every character that is not an ASCII letter,
     * an ASCII digit or in the range U+4E00..U+9FA5 is replaced by a space.
     *
     * @param mixed $txt Raw text.
     * @return array List of single-character strings; empty for empty input.
     */
    private function toChars($txt)
    {
        // Converting UTF-8 to UTF-8 substitutes invalid byte sequences; the
        // /u patterns below fail outright on malformed input, which would
        // let such a text skip the filter entirely.
        $txt = mb_convert_encoding(strip_tags((string) $txt), 'UTF-8', 'UTF-8');
        $txt = preg_replace('/[^a-zA-Z0-9\x{4e00}-\x{9fa5}]/iu', ' ', $txt);
        if ($txt === null || $txt === '') {
            return array();
        }
        $chars = preg_split('//u', $txt, -1, PREG_SPLIT_NO_EMPTY);
        return $chars ? $chars : array();
    }
}
Copy Code
|