There are about 6,000 keywords, divided into three groups.
One group is simply replaced (replace), one group sends the content to manual review when a keyword is matched (censor), and the last group blocks publication when a keyword is matched (banned).
The data table is designed as follows:
mysql> desc tbl_censor;
+-------------+----------------------+------+-----+---------+----------------+
| Field       | Type                 | Null | Key | Default | Extra          |
+-------------+----------------------+------+-----+---------+----------------+
| id          | smallint(6) unsigned | NO   | PRI | NULL    | auto_increment |
| censortype  | smallint(6)          | NO   |     | 1       |                |
| find        | varchar(120)         | NO   | UNI |         |                |
| replacement | varchar(255)         | NO   |     |         |                |
| extra       | varchar(255)         | NO   |     |         |                |
| uptime      | int(11)              | YES  |     | NULL    |                |
| enable      | int(1)               | NO   |     | 1       |                |
+-------------+----------------------+------+-----+---------+----------------+
7 rows in set (0.01 sec)
Since there are more than 6,000 keywords, should I loop over them with foreach and strstr, or use preg_match?
Efficiency matters: more than 100,000 articles are submitted per hour.
Here is what I have written so far:
php
namespace app\helpers;

use app\models\other\Censor;
use app\models\other\CensorLog;

/**
 * Keyword filter built from the rows of the Censor model.
 *
 * Every enabled row falls into one of three groups, decided by its
 * `replacement` column:
 *  - '{banned}' : a match blocks the content,
 *  - '{censor}' : a match sends the content to review,
 *  - anything else: the keyword (`find`) is replaced by `replacement`.
 *
 * The compiled patterns and the replacement map are stored in the Yii cache
 * under the key given to the constructor.
 */
class CensorHelper
{
    /** @var string Cache key under which the compiled data is stored. */
    public $id;

    /** @var array Compiled data with the keys 'censor', 'banned' and 'replace'. */
    public $data;

    /** @var string[] Banned keywords found by the checks run so far. */
    public $match_banned;

    /** @var string[] Review keywords found by the checks run so far. */
    public $match_censor;

    /**
     * @param string $id Cache key used to store and look up the compiled data.
     */
    public function __construct($id = 'censor')
    {
        $this->id = $id;
        $this->match_banned = [];
        $this->match_censor = [];
        $this->data = $this->getData();
    }

    /**
     * Returns the compiled filter data, building it from the database when the
     * cache has no usable entry.
     *
     * @return array ['censor' => string, 'banned' => string,
     *                'replace' => ['from' => string[], 'to' => string[]]]
     */
    public function getData()
    {
        $data = \Yii::$app->cache->get($this->id);
        if (!empty($data)) {
            return $data;
        }

        $words = Censor::find()
            ->where(['enable' => 1])
            ->andWhere(['!=', 'replacement', ''])
            ->orderBy(['replacement' => SORT_ASC, 'find' => SORT_DESC])
            ->asArray()
            ->all();

        $censor = [];
        $banned = [];
        $replace = ['from' => [], 'to' => []];
        foreach ($words as $row) {
            switch ($row['replacement']) {
                case '{censor}':
                    $censor[] = $row['find'];
                    break;
                case '{banned}':
                    $banned[] = $row['find'];
                    break;
                default:
                    // The keyword is what gets searched for and the replacement
                    // is what gets written; the reverse mapping would put the
                    // sensitive words back into the text.
                    $replace['from'][] = $row['find'];
                    $replace['to'][] = $row['replacement'];
                    break;
            }
        }

        $data = [
            'censor' => $this->generateRegularExpression($censor),
            'banned' => $this->generateRegularExpression($banned),
            'replace' => $replace,
        ];

        // Cache as soon as any group has entries, including a replace-only list.
        if ($censor || $banned || $replace['from']) {
            \Yii::$app->cache->set($this->id, $data);
        }

        return $data;
    }

    /**
     * Builds one case-insensitive alternation pattern from a keyword list.
     *
     * @param array $words Literal keywords.
     * @return string The pattern, or '' when there is nothing to match. An
     *                empty alternation ("//i") would match every input.
     */
    public function generateRegularExpression(array $words)
    {
        $quoted = [];
        foreach ($words as $word) {
            $word = (string) $word;
            if ($word !== '') {
                // Quote the delimiter as well, so a keyword containing "/"
                // cannot terminate the pattern early.
                $quoted[] = preg_quote($word, '/');
            }
        }

        if (!$quoted) {
            return '';
        }

        return '/' . implode('|', $quoted) . '/i';
    }

    /**
     * Runs the banned check and then the review check on a string.
     *
     * @param string $string Text to inspect.
     */
    public function check($string)
    {
        $this->banned($string);
        $this->censor($string);
    }

    /**
     * Records every review keyword found in the string.
     *
     * @param string $string Text to inspect.
     */
    public function censor($string)
    {
        $pattern = isset($this->data['censor']) ? $this->data['censor'] : '';
        $this->match_censor = array_values(array_unique(
            array_merge($this->match_censor, $this->collectMatches($pattern, $string))
        ));
    }

    /**
     * Records every banned keyword found in the string.
     *
     * @param string $string Text to inspect.
     */
    public function banned($string)
    {
        $pattern = isset($this->data['banned']) ? $this->data['banned'] : '';
        $this->match_banned = array_values(array_unique(
            array_merge($this->match_banned, $this->collectMatches($pattern, $string))
        ));
    }

    /**
     * Drops the cached data and rebuilds it, refreshing this instance as well.
     */
    public function flush()
    {
        \Yii::$app->cache->delete($this->id);
        $this->data = $this->getData();
    }

    /**
     * Applies the replacement map to a string.
     *
     * @param string $string Text to rewrite.
     * @return mixed The rewritten text, or the input unchanged when there is
     *               no replacement map.
     */
    public function replace($string)
    {
        if (empty($this->data['replace']['from'])) {
            return $string;
        }

        return str_replace($this->data['replace']['from'], $this->data['replace']['to'], $string);
    }

    /**
     * @return string 'banned', 'censor' or 'pass', based on the matches so far.
     */
    public function getLevel()
    {
        if (!empty($this->match_banned)) {
            return 'banned';
        }
        if (!empty($this->match_censor)) {
            return 'censor';
        }

        return 'pass';
    }

    /**
     * Saves a CensorLog record describing the matches collected so far.
     *
     * NOTE(review): the attribute names below were recovered from a paste with
     * garbled casing; confirm them against the CensorLog model.
     *
     * @param mixed $tableId Identifier of the table the checked content belongs to.
     * @param mixed $dataId  Identifier of the checked record.
     */
    public function addLog($tableId, $dataId)
    {
        $log = new CensorLog();
        $log->datatb = $tableId;
        $log->dataid = $dataId;
        $log->matchcensor = implode(',', $this->match_censor);
        $log->matchbanned = implode(',', $this->match_banned);
        $log->addtime = time();
        if (!\Yii::$app->user->isGuest) {
            $log->uid = \Yii::$app->user->getId();
            $log->uname = \Yii::$app->user->getUname();
        }
        $log->ip = IpHelper::getIp();
        $log->iploc = IpHelper::getLocation($log->ip);
        $log->save();
    }

    /**
     * Returns all substrings of $string matched by $pattern.
     *
     * @param string $pattern A pattern from generateRegularExpression(), or ''.
     * @param string $string  Text to search.
     * @return string[] Matched substrings; empty for an empty pattern, no
     *                  match, or a PCRE error.
     */
    private function collectMatches($pattern, $string)
    {
        // preg_match_all fills $matches[0] with an array of every hit, which
        // is what array_merge needs; preg_match would give a single string.
        if (empty($pattern) || !preg_match_all($pattern, $string, $matches)) {
            return [];
        }

        return $matches[0];
    }
}
Reply content:
There are about 6,000 keywords, divided into three groups.
One group is simply replaced (replace), one group sends the content to manual review when a keyword is matched (censor), and the last group blocks publication when a keyword is matched (banned).
The data table is designed as follows:
mysql> desc tbl_censor;
+-------------+----------------------+------+-----+---------+----------------+
| Field       | Type                 | Null | Key | Default | Extra          |
+-------------+----------------------+------+-----+---------+----------------+
| id          | smallint(6) unsigned | NO   | PRI | NULL    | auto_increment |
| censortype  | smallint(6)          | NO   |     | 1       |                |
| find        | varchar(120)         | NO   | UNI |         |                |
| replacement | varchar(255)         | NO   |     |         |                |
| extra       | varchar(255)         | NO   |     |         |                |
| uptime      | int(11)              | YES  |     | NULL    |                |
| enable      | int(1)               | NO   |     | 1       |                |
+-------------+----------------------+------+-----+---------+----------------+
7 rows in set (0.01 sec)
Since there are more than 6,000 keywords, should I loop over them with foreach and strstr, or use preg_match?
Efficiency matters: more than 100,000 articles are submitted per hour.
Here is what I have written so far:
php
namespace app\helpers;

use app\models\other\Censor;
use app\models\other\CensorLog;

/**
 * Keyword filter built from the rows of the Censor model.
 *
 * Every enabled row falls into one of three groups, decided by its
 * `replacement` column:
 *  - '{banned}' : a match blocks the content,
 *  - '{censor}' : a match sends the content to review,
 *  - anything else: the keyword (`find`) is replaced by `replacement`.
 *
 * The compiled patterns and the replacement map are stored in the Yii cache
 * under the key given to the constructor.
 */
class CensorHelper
{
    /** @var string Cache key under which the compiled data is stored. */
    public $id;

    /** @var array Compiled data with the keys 'censor', 'banned' and 'replace'. */
    public $data;

    /** @var string[] Banned keywords found by the checks run so far. */
    public $match_banned;

    /** @var string[] Review keywords found by the checks run so far. */
    public $match_censor;

    /**
     * @param string $id Cache key used to store and look up the compiled data.
     */
    public function __construct($id = 'censor')
    {
        $this->id = $id;
        $this->match_banned = [];
        $this->match_censor = [];
        $this->data = $this->getData();
    }

    /**
     * Returns the compiled filter data, building it from the database when the
     * cache has no usable entry.
     *
     * @return array ['censor' => string, 'banned' => string,
     *                'replace' => ['from' => string[], 'to' => string[]]]
     */
    public function getData()
    {
        $data = \Yii::$app->cache->get($this->id);
        if (!empty($data)) {
            return $data;
        }

        $words = Censor::find()
            ->where(['enable' => 1])
            ->andWhere(['!=', 'replacement', ''])
            ->orderBy(['replacement' => SORT_ASC, 'find' => SORT_DESC])
            ->asArray()
            ->all();

        $censor = [];
        $banned = [];
        $replace = ['from' => [], 'to' => []];
        foreach ($words as $row) {
            switch ($row['replacement']) {
                case '{censor}':
                    $censor[] = $row['find'];
                    break;
                case '{banned}':
                    $banned[] = $row['find'];
                    break;
                default:
                    // The keyword is what gets searched for and the replacement
                    // is what gets written; the reverse mapping would put the
                    // sensitive words back into the text.
                    $replace['from'][] = $row['find'];
                    $replace['to'][] = $row['replacement'];
                    break;
            }
        }

        $data = [
            'censor' => $this->generateRegularExpression($censor),
            'banned' => $this->generateRegularExpression($banned),
            'replace' => $replace,
        ];

        // Cache as soon as any group has entries, including a replace-only list.
        if ($censor || $banned || $replace['from']) {
            \Yii::$app->cache->set($this->id, $data);
        }

        return $data;
    }

    /**
     * Builds one case-insensitive alternation pattern from a keyword list.
     *
     * @param array $words Literal keywords.
     * @return string The pattern, or '' when there is nothing to match. An
     *                empty alternation ("//i") would match every input.
     */
    public function generateRegularExpression(array $words)
    {
        $quoted = [];
        foreach ($words as $word) {
            $word = (string) $word;
            if ($word !== '') {
                // Quote the delimiter as well, so a keyword containing "/"
                // cannot terminate the pattern early.
                $quoted[] = preg_quote($word, '/');
            }
        }

        if (!$quoted) {
            return '';
        }

        return '/' . implode('|', $quoted) . '/i';
    }

    /**
     * Runs the banned check and then the review check on a string.
     *
     * @param string $string Text to inspect.
     */
    public function check($string)
    {
        $this->banned($string);
        $this->censor($string);
    }

    /**
     * Records every review keyword found in the string.
     *
     * @param string $string Text to inspect.
     */
    public function censor($string)
    {
        $pattern = isset($this->data['censor']) ? $this->data['censor'] : '';
        $this->match_censor = array_values(array_unique(
            array_merge($this->match_censor, $this->collectMatches($pattern, $string))
        ));
    }

    /**
     * Records every banned keyword found in the string.
     *
     * @param string $string Text to inspect.
     */
    public function banned($string)
    {
        $pattern = isset($this->data['banned']) ? $this->data['banned'] : '';
        $this->match_banned = array_values(array_unique(
            array_merge($this->match_banned, $this->collectMatches($pattern, $string))
        ));
    }

    /**
     * Drops the cached data and rebuilds it, refreshing this instance as well.
     */
    public function flush()
    {
        \Yii::$app->cache->delete($this->id);
        $this->data = $this->getData();
    }

    /**
     * Applies the replacement map to a string.
     *
     * @param string $string Text to rewrite.
     * @return mixed The rewritten text, or the input unchanged when there is
     *               no replacement map.
     */
    public function replace($string)
    {
        if (empty($this->data['replace']['from'])) {
            return $string;
        }

        return str_replace($this->data['replace']['from'], $this->data['replace']['to'], $string);
    }

    /**
     * @return string 'banned', 'censor' or 'pass', based on the matches so far.
     */
    public function getLevel()
    {
        if (!empty($this->match_banned)) {
            return 'banned';
        }
        if (!empty($this->match_censor)) {
            return 'censor';
        }

        return 'pass';
    }

    /**
     * Saves a CensorLog record describing the matches collected so far.
     *
     * NOTE(review): the attribute names below were recovered from a paste with
     * garbled casing; confirm them against the CensorLog model.
     *
     * @param mixed $tableId Identifier of the table the checked content belongs to.
     * @param mixed $dataId  Identifier of the checked record.
     */
    public function addLog($tableId, $dataId)
    {
        $log = new CensorLog();
        $log->datatb = $tableId;
        $log->dataid = $dataId;
        $log->matchcensor = implode(',', $this->match_censor);
        $log->matchbanned = implode(',', $this->match_banned);
        $log->addtime = time();
        if (!\Yii::$app->user->isGuest) {
            $log->uid = \Yii::$app->user->getId();
            $log->uname = \Yii::$app->user->getUname();
        }
        $log->ip = IpHelper::getIp();
        $log->iploc = IpHelper::getLocation($log->ip);
        $log->save();
    }

    /**
     * Returns all substrings of $string matched by $pattern.
     *
     * @param string $pattern A pattern from generateRegularExpression(), or ''.
     * @param string $string  Text to search.
     * @return string[] Matched substrings; empty for an empty pattern, no
     *                  match, or a PCRE error.
     */
    private function collectMatches($pattern, $string)
    {
        // preg_match_all fills $matches[0] with an array of every hit, which
        // is what array_merge needs; preg_match would give a single string.
        if (empty($pattern) || !preg_match_all($pattern, $string, $matches)) {
            return [];
        }

        return $matches[0];
    }
}
A trie-based algorithm works best.
There is a PHP keyword-filtering extension that relies on libdatrie (a C++ implementation of the trie algorithm).
Your sensitive-word matching does not need regular expressions; plain substring matching or replacement is enough.
Store the keywords in memcached, split into the three types.
Then match the article against them in order of severity: first the banned keywords, then the keywords that require review (censor), and finally the sensitive words that can simply be replaced.
1. Block publication on a match => strpos
2. Require review on a match => strpos
3. Replacement => str_replace