// Text to scan, and the candidate substrings to look for inside it.
$str = "woxiangyao_genghao-de-zifuchuan";
$arr = array('geng', 'de', 'cdaefad');

// Check the candidates in order; on the first one found in $str,
// print $str and terminate the script.
foreach ($arr as $v) {
    if (strpos($str, $v) === false) {
        // This candidate does not occur in $str; try the next one.
        continue;
    }

    echo $str;
    exit;
}
有沒有效率更高的寫法?
回覆內容:
// Text to scan, and the candidate substrings to look for inside it.
$str = "woxiangyao_genghao-de-zifuchuan";
$arr = array('geng', 'de', 'cdaefad');

// Check the candidates in order; on the first one found in $str,
// print $str and terminate the script.
foreach ($arr as $v) {
    if (strpos($str, $v) === false) {
        // This candidate does not occur in $str; try the next one.
        continue;
    }

    echo $str;
    exit;
}
有沒有效率更高的寫法?
我將問題理解為「如何判斷內容是否包含敏感詞」。題主可以先把所有敏感詞建成一棵字典樹(Trie),然後再檢查內容中是否包含其中任一個關鍵詞。
下面是一個簡單的 PHP 字典樹範例,供參考:
/**
 * Keyword dictionary stored as a prefix tree (trie) of UTF-8 characters.
 *
 * Every node is an associative array mapping one character to its child
 * node. A node that additionally holds the key self::TERMINAL marks the
 * end of a stored keyword.
 */
class TrieTree
{
    /**
     * Array key marking "a keyword ends at this node".
     *
     * PHP casts a null array key to the empty string, so using '' explicitly
     * yields the same tree layout (and the same serialized form) as a null
     * key would, without relying on that implicit cast.
     * UTF8Util::getChars() never yields '', so this cannot collide with a
     * real character.
     */
    const TERMINAL = '';

    /** @var array Root node of the trie. */
    public $tree = array();

    /**
     * Add a keyword to the trie.
     *
     * @param string $utf8_str UTF-8 encoded keyword
     * @return $this
     */
    public function add($utf8_str)
    {
        $chars = UTF8Util::getChars($utf8_str);
        // Append the end-of-keyword marker so it is stored like any other edge.
        $chars[] = self::TERMINAL;

        $node = &$this->tree;
        foreach ($chars as $c) {
            if (!array_key_exists($c, $node)) {
                // First time this character appears at this position.
                $node[$c] = array();
            }
            $node = &$node[$c];
        }

        return $this;
    }

    /**
     * Remove a keyword from the trie.
     *
     * Only the given keyword is removed: keywords that share a prefix with
     * it, or that extend it, stay in place. Removing a keyword that is not
     * stored is a no-op.
     *
     * @param string $utf8_str UTF-8 encoded keyword
     * @return $this
     */
    public function remove($utf8_str)
    {
        $chars = UTF8Util::getChars($utf8_str);
        $this->_prune($this->tree, $chars, 0);

        return $this;
    }

    /**
     * Recursive helper for remove(): delete the end marker of the keyword
     * and then drop every node on the way back up that became empty.
     *
     * @param array $node  current node (modified in place)
     * @param array $chars characters of the keyword, without end marker
     * @param int   $depth index into $chars handled by this call
     * @return boolean true when the keyword was found and removed
     */
    private function _prune(&$node, $chars, $depth)
    {
        if ($depth === count($chars)) {
            if (!array_key_exists(self::TERMINAL, $node)) {
                // The path exists only as a prefix of other keywords.
                return false;
            }
            unset($node[self::TERMINAL]);
            return true;
        }

        $c = $chars[$depth];
        if (!array_key_exists($c, $node)) {
            return false;
        }

        $removed = $this->_prune($node[$c], $chars, $depth + 1);
        if ($removed && count($node[$c]) === 0) {
            // The child no longer leads to any keyword; drop the edge.
            unset($node[$c]);
        }

        return $removed;
    }

    /**
     * Tell whether exactly this keyword is stored in the trie.
     *
     * @param string $utf8_str UTF-8 encoded keyword
     * @return boolean
     */
    public function exists($utf8_str)
    {
        $chars = UTF8Util::getChars($utf8_str);
        $chars[] = self::TERMINAL;

        return $this->_find($chars);
    }

    /**
     * Follow $chars from the root; true when every step exists.
     *
     * @param array $chars sequence of keys (characters plus end marker)
     * @return boolean
     */
    private function _find($chars)
    {
        $node = $this->tree;
        foreach ($chars as $c) {
            if (!array_key_exists($c, $node)) {
                return false;
            }
            $node = $node[$c];
        }

        return true;
    }

    /**
     * Check whether the text contains any stored keyword.
     *
     * Matching is attempted from every character position of the text.
     *
     * @param string  $utf8_str UTF-8 encoded text to scan
     * @param boolean $do_count when true, count every match instead of
     *                          stopping at the first one
     * @return boolean|number with $do_count: the number of matches;
     *                        otherwise true on the first match, else false
     */
    public function contain($utf8_str, $do_count = false)
    {
        $chars = UTF8Util::getChars($utf8_str);
        $len = count($chars);
        $count = 0;

        for ($i = 0; $i < $len; $i++) {
            // Try to match a keyword starting at position $i.
            $node = $this->tree;
            for ($j = $i; $j < $len; $j++) {
                $c = $chars[$j];
                if (!array_key_exists($c, $node)) {
                    break;
                }
                $node = $node[$c];

                if (array_key_exists(self::TERMINAL, $node)) {
                    // chars[$i..$j] spell a complete keyword.
                    if (!$do_count) {
                        return true;
                    }
                    $count++;
                }
            }
        }

        return $do_count ? $count : false;
    }

    /**
     * Check several texts; true as soon as one of them contains a keyword.
     *
     * @param array $str_array list of UTF-8 encoded texts
     * @return boolean false when nothing matches or the argument is not an array
     */
    public function containMulti($str_array)
    {
        if (\is_array($str_array)) {
            foreach ($str_array as $str) {
                if ($this->contain($str)) {
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Export the trie as a serialized string.
     *
     * @return string
     */
    public function export()
    {
        return serialize($this->tree);
    }

    /**
     * Replace the trie with one previously produced by export().
     *
     * NOTE(review): unserialize() must only be fed trusted data; on
     * attacker-controlled input it can instantiate arbitrary objects.
     * Consider restricting allowed classes or switching to JSON if the
     * data can come from outside the application.
     *
     * @param string $str serialized trie
     */
    public function import($str)
    {
        $this->tree = unserialize($str);
    }
}

/**
 * Minimal UTF-8 helper used by TrieTree.
 */
class UTF8Util
{
    /**
     * Split a UTF-8 byte string into a list of characters.
     *
     * The width of each character is taken from its lead byte only;
     * continuation bytes are not validated. Stray continuation bytes and
     * sequences cut off by the end of the string are skipped.
     *
     * @param string $utf8_str UTF-8 encoded string
     * @return array list of one-character strings
     */
    public static function getChars($utf8_str)
    {
        $s = (string) $utf8_str;
        $len = strlen($s);
        $chars = array();

        for ($i = 0; $i < $len; $i++) {
            $n = ord($s[$i]);

            if ($n < 0x80) {
                // 0xxx xxxx: ASCII, single byte
                $width = 1;
            } elseif ($n >= 0xF0) {
                // 1111 xxxx: lead byte of a four-byte sequence
                $width = 4;
            } elseif ($n >= 0xE0) {
                // 1110 xxxx: lead byte of a three-byte sequence
                $width = 3;
            } elseif ($n >= 0xC0) {
                // 110x xxxx: lead byte of a two-byte sequence
                $width = 2;
            } else {
                // 10xx xxxx: continuation byte without a lead byte
                continue;
            }

            if ($i + $width > $len) {
                // Sequence is truncated by the end of the string.
                continue;
            }

            $chars[] = substr($s, $i, $width);
            // The loop's own $i++ accounts for the lead byte.
            $i += $width - 1;
        }

        return $chars;
    }
}
如果不侷限於特定的程式語言,這類多關鍵詞匹配問題用 AC 自動機(Aho–Corasick 演算法)來做應該會比較快。