A small application; the code is shown below. I would like to recommend glog and gflags, both of which I like very much :) Thanks to Google for making my life easier :)
In addition, reading the database with OTL is quite convenient. I wrapped OTL in a dbreader, so the interface for processing database rows is basically the same as the one for processing text. With the database operations completely hidden behind that wrapper, the code is much easier to write.
/*** ===================================================== ========================================================** \ File quechaokafei. CC*\ Autho goldenlock* \ Description: due to the existence of "Nestle Coffee" in the title, the user cannot search for "Nestle Coffee ".*Solution: Read all items in the database. If the title is found
* "Nestle * coffee" "Coffee * Nestle" is added * Current dict/quechaokafei.txt in Blacklist * * ===================================================== ======================================================== */ # Define Private Public # Define Protected Public # Include <iostream> # include < String > # Include <vector> # include <fstream> # include "Utils/db_reader.h" # Include <algorithm> # include <boost/progress. HPP> # include <glog/logging. h> # include <gflags/gflags. h> # include <tr1/unordered_set> # include "Gbk_ch_converter.h" # Include "String_help.h" # Include "Config_help.h" # Include "Debug_help.h" # Include "Include/segmentor. H" # Include "Include/gbk_datrie.h" Using Namespace STD; define_string (type, "Simple" , "" ); Define_string (config, "Read_db.ini" ,"Database configuration file, read the title data" ); Define_string (section, "All_title" , "Read the title data" ); Define_string (O, "Dict/quechaokafei.txt" , "Extracted blacklist words" ); Define_int32 (min_len, 2, "Requirements for a single word> = 2 current" ); Define_string (prob_dir, "Testngramtitle" , "Souce dir of the probability analyzer" ); Struct Quechaokafeifunc {typedef STD: tr1: unordered_set < String > Hashset; hashset m_candidates; ch_convert: chconverter m_converter; Segment: probsegmentor m_seg; Segment: gbk_datrie _ m_trie; ofstream OFS; hour (): m_seg (hour ), m_trie (m_seg.m_seg.m_trie, m_seg.m_seg.m_encoder), OFS (flags_o.c_str ()){} Void Findquechaokafei ( Const Vector < String > & VEC ){ For (Size_t I = 0; I <Vec. Size ()-1; I ++ ){ If (VEC [I]. Size () <flags_min_len * 2 |! M_trie.search (VEC [I]) Continue ; For (Size_t J = I + 1; j <Vec. Size (); j ++ ){ If (VEC [J]. 
Size () <flags_min_len * 2) Continue ; If (M_trie.search (VEC [J]) { String S1 = VEC [I] + VEC [J]; String S2 = VEC [J] + VEC [I]; If (M_trie.search (S1) {m_candidates.insert (S1 );} If (M_trie.search (S2) {m_candidates.insert (S2 );}}}}} Void Writeresult () {STD: Copy (m_candidates.begin (), m_candidates.end (), ostream_iterator < String > (OFS, "\ N" );} Template <typename stream> Void Operator () (Stream & OS ){ String Key; vector < String > VEC; While (! OS. EOF () {OS> key; // --- Normalization processing key Key = m_converter.normalize (key ); If (Key. Empty ()) Continue ; Key = filterstring2 (key ); If (Key. Empty ()) Continue ; M_seg.maxsegment (Key, VEC); findquechaokafei (VEC );}}}; Void Run () {dbreader db_reader; db_reader.init (flags_config, flags_section); quechaokafeifunc quechaokafei_func; db_reader.process (quechaokafei_func); notify ();} Int Main ( Int Argc, Char * Argv []) {flags_logtostderr =True ; Google: initgooglelogging (argv [0]); Google: installfailuresignalhandler (); Int S = Google: parsecommandlineflags (& argc, & argv, False ); Boost: progress_timer timer; run (); Return 0 ;}