I recently used strings.NewReplacer and replacer.Replace(): once the old/new pairs are passed in, replacement is done by priority, and Chinese strings are handled correctly.
It feels powerful and is very easy to use. I was curious how it handles lookup and priority, so I took some time to study the source; this post records my personal understanding.
package main

// author: xcl
// 2014-1-20 notes

import (
	"fmt"
	"strings"
)

// main runs the standard-library replacer on a sample string and then calls
// the traced NewReplacer below so the trie construction and lookups can be
// followed in the output.
func main() {
	// Flat list of (old, new) pairs; earlier pairs get a higher priority.
	patterns := []string{
		"y", "25",
		"中", "国",
		"中工", "家伙",
	}

	/*
		patterns := make([]string, 270*2)
		for i := 0; i < 270*2; i++ {
			patterns[i] = fmt.Sprintf("%d", i)
		}
	*/

	replacer := strings.NewReplacer(patterns...)

	format := "中(国)--中工(家伙)"
	strfmt := replacer.Replace(format)
	NewReplacer(patterns...)

	fmt.Println("\nmain() replacer.Replace old=", format)
	fmt.Println("main() replacer.Replace new=", strfmt)
}

// NewReplacer builds the traced trie for the given old/new pairs and prints
// the result of a few sample lookups. oldnew is a flat list of (old, new)
// pairs and must therefore have an even number of elements.
func NewReplacer(oldnew ...string) {
	r := makeGenericReplacer(oldnew)

	val, keylen, found := r.lookup("中", true)
	fmt.Println("\nNewReplacer() 中 val:", val, "keylen:", keylen, "found:", found)

	val, keylen, found = r.lookup("中工", true)
	fmt.Println("NewReplacer() 中工 val:", val, "keylen:", keylen, "found:", found)

	val, keylen, found = r.lookup("y", false)
	fmt.Println("NewReplacer() y val:", val, "keylen:", keylen, "found:", found)

	/*
		val, keylen, found := r.lookup("2", true)
		fmt.Println("\nNewReplacer() 2 val:", val, "keylen:", keylen, "found:", found)

		val, keylen, found = r.lookup("3", true)
		fmt.Println("\nNewReplacer() 3 val:", val, "keylen:", keylen, "found:", found)
	*/
}

// genericReplacer holds the trie of keys together with the byte-to-index
// mapping used by the lookup tables of its nodes.
type genericReplacer struct {
	root trieNode // the trie (dictionary tree)
	// tableSize is the size of a trie node's lookup table. It is the number
	// of unique key bytes.
	tableSize int
	// mapping maps from key bytes to a dense index for trieNode.table.
	mapping [256]byte
}

// makeGenericReplacer builds a genericReplacer from a flat list of (old, new)
// pairs, printing each step. Pair i (at oldnew[i], oldnew[i+1]) is stored with
// priority len(oldnew)-i, so pairs that appear earlier win. oldnew must have
// an even number of elements; an odd count makes oldnew[i+1] go out of range.
func makeGenericReplacer(oldnew []string) *genericReplacer {
	r := new(genericReplacer)

	// Find each byte used, then assign them each an index.
	for i := 0; i < len(oldnew); i += 2 { // step 2: the first of each pair is the pattern
		key := oldnew[i]
		fmt.Println("\nmakeGenericReplacer() for key=", key)

		// key[j] is a single byte; a Chinese character is stored as three
		// UTF-8 bytes (e.g. 228 is one of them), and each byte seen is
		// flagged here, i.e. r.mapping[228] = 1.
		for j := 0; j < len(key); j++ {
			r.mapping[key[j]] = 1
			fmt.Println("makeGenericReplacer() key[", j, "]=", key[j])
		}
	}

	// Every flagged byte contributes 1, so the sum is the number of unique
	// key bytes.
	for _, b := range r.mapping {
		r.tableSize += int(b)
	}
	fmt.Println("makeGenericReplacer() r.tableSize=", r.tableSize)

	var index byte
	for i, b := range r.mapping {
		if b == 0 {
			// Unused bytes map to tableSize, which lookup treats as
			// "not in any key".
			r.mapping[i] = byte(r.tableSize)
		} else {
			// Used bytes get consecutive indexes in byte-value order.
			r.mapping[i] = index
			fmt.Println("makeGenericReplacer() r.mapping[", i, "] =", r.mapping[i])
			index++
		}
	}

	// Ensure root node uses a lookup table (for performance).
	r.root.table = make([]*trieNode, r.tableSize)

	// Put each key/value into the trie. Note priority = len(oldnew)-i: the
	// earlier a pair appears in the slice, the higher its priority.
	for i := 0; i < len(oldnew); i += 2 {
		r.root.add(oldnew[i], oldnew[i+1], len(oldnew)-i, r)
	}
	return r
}

// trieNode is a node of the lookup trie. A node either carries a value
// (priority > 0), a prefix leading to next, or a table of children indexed
// through genericReplacer.mapping.
type trieNode struct {
	value    string
	priority int
	prefix   string
	next     *trieNode
	table    []*trieNode
}

// add inserts key with its replacement val and priority below node t. r
// supplies the byte mapping and the table size for any lookup table that has
// to be created.
func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
	fmt.Println("trieNode->add() val=", val, "key=", key)

	if key == "" {
		// The whole key has been consumed. A node that already has a
		// priority keeps its existing value.
		if t.priority == 0 {
			t.value = val
			t.priority = priority
			fmt.Println("trieNode->add() t.priority==", priority)
		}
		return
	}

	if t.prefix != "" { // the node already has a prefix
		// Need to split the prefix among multiple nodes.
		var n int // length of the longest common prefix
		for ; n < len(t.prefix) && n < len(key); n++ { // prefix vs. key
			if t.prefix[n] != key[n] {
				break
			}
		}
		if n == len(t.prefix) {
			// The whole prefix matches; continue below it.
			t.next.add(key[n:], val, priority, r)
		} else if n == 0 {
			// First byte differs, start a new lookup table here. Looking up
			// what is currently t.prefix[0] will lead to prefixNode, and
			// looking up key[0] will lead to keyNode.
			var prefixNode *trieNode
			if len(t.prefix) == 1 {
				// The prefix is a single byte: hang the old child directly
				// under the table slot.
				prefixNode = t.next
			} else {
				// Otherwise keep the rest of the prefix in a new node.
				prefixNode = &trieNode{
					prefix: t.prefix[1:],
					next:   t.next,
				}
			}
			keyNode := new(trieNode)
			t.table = make([]*trieNode, r.tableSize) // lookup() checks node.table != nil
			t.table[r.mapping[t.prefix[0]]] = prefixNode
			t.table[r.mapping[key[0]]] = keyNode
			t.prefix = ""
			t.next = nil
			keyNode.add(key[1:], val, priority, r)
		} else {
			// Insert new node after the common section of the prefix.
			next := &trieNode{
				prefix: t.prefix[n:],
				next:   t.next,
			}
			t.prefix = t.prefix[:n]
			t.next = next
			next.add(key[n:], val, priority, r)
		}
	} else if t.table != nil {
		// Insert into existing table.
		m := r.mapping[key[0]]
		if t.table[m] == nil {
			t.table[m] = new(trieNode)
		}
		t.table[m].add(key[1:], val, priority, r) // build the subtree
	} else {
		// Empty node: store the remaining key as prefix and put the value
		// in a fresh child.
		t.prefix = key
		t.next = new(trieNode)
		t.next.add("", val, priority, r)
	}
}

// lookup walks the trie along s and returns the value and key length of the
// highest-priority key that is a prefix of s. found reports whether any key
// matched. When ignoreRoot is true a value stored on the root node itself is
// not considered.
func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
	// Iterate down the trie to the end, and grab the value and keylen with
	// the highest priority.
	bestPriority := 0
	node := &r.root
	n := 0
	for node != nil {
		if node.priority > bestPriority && !(ignoreRoot && node == &r.root) {
			bestPriority = node.priority
			val = node.value
			keylen = n
			found = true
		}

		if s == "" {
			break
		}
		if node.table != nil {
			index := r.mapping[s[0]]
			if int(index) == r.tableSize {
				// The byte does not occur in any key; stop searching.
				break
			}
			node = node.table[index]
			s = s[1:]
			n++
		} else if node.prefix != "" && HasPrefix(s, node.prefix) {
			// Nodes below the first byte keep the rest of the key in
			// prefix, so consume the prefix and continue with the
			// remaining bytes.
			n += len(node.prefix)
			s = s[len(node.prefix):]
			node = node.next
		} else {
			break
		}
	}
	return
}

// HasPrefix tests whether the string s begins with prefix.
func HasPrefix(s, prefix string) bool {
	return len(s) >= len(prefix) && s[0:len(prefix)] == prefix
}
Notes:
A character in the ASCII range takes a single byte, e.g. y (121).
Each Chinese character in this example takes three bytes in UTF-8, e.g. 中 (228, 184, 173).
Building the tree:
If the key starts with a first byte that is not yet in the tree (a new first character or phrase):
execution first enters `} else if t.table != nil {` (the root node always has a table),
and then, in the final else, `t.prefix = key` stores the rest of the key in prefix and "" is passed on to the next node,
which finally executes `if key == "" { if t.priority == 0 { t.value = val ... } }`.
That is, the first byte of the key selects a slot in root.table, which points to another node; that middle node keeps the rest of the key in its prefix.
The last node stores the corresponding val and priority.
If the key starts with a first byte that is already in the tree, start from the root.table slot for the key's first byte and continue from there:
`} else if t.table != nil {`
If the node already has a prefix, compare the key against it: `if t.prefix != "" {`
1. If the whole prefix matches the start of the key, continue building the tree under the child node (t.next).
2. If the prefix and the key differ in their very first byte, build a new lookup table with two nodes:
prefixNode and keyNode.
The reason for `t.table = make([]*trieNode, r.tableSize)` is to reserve a slot for every mapped byte.
That is why the nodes are stored at their mapped indexes rather than at t.table[0] and t.table[1]:
t.table[r.mapping[t.prefix[0]]] = prefixNode
t.table[r.mapping[key[0]]] = keyNode
3. If only part of the prefix matches, split the node at t.prefix[n:] and continue building the child nodes from key[n:].
Priority:
Here, a larger number means a higher priority (priority = len(oldnew) - i, so pairs listed earlier get a higher priority).
`if key == "" {` // reached once all bytes of the key have been consumed
`if t.priority == 0 {` // if a priority has already been set, skip; the existing priority is kept
For the keys {"中", "中工"} below: although "中" also matches, "中工" has the higher priority, so the value for "中" is found but is not the one returned.
`if node.priority > bestPriority { bestPriority = node.priority }`
For example: "中工" (priority=4), "中" (priority=2)
Patterns:
"中工", "家伙",
"中", "国",
Output:
lookup() bestPriority: 0 node.priority: 0 value:  prefix:
lookup() bestPriority: 0 node.priority: 0 value:  prefix: ??
lookup() bestPriority: 0 node.priority: 2 value: 国 prefix: 工
NewReplacer() 中 val: 国 keylen: 3 found: true
lookup() bestPriority: 0 node.priority: 0 value:  prefix:
lookup() bestPriority: 0 node.priority: 0 value:  prefix: ??
lookup() bestPriority: 0 node.priority: 2 value: 国 prefix: 工
lookup() bestPriority: 2 node.priority: 4 value: 家伙 prefix:
NewReplacer() 中工 val: 家伙 keylen: 6 found: true
main() replacer.Replace old= 中(国)--中工(家伙)
main() replacer.Replace new= 国(国)--家伙(家伙)
If you change the order and put "中" first, you get the following results instead:
Patterns:
"中", "国",
"中工", "家伙",
Output:
lookup() bestPriority: 0 node.priority: 0 value:  prefix:
lookup() bestPriority: 0 node.priority: 0 value:  prefix: ??
lookup() bestPriority: 0 node.priority: 4 value: 国 prefix: 工
NewReplacer() 中 val: 国 keylen: 3 found: true
lookup() bestPriority: 0 node.priority: 0 value:  prefix:
lookup() bestPriority: 0 node.priority: 0 value:  prefix: ??
lookup() bestPriority: 0 node.priority: 4 value: 国 prefix: 工
lookup() bestPriority: 4 node.priority: 2 value: 家伙 prefix:
NewReplacer() 中工 val: 国 keylen: 3 found: true
main() replacer.Replace old= 中(国)--中工(家伙)
main() replacer.Replace new= 国(国)--国工(家伙)
Also, I just noticed something in `lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {}`:
variables declared as named return values can be used directly inside the function.
As for returning, a bare `return` is enough; there is no need to write out all the return values, which is very convenient.
MAIL: [Email protected]
blog:http://blog.csdn.net/xcl168
Notes on the lookup part of Replacer in the Go source code