A Python implementation of a prefix tree (trie) that supports Chinese text, using whole words as the basic unit of granularity

Source: Internet
Author: User

A Python implementation of a prefix tree (trie) that supports Chinese text, using whole words as the basic unit of granularity

A trie, also called a dictionary tree or prefix tree, can be used for "predictive text" and "autocompletion", or for word-frequency statistics (a word's frequency is added or updated while the word is being inserted into the trie).

In computer science, a trie, also known as a prefix tree or dictionary tree, is an ordered tree used to store an associative array whose keys are usually strings. Unlike a binary search tree, keys are not stored directly in the nodes; instead, a key is determined by the node's position in the tree. All descendants of a node share a common prefix, namely the string corresponding to that node, and the root node corresponds to the empty string. In general, not every node has an associated value: only the keys corresponding to leaf nodes and some internal nodes have values.

References: http://zh.wikipedia.org/wiki/Trie

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# * trie, prefix tree, can be used as a dict
# * author: yangxudongsuda@gmail.com
"""Word-granularity prefix tree (trie) that can be used like a dict.

Keys are sequences of words: either a list of words, or a string that is
split on ``sep`` (a single space by default).  Empty words are ignored
everywhere, so ``'sm '`` and ``'sm'`` denote the same key.
"""
import sys

if sys.version_info[0] < 3:
    # Python 2 only: make UTF-8 the implicit str <-> unicode codec, as the
    # original script did.  ``reload`` is a builtin there; Python 3 needs
    # neither call, so the branch is skipped.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding("UTF-8")


# Singleton sentinel - works with pickling
class NULL(object):
    """Marker for "no value stored here".

    The class object itself is the sentinel and must be compared with
    ``is``.  It is truthy, so ``if node.value:`` is NOT a valid test.
    """


class Node(object):
    """One trie node: an optional value plus children keyed by word."""

    def __init__(self, value=NULL):
        self.value = value
        self.children = {}


class Trie(object):
    """Prefix tree whose edges are whole words rather than characters."""

    def __init__(self):
        self.root = Node()

    # ------------------------------------------------------------------
    # internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _split(key, sep):
        """Return the words of ``key`` as a new list, dropping empty ones.

        ``key`` may already be a list of words; otherwise it is split on
        ``sep``.  Dropping empty words here keeps every method consistent
        with ``insert``, which has always skipped them.
        """
        elements = key if isinstance(key, list) else key.split(sep)
        return [e for e in elements if e]

    def _find_node(self, elements):
        """Return the node reached by following ``elements``, or None."""
        node = self.root
        for e in elements:
            node = node.children.get(e)
            if node is None:
                return None
        return node

    def _longest_match(self, elements):
        """Return ``(depth, value)`` of the deepest valued node on the path.

        ``depth`` is the number of leading words of ``elements`` that make
        up the matching key.  Returns None when no node along the path
        (including the root) stores a value.
        """
        node = self.root
        best = None if node.value is NULL else (0, node.value)
        for depth, e in enumerate(elements, 1):
            node = node.children.get(e)
            if node is None:
                break
            if node.value is not NULL:
                best = (depth, node.value)
        return best

    @staticmethod
    def _default_or_raise(default):
        """Return ``default``, or raise if the caller supplied none."""
        if default is NULL:
            raise Exception("no item matches any prefix of the given key!")
        return default

    def _collect_items(self, node, path, results, sep):
        """Append ``(key, value)`` for every valued node at or below ``node``.

        ``path`` holds the words leading to ``node``; it is extended and
        restored in place during the depth-first walk.  Returns ``results``.
        """
        # Identity test against the sentinel: NULL is truthy and stored
        # values such as 0 or '' are falsy, so a truthiness test is wrong.
        if node.value is not NULL:
            results.append((sep.join(path), node.value))
        for word, child in node.children.items():
            path.append(word)
            self._collect_items(child, path, results, sep)
            path.pop()
        return results

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------
    def insert(self, key, value=NULL, sep=' '):
        """Store ``value`` under ``key``, creating nodes as needed.

        key is a word sequence separated by ``sep`` (or a list of words).
        """
        node = self.root
        for e in self._split(key, sep):
            child = node.children.get(e)
            if child is None:
                child = node.children[e] = Node()
            node = child
        node.value = value

    def get(self, key, default=None, sep=' '):
        """Return the value stored under exactly ``key``, else ``default``."""
        node = self._find_node(self._split(key, sep))
        if node is None or node.value is NULL:
            return default
        return node.value

    def delete(self, key, sep=' '):
        """Remove ``key`` from the trie.

        The key's value is cleared even when longer keys share its path,
        and nodes left with neither a value nor children are pruned.

        Returns True if a stored value was removed, False otherwise.
        """
        trail = []  # (parent, word) pairs from the root down to the key
        node = self.root
        for e in self._split(key, sep):
            child = node.children.get(e)
            if child is None:
                return False
            trail.append((node, e))
            node = child
        removed = node.value is not NULL
        node.value = NULL
        # Walk back up, dropping nodes that no longer carry anything.
        # The root is never in ``trail`` as a child, so it is kept.
        while trail and not node.children and node.value is NULL:
            node, word = trail.pop()
            del node.children[word]
        return removed

    def longest_prefix(self, key, default=NULL, sep=' '):
        """Return the longest stored key that is a prefix of ``key``.

        If no stored key is a prefix, return ``default`` when one was
        given, otherwise raise ``Exception``.
        """
        elements = self._split(key, sep)
        match = self._longest_match(elements)
        if match is None:
            return self._default_or_raise(default)
        return sep.join(elements[:match[0]])

    def longest_prefix_value(self, key, default=NULL, sep=' '):
        """Return the value of the longest stored key prefixing ``key``.

        If no stored key is a prefix, return ``default`` when one was
        given, otherwise raise ``Exception``.
        """
        match = self._longest_match(self._split(key, sep))
        if match is None:
            return self._default_or_raise(default)
        return match[1]

    def longest_prefix_item(self, key, default=NULL, sep=' '):
        """Return ``(stored_key, value)`` for the longest prefix of ``key``.

        If no stored key is a prefix, return ``default`` unchanged when one
        was given, otherwise raise ``Exception``.
        """
        elements = self._split(key, sep)
        match = self._longest_match(elements)
        if match is None:
            return self._default_or_raise(default)
        depth, value = match
        return (sep.join(elements[:depth]), value)

    def items(self, prefix, sep=' '):
        """Return ``(key, value)`` pairs for every key starting with ``prefix``.

        ``prefix`` may be a string or a list of words; an empty prefix
        lists the whole trie.  Order follows dict iteration order.
        """
        elements = self._split(prefix, sep)
        node = self._find_node(elements)
        if node is None:
            return []
        return self._collect_items(node, elements, [], sep)

    def keys(self, prefix, sep=' '):
        """Return every stored key that starts with ``prefix``."""
        return [key for key, _ in self.items(prefix, sep)]


if __name__ == '__main__':
    # Demo keys use one consistent ASCII spelling so that the lookups
    # below actually hit the inserted entries.
    trie = Trie()
    trie.insert('Happy Platform', 1)
    trie.insert('Happy shopping mall', 2)
    trie.insert('sm', 1)
    trie.insert('sm international plaza', 2)
    trie.insert('sm city plaza', 3)
    trie.insert('sm plaza', 4)
    trie.insert('sm new-life plaza', 5)
    trie.insert('sm shopping plaza', 6)
    trie.insert('soho shangdu', 3)
    trie.insert(['yang Xudong', 'yes', 'tag', 'good'], 100)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(trie.get('sm'))
    print(trie.get('sm plaza'))
    print(trie.get('sm plaza'.split(' ')))
    print(trie.get('shenma'))
    print(trie.get('Happy Platform'))
    print(trie.get('Happy shopping mall'))
    print(trie.longest_prefix('soho plaza', 'default'))
    print(trie.longest_prefix('soho shangdu plaza'))
    print(trie.longest_prefix_value('soho shangdu plaza'))
    print(trie.longest_prefix_value('xx shangdu plaza', 90))
    print(trie.longest_prefix_value('xx shangdu plaza', 'no prefix'))
    print(trie.longest_prefix_item('soho shangdu plaza'))

    print('==================== keys ========================')
    print('prefix "sm": %s' % ' | '.join(trie.keys('sm')))
    print('==================== items ========================')
    print('prefix "sm": %s' % trie.items('sm'))

    print('==================== delete ========================')
    trie.delete('sm plaza')
    print(trie.get('sm plaza'))

    print('===== no item matches any prefix of given key ========')
    # Each call below has no matching prefix and no default, so it raises;
    # catching lets all three cases be shown instead of stopping at the first.
    failing_calls = (
        (trie.longest_prefix_value, 'Happy'),
        (trie.longest_prefix_value, 'soho xx'),
        (trie.longest_prefix, 'yang Xudong'.split(' ')),
    )
    for lookup, demo_key in failing_calls:
        try:
            print(lookup(demo_key))
        except Exception as exc:
            print('Exception: %s' % exc)


The running result is as follows:

144None12defaultsoho 390no prefix ('soho \ xe5 \ xb0 \ x9a \ xe9 \ x83 \ xbd ', 3) ================== keys ============================= prefix "sm ": sm | sm new life Plaza | sm City Plaza | sm shopping mall | sm International Plaza ============ ==items =========================== prefix "sm ": [('sm ', 1), ('sm \ xe6 \ x96 \ xb0 \ xe7 \ x94 \ x9f \ xe6 \ xb4 \ xbb ', <class '_ main __. null'>), ('sm \ xe6 \ x96 \ xb0 \ xe7 \ x94 \ x9f \ xe6 \ xb4 \ xbb \ xe5 \ xb9 \ xbf \ xe5 \ x9c \ xba ', 5), ('sm \ xe5 \ x9f \ x8e \ xe5 \ xb8 \ x82 \ xe5 \ xb9 \ xbf \ xe5 \ x9c \ xba', 3 ), ('sm \ xe5 \ xb9 \ xbf \ xe5 \ x9c \ xba', 4), ('sm \ xe8 \ xb4 \ xad \ xe7 \ x89 \ xa9 ', <class '_ main __. null'>), ('sm \ xe8 \ xb4 \ xad \ xe7 \ x89 \ xa9 \ xe5 \ xb9 \ xbf \ xe5 \ x9c \ xba ', 6 ), ('sm \ xe5 \ x9b \ xbd \ xe9 \ x99 \ x85 ', <class' _ main __. null'>), ('sm \ xe5 \ x9b \ xbd \ xe9 \ x99 \ x85 \ xe5 \ xb9 \ xbf \ xe5 \ x9c \ xba', 2)] ======================== delete ======================================= None ====== no it Em matches any prefix of given key ======= Traceback (most recent call last): File ". /word_based_trie.py ", line 183, in <module> print trie. longest_prefix_value ('Happy ') File ". /word_based_trie.py ", line 98, in longest_prefix_value raise Exception (" no item matches any prefix of the given key! ") Exception: no item matches any prefix of the given key!






Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.