Python implementation of an HMM (hidden Markov model) for sequence labeling

Source: Internet
Author: User
#encoding: utf-8 import sys import pickle from copy import deepcopy Is_train = False Default_prob = 0.000000000001 MIN_PR OB =-1 * Float (' inf ') Train_path = "train.in" Test_path = "test.in" Output_path = "Test.out" #统计 each number of times as each probability def train (): print "Start training ..." # The following 5 elements are the HMM model parameters V = set () # observation Set Q = set () # state Set A = {} # state transition probability matrix, P (State | state), is a two-layer dict specifically pre_state-> (state->prob) B = {} # observation probability matrix, P (observation | state), is a two-layer dict specifically state-> (Observ->pro b) PI = {} # initial state probability vector # Statistical model parameters with open (Train_path, "RB") as infile:pre_s =-1 # t-1 status F or line in Infile:segs = Line.rstrip (). Split (' \ t ') If Len (segs)! = 2: # When encountering empty rows pre _s = 1 Else:o = segs[0] # t moment of observation o s = segs[1] # t moment State S # stats State S to the number of observations o b[s][o] = B.setdefault (s, {}). SetDefault (o, 0) + 1 v.add (o) q.a DD (s) if pre_s = =-1: # count the number of times each sentence begins with the first state pi[s] = Pi.setdefault (s, 0) + 1 Else: # Stats status pre_s to state s
The number of times a[pre_s][s] = A.setdefault (pre_s, {}). SetDefault (s, 0) + 1 pre_s = s #切换到下一个状态
# probability normalization for I in A.keys (): prob_sum = 0 for J in A[i].keys (): Prob_sum + = A[i][j]
For j in A[i].keys (): a[i][j] = 1.0 * A[I][J]/prob_sum for I in B.keys (): prob_sum = 0  For j in B[i].keys (): Prob_sum + = B[i][j] for j in B[i].keys (): b[i][j] = 1.0 * B[I][J]/ Prob_sum prob_sum = SUM (pi.values ()) for I in Pi.keys (): pi[i] = 1.0 * Pi[i]/prob_sum print "Finis
Hed training ... "Return A, B, Pi, V, q def Savemodel (A, B, Pi, V, q): With open (" A.param "," WB ") as outfile: Pickle.dump (A, outfile) with open ("B.param", "WB") as Outfile:pickle.dump (B, outfile) with open ("PI . Param "," WB ") as OutfilE:pickle.dump (PI, outfile) with open ("V.param", "WB") as Outfile:pickle.dump (V, outfile) with op  En ("Q.param", "WB") as Outfile:pickle.dump (Q, outfile) #维特比 def predict (X, A, B, PI, V, Q): W = [{} for T ' in Range (len (X))] #相当于书上的δ path = {} for s in q:w[0][s] = 1.0 * Pi.get (S, default_prob) * B.get (s, {}). Get ( X[0], Default_prob) #0时刻状态为s的概率 path[s] = [s] for T in range (1, Len (X)): New_path = {} for S I N Q: #两轮循环暴力求解 max_prob = Min_prob max_s = "for pre_s in Q:prob = W [T-1]  [pre_s] * \ a.get (pre_s, {}). Get (S, default_prob) * \ b.get (s, {}). Get (X[t], Default_prob) (Max_prob, max_s) = Max ((Max_prob, max_s), (PROB, pre_s)) #全由第一个prob决定 W[t][s]
= Max_prob #t时刻状态为s的最大概率 tmp = deepcopy (path[max_s]) tmp.append (s) new_path[s] = tmp Path = new_pAth (max_prob, max_s) = Max ((W[len (X) -1][s], s) for S in Q) # The last time the probability of each state is the largest return path[max_s] def getmodel ():
With open ("A.param", "RB") as Infile:a = Pickle.load (infile) with open ("B.param", "RB") as infile: B = Pickle.load (infile) with open ("Pi.param", "RB") as Infile:pi = Pickle.load (infile) with open ("V.PA Ram "," RB ") as Infile:v = Pickle.load (infile) with open (" Q.param "," RB ") as Infile:q = Pickle.load (i Nfile) return A, B, Pi, V, q def Test (A, B, Pi, V, q): print "Start testing" with open (Test_path, "RB") As infile, \ Open (Output_path, "WB") as Outfile:x_test = [] Y_test = [] for line in INFI Le:segs = Line.strip (). Split (' \ t ') If Len (segs)! = 2: # When encountering empty rows if Len (x_test) = = 0  : #一整句 such as nbad continue preds = Predict (X_test, A, B, PI, V, Q) for Vals In Zip (x_test, y_test,Preds): Outfile.write ("\ T". Join (Vals) + "\ n") outfile.write ("\ n") X  _test = [] Y_test = [] Else:o = segs[0] # t moment of observation o s = segs[1] # T-moment status S X_test.append (o) y_test.append (s) print "finished testing" def Mai N (): If is_train:a, B, Pi, V, q = Train () Savemodel (A, B, Pi, V, q) else:a, B, Pi, V, q
= Getmodel () test (A, B, PI, V, Q) if __name__ = = ' __main__ ': Main ()

Data in https://github.com/guotong1988/MachineLearningFromZero
Reference: Li Hang, "Statistical Learning Methods" (统计学习方法)

Related Keywords:

The content source of this page is from the Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page is confusing to you, please write us an email; we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

• Sales Support

1 on 1 presale consultation

• After-Sales Support

24/7 Technical Support 6 Free Tickets per Quarter Faster Response

• Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.