#!/usr/bin/env python
"""
HMM module

This module implements a simple Hidden Markov Model class. It follows the
description in Chapter 6 of Jurafsky and Martin (2008) fairly closely, with
one exception: in this implementation, we assume that all states are initial
states.

@author: Rob Malouf
@organization: Dept. of Linguistics, San Diego State University
@contact: rmalouf@mail.sdsu.edu
@version: 2
@since: 24-March-2008
"""
from copy import copy
from itertools import product
class HMM(object):
    """
    Class for Hidden Markov Models

    An HMM is a weighted FSA which consists of:

        - a set of states (C{self.states}; internally each state is referred
          to by its index, 0 .. C{self.N} - 1)
        - an output alphabet (C{self.alphabet})
        - a table of state transition probabilities (C{self.A})
        - a table of symbol emission probabilities (C{self.B})
        - a list of initial probabilities (C{self.initial})

    We assume that the HMM is complete, and that all states are both initial
    and final states.
    """

    # Largest deviation from 1.0 tolerated when checking that a list of
    # probabilities forms a proper distribution.
    _TOLERANCE = 1e-8

    def __init__(self, states, alphabet, A, B, initial):
        """
        Create a new HMM object

        @param states: state names
        @type states: C{list}
        @param alphabet: output alphabet
        @type alphabet: C{list}
        @param A: transition probabilities; C{A[i][j]} is the probability of
            moving from state C{i} to state C{j}
        @type A: C{list} of C{list}s
        @param B: emission probabilities; C{B[i][k]} is the probability of
            state C{i} emitting symbol C{k} of the alphabet
        @type B: C{list} of C{list}s
        @param initial: initial state probabilities
        @type initial: C{list} of C{float}s
        @raise ValueError: the HMM is mis-specified somehow
        """
        # basic configuration
        self.states = states
        self.N = len(self.states)
        self.alphabet = alphabet

        # initial probabilities
        self.initial = initial
        if len(self.initial) != self.N:
            raise ValueError('only found %d initial probabilities'
                             % len(self.initial))
        if self._improper(self.initial):
            raise ValueError('improper initial probabilities')

        # transition probabilities: one distribution over states per state
        self.A = A
        if len(self.A) != self.N:
            raise ValueError('only found %d transition probability '
                             'distributions' % len(self.A))
        for i, row in enumerate(self.A):
            if len(row) != self.N:
                raise ValueError('only found %d transition probabilities '
                                 'for state %d' % (len(row), i))
            if self._improper(row):
                raise ValueError('improper transition probabilities '
                                 'for state %d' % i)

        # emission probabilities: one distribution over the alphabet per
        # state.  Every state is checked, since there is no distinguished
        # non-emitting start state in this implementation.
        self.B = B
        if len(self.B) != self.N:
            raise ValueError('only found %d emission probability '
                             'distributions' % len(self.B))
        n_symbols = len(self.alphabet)
        for i, row in enumerate(self.B):
            if len(row) != n_symbols:
                raise ValueError('only found %d emission probabilities '
                                 'for state %d' % (len(row), i))
            if self._improper(row):
                raise ValueError('improper emission probabilities '
                                 'for state %d' % i)

    @classmethod
    def _improper(cls, probs):
        """Return True if C{probs} does not sum to one (within tolerance)."""
        return abs(sum(probs) - 1.0) > cls._TOLERANCE

    @staticmethod
    def _indices(names, items, kind):
        """
        Convert each element of C{items} to its position in C{names}

        @param names: the known names (states or alphabet symbols)
        @param items: the sequence of names to convert
        @param kind: word used in the error message ('state' or 'output')
        @return: C{list} of C{int} indices, one per item
        @raise ValueError: an item does not occur in C{names}
        """
        indices = []
        for item in items:
            try:
                indices.append(names.index(item))
            except ValueError:
                # Report the offending item itself; the tuple keeps the
                # formatting safe even when the item is a tuple.
                raise ValueError('unknown %s: %s' % (kind, item))
        return indices

    def allseqs(self, n):
        """
        Generate all possible state sequences of length n

        Sequences are produced as lists of state names, varying the last
        position fastest.  For C{n == 0} the single empty sequence is
        generated.
        """
        for seq in product(self.states, repeat=n):
            yield list(seq)

    def prob(self, S, O):
        """
        Calculate P(O, S | M)

        @param S: state sequence (state names)
        @param O: output sequence (alphabet symbols), same length as C{S}
        @return: joint probability of the outputs and the state sequence
        @raise ValueError: C{S} or C{O} contains an unknown name
        """
        assert len(O) == len(S)
        assert len(O) > 0

        # convert state and output names to indices
        S = self._indices(self.states, S, 'state')
        O = self._indices(self.alphabet, O, 'output')

        prob = self.initial[S[0]] * self.B[S[0]][O[0]]
        for t in range(1, len(S)):
            prob *= self.A[S[t - 1]][S[t]] * self.B[S[t]][O[t]]
        return prob

    def probseq(self, O):
        """
        Calculate P(O | M) the hard way

        Sums P(O, S | M) over every possible state sequence S, so the cost
        grows exponentially with the length of C{O}.
        """
        return sum(self.prob(S, O) for S in self.allseqs(len(O)))

    def forward(self, O):
        """
        Calculate P(O | M) using the Forward Algorithm

        @param O: output sequence (alphabet symbols); must be non-empty
        @return: total probability of the outputs under the model
        @raise ValueError: C{O} contains an unknown symbol
        """
        # convert output names to indices
        O = self._indices(self.alphabet, O, 'output')

        # first trellis column: start in each state and emit O[0]
        alpha = [self.initial[s] * self.B[s][O[0]] for s in range(self.N)]

        # Fill in the rest of the trellis.  Each column depends only on the
        # previous one, so only the latest column is kept.
        for obs in O[1:]:
            column = []
            for s2 in range(self.N):
                emit = self.B[s2][obs]
                column.append(sum(self.A[s1][s2] * emit * alpha[s1]
                                  for s1 in range(self.N)))
            alpha = column

        # find total probability
        return sum(alpha)
def main():
    """
    Demonstrate the HMM class on Eisner's HMM

    Builds the two-state (hot/cold) model and prints the joint probability of
    one state sequence with the outputs [3, 1, 3], followed by the total
    probability of those outputs computed both by brute-force enumeration and
    by the Forward Algorithm.
    """
    # set up Eisner's HMM
    states = ['hot', 'cold']
    output = [1, 2, 3]
    transition = [[0.7, 0.3],
                  [0.4, 0.6]]
    emission = [[0.2, 0.4, 0.4],
                [0.5, 0.4, 0.1]]
    initial = [0.8, 0.2]
    m = HMM(states, output, transition, emission, initial)

    # Single-argument print calls behave the same on Python 2 and Python 3.
    print('P([hot, hot, cold], [3, 1, 3] | M) = %s'
          % m.prob(['hot', 'hot', 'cold'], [3, 1, 3]))
    print('P([3, 1, 3] | M) = %s (by the naive method)'
          % m.probseq([3, 1, 3]))
    print('P([3, 1, 3] | M) = %s (by the Forward Algorithm)'
          % m.forward([3, 1, 3]))


# Run the demonstration only when executed as a script, so that importing the
# module has no side effects.
if __name__ == '__main__':
    main()