之前看到過C版本的,感覺寫得很長,沒有仔細看,但是C++版本的寫得還是很不錯的。我仔細看了一下,並對內容進行了詳細的注釋,如果有人沒有看懂,歡迎留言討論。
先看一眼標頭檔,在background_segm.hpp中
// Gaussian-mixture-model background subtractor (Stauffer & Grimson style MOG).
// Feed frames through operator() to obtain a per-pixel foreground mask.
class CV_EXPORTS_W BackgroundSubtractorMOG : public BackgroundSubtractor
{
public:
    //! the default constructor
    CV_WRAP BackgroundSubtractorMOG();
    //! the full constructor that takes the length of the history, the number of gaussian mixtures, the background ratio parameter and the noise strength
    CV_WRAP BackgroundSubtractorMOG(int history, int nmixtures, double backgroundRatio, double noiseSigma=0);
    //! the destructor
    virtual ~BackgroundSubtractorMOG();
    //! the update operator: computes fgmask from image and updates the model at learningRate
    virtual void operator()(InputArray image, OutputArray fgmask, double learningRate=0);

    //! re-initialization method: resets the model for a new frame size/type
    virtual void initialize(Size frameSize, int frameType);

    virtual AlgorithmInfo* info() const;

protected:
    Size frameSize;         // size of the frames being processed
    int frameType;          // OpenCV type of the frames (CV_8UC1 or CV_8UC3)
    Mat bgmodel;            // per-pixel mixture parameters, laid out flat (see initialize)
    int nframes;            // number of frames processed since (re)initialization
    int history;            // used to derive the learning rate from the frame count; not a primary parameter
    int nmixtures;          // number of Gaussian components per pixel
    double varThreshold;    // variance threshold for the match test
    double backgroundRatio; // cumulative-weight threshold separating background from foreground components
    double noiseSigma;      // noise standard deviation (floors the per-component variance)
};
再看一眼源檔案,在bgfg_gaussmix.cpp中:
// Default model parameters used when the caller does not supply its own.
static const int defaultNMixtures = 5;              // default number of mixture components
static const int defaultHistory = 200;              // default history length
static const double defaultBackgroundRatio = 0.7;   // default background cumulative-weight threshold
static const double defaultVarThreshold = 2.5*2.5;  // default variance (match) threshold
static const double defaultNoiseSigma = 30*0.5;     // default noise standard deviation
static const double defaultInitialWeight = 0.05;    // default weight given to a newly created component

// Default constructor: all parameters take their default values.
BackgroundSubtractorMOG::BackgroundSubtractorMOG()
{
    frameSize = Size(0,0);
    frameType = 0;
    nframes = 0;
    nmixtures = defaultNMixtures;
    history = defaultHistory;
    varThreshold = defaultVarThreshold;
    backgroundRatio = defaultBackgroundRatio;
    noiseSigma = defaultNoiseSigma;
}

// Full constructor: sanitizes the caller-supplied values, falling back to the
// defaults when a value is out of range.
BackgroundSubtractorMOG::BackgroundSubtractorMOG(int _history, int _nmixtures, double _backgroundRatio, double _noiseSigma)
{
    frameSize = Size(0,0);
    frameType = 0;
    nframes = 0;
    nmixtures = min(_nmixtures > 0 ? _nmixtures : defaultNMixtures, 8); // at most 8 components; non-positive -> default
    history = _history > 0 ? _history : defaultHistory;                 // must be positive, otherwise use the default
    varThreshold = defaultVarThreshold;                                  // match threshold always uses the default here
    backgroundRatio = min(_backgroundRatio > 0 ? _backgroundRatio : 0.95, 1.); // must be in (0, 1]; otherwise 0.95
    noiseSigma = _noiseSigma <= 0 ? defaultNoiseSigma : _noiseSigma;     // must be positive, otherwise use the default
}

BackgroundSubtractorMOG::~BackgroundSubtractorMOG()
{
}

// Resets the background model for a new frame geometry/type.
void BackgroundSubtractorMOG::initialize(Size _frameSize, int _frameType)
{
    frameSize = _frameSize;
    frameType = _frameType;
    nframes = 0;

    int nchannels = CV_MAT_CN(frameType);
    CV_Assert( CV_MAT_DEPTH(frameType) == CV_8U );

    // for each gaussian mixture of each pixel bg model we store ...
    // the mixture sort key (w/sum_of_variances), the mixture weight (w),
    // the mean (nchannels values) and
    // the diagonal covariance matrix (another nchannels values)
    // Model storage is one flat row of floats:
    //   rows * cols * nmixtures * (1 sort key + 1 weight + nchannels means + nchannels variances)
    bgmodel.create( 1, frameSize.height*frameSize.width*nmixtures*(2 + 2*nchannels), CV_32F );
    bgmodel = Scalar::all(0); // zero-initialize all mixture parameters
}

// One mixture component. Templated on the value type so the same layout
// serves grayscale (VT = float) and color (VT = Vec3f) pixels.
template<typename VT> struct MixData
{
    float sortKey; // priority used to keep components ordered (weight / stddev)
    float weight;  // component weight
    VT mean;       // component mean
    VT var;        // component (diagonal) variance
};

// MOG update for single-channel 8-bit frames.
// For each pixel: find the first component the pixel matches, update it and
// re-sort; otherwise replace the lowest-priority component with a new one
// centered on the pixel. The pixel is foreground if its matched component
// lies past the cumulative background-weight threshold T.
static void process8uC1( const Mat& image, Mat& fgmask, double learningRate,
                         Mat& bgmodel, int nmixtures, double backgroundRatio,
                         double varThreshold, double noiseSigma )
{
    int x, y, k, k1, rows = image.rows, cols = image.cols;
    float alpha = (float)learningRate, T = (float)backgroundRatio, vT = (float)varThreshold; // learning rate, background threshold, variance threshold
    int K = nmixtures; // number of components per pixel
    MixData<float>* mptr = (MixData<float>*)bgmodel.data;

    const float w0 = (float)defaultInitialWeight;          // initial weight for a new component
    const float sk0 = (float)(w0/(defaultNoiseSigma*2));   // initial sort key for a new component
    const float var0 = (float)(defaultNoiseSigma*defaultNoiseSigma*4); // initial variance for a new component
    const float minVar = (float)(noiseSigma*noiseSigma);   // variance floor

    for( y = 0; y < rows; y++ )
    {
        const uchar* src = image.ptr<uchar>(y);
        uchar* dst = fgmask.ptr<uchar>(y);

        if( alpha > 0 ) // zero learning rate degenerates to plain background subtraction (else branch)
        {
            for( x = 0; x < cols; x++, mptr += K )
            {
                float wsum = 0;
                float pix = src[x]; // current pixel value
                int kHit = -1, kForeground = -1; // matched component index; first foreground component index

                for( k = 0; k < K; k++ ) // try each component in priority order
                {
                    float w = mptr[k].weight; // current component weight
                    wsum += w;                // accumulate total weight
                    if( w < FLT_EPSILON )
                        break;                // remaining components are unused
                    float mu = mptr[k].mean;  // component mean
                    float var = mptr[k].var;  // component variance
                    float diff = pix - mu;    // distance of pixel from the mean
                    float d2 = diff*diff;     // squared distance
                    if( d2 < vT*var )         // match test: within the variance threshold?
                    {
                        wsum -= w; // undo the accumulation; the updated weight is re-added below
                        float dw = alpha*(1.f - w);
                        mptr[k].weight = w + dw;      // increase the weight (the posterior probability is simplified to 1 here)
                        mptr[k].mean = mu + alpha*diff; // move the mean toward the pixel
                        var = max(var + alpha*(d2 - var), minVar); // update variance; the noise floor covers the zero-initialized case
                        mptr[k].var = var;
                        mptr[k].sortKey = w/sqrt(var);
                        // NOTE(review): the sort key above uses the pre-update
                        // weight w rather than the just-updated mptr[k].weight —
                        // possibly unintended; confirm against the paper.

                        // Bubble the updated component up while its priority
                        // exceeds that of the component before it, keeping the
                        // array sorted by descending priority.
                        for( k1 = k-1; k1 >= 0; k1-- )
                        {
                            if( mptr[k1].sortKey >= mptr[k1+1].sortKey )
                                break; // order unchanged; stop
                            std::swap( mptr[k1], mptr[k1+1] );
                        }

                        kHit = k1+1; // final position of the matched component
                        break;
                    }
                }

                if( kHit < 0 ) // no appropriate gaussian mixture found at all, remove the weakest mixture and create a new one
                {
                    // k may be < K-1 early on, when unused slots remain
                    kHit = k = min(k, K-1);
                    wsum += w0 - mptr[k].weight; // swap the replaced weight for the initial weight in the running sum
                    mptr[k].weight = w0;   // initial weight
                    mptr[k].mean = pix;    // mean = current pixel
                    mptr[k].var = var0;    // initial variance
                    mptr[k].sortKey = sk0; // initial priority
                }
                else
                    for( ; k < K; k++ )
                        wsum += mptr[k].weight; // finish accumulating the total weight

                // Normalize weights so they sum to 1, and find the index past
                // which components count as foreground (cumulative weight > T).
                float wscale = 1.f/wsum;
                wsum = 0;
                for( k = 0; k < K; k++ )
                {
                    wsum += mptr[k].weight *= wscale;
                    mptr[k].sortKey *= wscale;
                    if( wsum > T && kForeground < 0 )
                        kForeground = k+1; // components from this index on are foreground
                }

                // (uchar)(-true) == 255, (uchar)(-false) == 0
                dst[x] = (uchar)(-(kHit >= kForeground));
            }
        }
        else // learning rate <= 0: classify only, no model update
        {
            for( x = 0; x < cols; x++, mptr += K )
            {
                float pix = src[x];
                int kHit = -1, kForeground = -1;

                for( k = 0; k < K; k++ )
                {
                    if( mptr[k].weight < FLT_EPSILON )
                        break;
                    float mu = mptr[k].mean;
                    float var = mptr[k].var;
                    float diff = pix - mu;
                    float d2 = diff*diff;
                    if( d2 < vT*var )
                    {
                        kHit = k;
                        break;
                    }
                }

                if( kHit >= 0 )
                {
                    float wsum = 0;
                    for( k = 0; k < K; k++ )
                    {
                        wsum += mptr[k].weight;
                        if( wsum > T )
                        {
                            kForeground = k+1;
                            break;
                        }
                    }
                }

                dst[x] = (uchar)(kHit < 0 || kHit >= kForeground ? 255 : 0);
            }
        }
    }
}

// MOG update for 3-channel 8-bit frames. Same algorithm as process8uC1 with
// vector means and per-channel diagonal variances (the match test sums the
// three channel variances).
static void process8uC3( const Mat& image, Mat& fgmask, double learningRate,
                         Mat& bgmodel, int nmixtures, double backgroundRatio,
                         double varThreshold, double noiseSigma )
{
    int x, y, k, k1, rows = image.rows, cols = image.cols;
    float alpha = (float)learningRate, T = (float)backgroundRatio, vT = (float)varThreshold;
    int K = nmixtures;

    const float w0 = (float)defaultInitialWeight;
    const float sk0 = (float)(w0/(defaultNoiseSigma*2*sqrt(3.))); // sqrt(3): three channels contribute to the variance sum
    const float var0 = (float)(defaultNoiseSigma*defaultNoiseSigma*4);
    const float minVar = (float)(noiseSigma*noiseSigma);
    MixData<Vec3f>* mptr = (MixData<Vec3f>*)bgmodel.data;

    for( y = 0; y < rows; y++ )
    {
        const uchar* src = image.ptr<uchar>(y);
        uchar* dst = fgmask.ptr<uchar>(y);

        if( alpha > 0 )
        {
            for( x = 0; x < cols; x++, mptr += K )
            {
                float wsum = 0;
                Vec3f pix(src[x*3], src[x*3+1], src[x*3+2]); // interleaved 3-channel pixel
                int kHit = -1, kForeground = -1;

                for( k = 0; k < K; k++ )
                {
                    float w = mptr[k].weight;
                    wsum += w;
                    if( w < FLT_EPSILON )
                        break;
                    Vec3f mu = mptr[k].mean;
                    Vec3f var = mptr[k].var;
                    Vec3f diff = pix - mu;
                    float d2 = diff.dot(diff);
                    if( d2 < vT*(var[0] + var[1] + var[2]) ) // match against the summed channel variances
                    {
                        wsum -= w;
                        float dw = alpha*(1.f - w);
                        mptr[k].weight = w + dw;
                        mptr[k].mean = mu + alpha*diff;
                        // Per-channel variance update, each floored at minVar.
                        var = Vec3f(max(var[0] + alpha*(diff[0]*diff[0] - var[0]), minVar),
                                    max(var[1] + alpha*(diff[1]*diff[1] - var[1]), minVar),
                                    max(var[2] + alpha*(diff[2]*diff[2] - var[2]), minVar));
                        mptr[k].var = var;
                        mptr[k].sortKey = w/sqrt(var[0] + var[1] + var[2]); // NOTE(review): pre-update w, as in process8uC1

                        for( k1 = k-1; k1 >= 0; k1-- )
                        {
                            if( mptr[k1].sortKey >= mptr[k1+1].sortKey )
                                break;
                            std::swap( mptr[k1], mptr[k1+1] );
                        }

                        kHit = k1+1;
                        break;
                    }
                }

                if( kHit < 0 ) // no appropriate gaussian mixture found at all, remove the weakest mixture and create a new one
                {
                    kHit = k = min(k, K-1);
                    wsum += w0 - mptr[k].weight;
                    mptr[k].weight = w0;
                    mptr[k].mean = pix;
                    mptr[k].var = Vec3f(var0, var0, var0);
                    mptr[k].sortKey = sk0;
                }
                else
                    for( ; k < K; k++ )
                        wsum += mptr[k].weight;

                float wscale = 1.f/wsum;
                wsum = 0;
                for( k = 0; k < K; k++ )
                {
                    wsum += mptr[k].weight *= wscale;
                    mptr[k].sortKey *= wscale;
                    if( wsum > T && kForeground < 0 )
                        kForeground = k+1;
                }

                dst[x] = (uchar)(-(kHit >= kForeground));
            }
        }
        else // classification only, no update
        {
            for( x = 0; x < cols; x++, mptr += K )
            {
                Vec3f pix(src[x*3], src[x*3+1], src[x*3+2]);
                int kHit = -1, kForeground = -1;

                for( k = 0; k < K; k++ )
                {
                    if( mptr[k].weight < FLT_EPSILON )
                        break;
                    Vec3f mu = mptr[k].mean;
                    Vec3f var = mptr[k].var;
                    Vec3f diff = pix - mu;
                    float d2 = diff.dot(diff);
                    if( d2 < vT*(var[0] + var[1] + var[2]) )
                    {
                        kHit = k;
                        break;
                    }
                }

                if( kHit >= 0 )
                {
                    float wsum = 0;
                    for( k = 0; k < K; k++ )
                    {
                        wsum += mptr[k].weight;
                        if( wsum > T )
                        {
                            kForeground = k+1;
                            break;
                        }
                    }
                }

                dst[x] = (uchar)(kHit < 0 || kHit >= kForeground ? 255 : 0);
            }
        }
    }
}

// Entry point: dispatches one frame to the 1- or 3-channel processor,
// (re)initializing the model when the frame geometry changes and deriving
// the learning rate from the frame count when none is given.
void BackgroundSubtractorMOG::operator()(InputArray _image, OutputArray _fgmask, double learningRate)
{
    Mat image = _image.getMat();
    // Reinitialize on the first frame, on learningRate >= 1, or whenever the
    // frame size/type changes.
    bool needToInitialize = nframes == 0 || learningRate >= 1 || image.size() != frameSize || image.type() != frameType;

    if( needToInitialize )
        initialize(image.size(), image.type());

    CV_Assert( image.depth() == CV_8U );
    _fgmask.create( image.size(), CV_8U );
    Mat fgmask = _fgmask.getMat();

    ++nframes;
    // Negative learningRate (or the first frame): use 1/min(nframes, history)
    // so early frames adapt quickly and the rate settles at 1/history.
    learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./min( nframes, history );
    CV_Assert(learningRate >= 0);

    if( image.type() == CV_8UC1 )      // grayscale input
        process8uC1( image, fgmask, learningRate, bgmodel, nmixtures, backgroundRatio, varThreshold, noiseSigma );
    else if( image.type() == CV_8UC3 ) // color input
        process8uC3( image, fgmask, learningRate, bgmodel, nmixtures, backgroundRatio, varThreshold, noiseSigma );
    else
        CV_Error( CV_StsUnsupportedFormat, "Only 1- and 3-channel 8-bit images are supported in BackgroundSubtractorMOG" );
}

} // closes a scope opened before this excerpt (presumably namespace cv — confirm)
其中處理3通道彩色映像與處理單通道灰階映像類似,我就沒有進行注釋了。
其中有幾點需要注意:
1.在高斯混合模型中需要使用機率更新參數的地方,程式中都簡化成為了1處理,否則計算一個常態分佈的機率還是挺花時間的。(程式作者在注釋中也指出了他不是完全按照論文寫成的,而是做了一些小的修改)。但是除了將機率換成1,其他地方還是嚴格按照公式的,大家可以仔細推導一下,就會看出其中的差異。
2.作者原文中是如果沒有一個高斯模型與該像素點匹配,則去掉一個機率最小的,而用當前像素初始化的分布來替代它,而在這裡變成了去掉優先順序最小的。
3.程式中為了避免多次做迴圈,把一些步驟拆開做了,比如歸一化權值需要先求出總權值,調整權值後的排序之類的,計算背景模型個數等等。減少了遍曆的次數。其中的巧妙之處也不得不佩服作者的良苦用心。
4.似乎更新優先順序的計算有點小問題,也可能是我理解不對。
5.在初始化時,可以使用多種方式,大家一看程式就明白了。
最後附上一個小的樣本程式,教你如何使用高斯混合模型:
int main(){VideoCapture capture("D:/videos/shadow/use3.MPG");if( !capture.isOpened() ){cout<<"讀取視頻失敗"<<endl;return -1;}//擷取整個幀數long totalFrameNumber = capture.get(CV_CAP_PROP_FRAME_COUNT);cout<<"整個視頻共"<<totalFrameNumber<<"幀"<<endl;//設定開始幀()long frameToStart = 200;capture.set( CV_CAP_PROP_POS_FRAMES,frameToStart);cout<<"從第"<<frameToStart<<"幀開始讀"<<endl;//設定結束幀int frameToStop = 650;if(frameToStop < frameToStart){cout<<"結束幀小於開始幀,程式錯誤,即將退出!"<<endl;return -1;}else{cout<<"結束幀為:第"<<frameToStop<<"幀"<<endl;}double rate = capture.get(CV_CAP_PROP_FPS);int delay = 1000/rate;Mat frame;//前景圖片Mat foreground;//使用預設參數調用混合高斯模型BackgroundSubtractorMOG mog;bool stop(false);//currentFrame是在迴圈體中控制讀取到指定的幀後迴圈結束的變數long currentFrame = frameToStart;while( !stop ){if( !capture.read(frame) ){cout<<"從視頻中讀取映像失敗或者讀完整個視頻"<<endl;return -2;}cvtColor(frame,frame,CV_RGB2GRAY);imshow("輸入視頻",frame);//參數為:輸入映像、輸出映像、學習速率mog(frame,foreground,0.01);imshow("前景",foreground);//按ESC鍵退出,按其他鍵會停止在當前幀int c = waitKey(delay);if ( (char)c == 27 || currentFrame >= frameToStop){stop = true;}if ( c >= 0){waitKey(0);}currentFrame++;}waitKey(0);}
就說這麼多吧,雖然我