SVM is a very powerful machine-learning classification algorithm with applications in many fields, such as text classification, image classification, biological sequence analysis and biological data mining, handwritten character recognition, and so on. For the underlying theory, see the post http://www.dataguru.cn/forum.php?mod=viewthread&tid=371987.
Recently I have been doing SVM classification. The objects being processed are the basic-behavior files generated by dynamic analysis of malicious programs; each one is a 1×811 string of 0s and 1s. LIBSVM requires its input in the format `label index:value ...`. I started with a Python implementation, but found its file handling awkward (please don't flame me; it may simply be that I am not familiar with Python). In the end I implemented the conversion in C++. Some problems came up along the way, but it finally works.
The following code is posted:
First, a Python script that picks the basic-behavior files out of the sample analysis results. Each basic-behavior file is named after its sample folder followed by the `_Result.txt` suffix. The code is as follows:
#!/usr/bin/python
"""Extract the basic-behavior result files from a tree of sample run results."""
import os
import shutil


def copy_result_files(dpath, rpath):
    """Copy every ``<sample>/<sample>_Result.txt`` found under *dpath* into *rpath*.

    *dpath* is expected to contain one sub-folder per analysed sample. Inside
    the sub-folder named ``<sample>`` only the file called exactly
    ``<sample>_Result.txt`` is copied; everything else is ignored.

    Returns the list of source paths that were copied, in directory-listing
    order.
    """
    # shutil.copy() writes to a *file* named rpath when rpath is not an
    # existing directory, so make sure the destination folder exists first.
    if not os.path.isdir(rpath):
        os.makedirs(rpath)

    copied = []
    for filename in os.listdir(dpath):
        filepath = os.path.join(dpath, filename)
        # Test the entry itself, not the root folder: a plain file sitting in
        # dpath would otherwise be handed to os.listdir() and raise.
        if not os.path.isdir(filepath):
            continue
        wanted = filename + "_" + "Result.txt"
        for name in os.listdir(filepath):
            if name == wanted:
                fpath = os.path.join(filepath, name)
                shutil.copy(fpath, rpath)
                copied.append(fpath)
    return copied


if __name__ == "__main__":
    # Folder holding one sub-folder per sample, and the folder that receives
    # the extracted result files.
    dpath = r"C:\Users\xd\Desktop\1.12\test0"
    rpath = r"C:\Users\xd\Desktop\1.12\test"
    copy_result_files(dpath, rpath)
The C++ code that converts the basic-behavior files under a folder into the required file format:
#include <iostream>#include<iomanip>#include<fstream>#include<io.h>#include<vector>#include<string>using namespacestd;voidGetFiles (stringPath, vector<string>&files);voidMain () {inta[812]; Ifstream F; a[0]=1; Chars; //Char *ss; Char* filepath="c:\\users\\xd\\desktop\\1.12\\test"; Vector<string>files; GetFiles (Filepath,files); intSize =files.size (); Ofstream F1; F1.open ("E:\\test.txt"); for(inti =0; I < size;i++) {cout<<files[i].c_str () <<Endl; //ss=files[i].c_str ();F.open (Files[i].c_str ()); for(intn=1; n<=811; n++) {s=f.Get(); A[n]=s-'0'; } f.close (); F1<<a[0]<<" "; for(intj=1; j<=811; j + +) {F1<<j<<":"<<a[j]<<" "; } F1<<Endl; } f1.close ();}voidGetFiles (stringPath, vector<string>&files) { Longhfile =0; struct_finddata_t FileInfo; stringp; if(hfile = _findfirst (p.assign (Path). Append ("\\*"). C_str (), &fileinfo))! =-1) { Do { if((Fileinfo.attrib &_a_subdir)) { if(strcmp (Fileinfo.name,".") !=0&& strcmp (Fileinfo.name,"..") !=0) getFiles (p.assign (Path). Append ("\\"). Append (Fileinfo.name), files); } Else{files.push_back (p.assign (Path). Append ("\\"). Append (Fileinfo.name)); } } while(_findnext (hfile, &fileinfo) = =0); _findclose (hfile); }}
The program reads the basic-behavior files under the sample folder and converts them into the format required by LIBSVM.