The domain-prediction software InterProScan offers a variety of output formats; I chose GFF3 because it best suits downstream analysis. I prefer to do my analysis inside a database, so the first step is to import the data into the database.
I wrote the interface with Qt, so all that is needed is to add a QAction to the menu and a matching slot function to the main window class. The slot function that parses the file is given below.
void Mainwindow::on_interprotomydb () {Pfamtomydbdlg * Dbdlg = new Pfamtomydbdlg (this); Dbdlg->exec (); QFile Qinfile (dbdlg->filename.tostdstring () c_str ());//QFile Qinfile ("MYPFAM.GFF3"); Qstringlist FullPath = Dbdlg->filename.split (Qregexp ("/")); QString path; for (int i = 0; i < fullpath.size ()-1; i++) {path.append (fullpath[i]); Path.append ("/"); }//Qmessagebox::information (This, "ERROR2", Path); QFile qoutfile ("A2.sql"); QFile qoutfile (path+dbdlg->name+ ". sql");//Qmessagebox::information (this, "ERROR3", dbdlg-> Filename.tostdstring (). C_STR ());//Qmessagebox::information (this, "ERROR4", dbdlg->name); if (!qoutfile.open (Qiodevice::readwrite | Qiodevice::text) {qmessagebox::information (this, "ERROR1", "failed to open"); Return } if (!qinfile.open (qiodevice::readonly | Qiodevice::text) {qmessagebox::information (this, "ERROR2", "failed to open"); return; }Qtextstream Mytextstream (&qoutfile); while (!qinfile.atend ()) {Qbytearray line = Qinfile.readline (); Line[line.size () -1]= ' + '; QString str (line); if (str[0]== ' # ' | | Str.size () = = 0) {continue; } if (!str.startswith ("Wp_")) {break; } qstringlist sections = Str.split (Qregexp ("\ t")); if (!sections[sections.size () -1].startswith ("Name")) {continue; } qstringlist Sections2 = Sections[sections.size () -1].split (Qregexp (";")); Qstringlist sections3 = Sections2[2].split (Qregexp ("")); QString SQL; if (sections3.size ()! = 3 | | sections2.size ()! = 6 | | !sections3[0].startswith ("target=") | | !sections2[0].startswith ("Name") | | !sections2[1].startswith ("Signature_desc") | | !sections2[3].startswith ("status") | | !sections2[4].startswith ("ID") | | !sections2[5].startswith ("DaTE ")) {qmessagebox::warning (this," ERROR ", QString (" format is wrong he Re, please add manully:\n ") +sections[sections.size () -1].tostdstring (). 
C_STR ()); Continue } sql.sprintf ("INSERT into Res_smart (Name, Signature_desc, Target,start, stop, status, Id_seq, date, Uid) values (\" %s\ ", \"%s\ ", '%s ', '%s ', '%s ', '%s ', '%s ', '%s ', (select Id from project_abbr where name = '%s '));", sections 2[0].split (qregexp ("=")) [1].tostdstring (). C_STR (), Sections2[1].split (qregexp ("=")) [1].tostdstring (). C_STR (), Sections3[0].split (qregexp ("=")) [1].tostdstring (). C_STR (), sections3[1].tostdstring (). C_STR (), Sections3[2].tostdstring (). C_STR (), Sections2[3].split (qregexp ("=")) [1].tostdstring (). C_STR (), Sections2[4].split (qregexp ("=")) [1].tostdstring (). C_STR (), Sections2[5].split (qregexp ("=")) [1].tostdstri Ng (). C_STR (), Dbdlg->naMe.tostdstring (). C_STR ()); mytextstream<<sql<< ' \ n '; } qoutfile.close (); Qinfile.close ();}
Pfamtomydbdlg inherits from QDialog; its header and implementation are given below.
#ifndef pfamtomydbdlg_h#define pfamtomydbdlg_h#include <QDialog> #include <QPushButton> #include < qlineedit> #include <QLabel> #include <qcombobox>class pfamtomydbdlg:public qdialog{ q_ Objectpublic: QString filename; QString name;public: explicit Pfamtomydbdlg (Qwidget *parent = 0);p rivate: qlabel *filenamelabel, *namelabel; Qlineedit *filenameedit, *nameedit; Qpushbutton *openfilebtn, *exebtn; Qcombobox *namecombox;signals:public Slots: void On_openfile (); void On_exe ();}; #endif//Pfamtomydbdlg_h
#include "pfamtomydbdlg.h"

#include <QDebug>
#include <QFileDialog>
#include <QFont>
#include <QHBoxLayout>
#include <QMessageBox>
#include <QString>
#include <QVBoxLayout>

/// Builds the dialog: a file-name row (label, line edit, "open" button), a
/// name row (label, editable combo box pre-filled with known names) and an
/// "execute" button underneath.
Pfamtomydbdlg::Pfamtomydbdlg(QWidget *parent) : QDialog(parent)
{
    // QFont is a value type and setFont() copies it, so it lives on the stack;
    // the original heap-allocated it and never released it.
    QFont labelFont;
    labelFont.setBold(true);
    labelFont.setPointSize(24);

    filenamelabel = new QLabel(tr("Filename"));
    filenamelabel->setFont(labelFont);
    filenamelabel->setStyleSheet("color:red");
    filenameedit = new QLineEdit;
    openfilebtn = new QPushButton("open");
    connect(openfilebtn, SIGNAL(clicked()), this, SLOT(on_openfile()));

    namelabel = new QLabel(tr("name"));
    namelabel->setFont(labelFont);
    namelabel->setStyleSheet("color:red");
    // The name is entered through the combo box. The original also created a
    // parentless QLineEdit here that was never shown, read or deleted; the
    // member is kept but left null.
    nameedit = 0;
    namecombox = new QComboBox;
    namecombox->setEditable(true);
    namecombox->addItem("Cytophaga hutchinsonii");
    namecombox->addItem("Dyadobacter Fermentans");
    namecombox->addItem("Dyadobactor tibetensis");
    namecombox->addItem("Fibrella Aestuarina");
    namecombox->addItem("Fibrisome limi");
    namecombox->addItem("Fibrobacter succinogenes");
    namecombox->addItem("Runella slithyformis");
    namecombox->addItem("Leadbetterella Byssophilla");
    namecombox->addItem("Sporocytophaga myxococcoides");
    namecombox->addItem("Spirosoma Linguale");

    exebtn = new QPushButton("execute");
    connect(exebtn, SIGNAL(clicked()), this, SLOT(on_exe()));

    QHBoxLayout *filenamelayout = new QHBoxLayout;
    filenamelayout->addWidget(filenamelabel);
    filenamelayout->addWidget(filenameedit);
    filenamelayout->addWidget(openfilebtn);

    QHBoxLayout *namelayout = new QHBoxLayout;
    namelayout->addWidget(namelabel);
    namelayout->addWidget(namecombox);

    // setLayout() hands the layouts and every widget in them to the dialog.
    QVBoxLayout *globallayout = new QVBoxLayout;
    globallayout->addLayout(filenamelayout);
    globallayout->addLayout(namelayout);
    globallayout->addWidget(exebtn);
    this->setLayout(globallayout);
}

/// Slot for the "execute" button: stores the trimmed inputs in the public
/// members and ends the dialog.
void Pfamtomydbdlg::on_exe()
{
    filename = filenameedit->text().trimmed();
    name = namecombox->currentText().trimmed();
    // accept() ends exec() like close() did, but reports QDialog::Accepted
    // so a caller can tell "execute" apart from dismissing the window.
    this->accept();
}

/// Slot for the "open" button: lets the user choose a file and shows the
/// chosen path in the file-name edit.
void Pfamtomydbdlg::on_openfile()
{
    const QString chosen =
        QFileDialog::getOpenFileName(this, "open", "c:/desktop/", "Files (*.*)");
    // An empty result means the file dialog was cancelled; keep whatever
    // path is already in the edit instead of wiping it.
    if (!chosen.isEmpty()) {
        filenameedit->setText(chosen);
    }
}
The part to be extracted does not include the sequence section that follows it. In the middle (feature) part, each record sits on its own line and the fields within a line are separated by tabs, so if you do not need any complex analysis you can copy this part of the results directly into Excel and analyse it there.
A Qt-based C++ member function for parsing InterProScan results