These are my self-study notes from the Johns Hopkins University Coursera course "Bioconductor for Genomic Data Science". The course is very good and I recommend it to everyone. ExpressionSet overview: an ExpressionSet bundles an expression matrix with phenotype data and feature data. An eSet is the same kind of container, except that it can hold multiple expression matrices.
ExpressionSet
# ExpressionSet walkthrough using the ALL example dataset.
library(ALL)
data(ALL)
experimentData(ALL)
exprs(ALL)[1:4, 1:4]
head(sampleNames(ALL))
head(featureNames(ALL))
head(pData(ALL))
ALL$sex
ALL[1:10, 1:5]

# featureData holds information about the genes, but it is often empty.
featureData(ALL)
ids <- featureNames(ALL)[1:5]

# The dataset reports hgu95av2 as its chip platform, so load the matching
# annotation package to map probe IDs to Entrez gene IDs.
library(hgu95av2.db)
as.list(hgu95av2ENTREZID[ids])

# pData(ALL) is usually more convenient; phenoData(ALL) carries more content.
phenoData(ALL)

# Sometimes equivalent to varLabels(ALL); sometimes varLabels(ALL) is more
# detailed.
names(pData(ALL))
SummarizedExperiment
# SummarizedExperiment walkthrough using the airway example dataset.
library(airway)
data("airway")
airway

# colData() plays the role that pData(ALL) plays for an ExpressionSet.
colData(airway)
airway$cell
colnames(airway)
head(rownames(airway))

# To get an expression matrix use the assay() accessor; assayNames() lists
# the names of all available expression matrices.
assayNames(airway)
assay(airway, "counts")[1:4, 1:4]

# What makes a SummarizedExperiment special is the GRanges associated with
# its rows.
length(rowRanges(airway))
rowRanges(airway)
start(airway)

# Because of the associated GRanges, the expression values of the genes
# "within a region of a chromosome" can be pulled out directly.
gr <- GRanges("1", ranges = IRanges(start = 1, end = 10^7))
subsetByOverlaps(airway, gr)
GEOquery
# GEOquery walkthrough: fetch a GEO series and inspect it.
library(GEOquery)
eList <- getGEO("GSE11675")
length(eList)
eData <- eList[[1]]
eData
names(pData(eData))

# Download the raw supplementary tar archive.
eList2 <- getGEOSuppFiles("GSE11675")
eList2
biomaRt
# biomaRt walkthrough: map Affymetrix probe IDs to Ensembl gene IDs.
library(biomaRt)
head(listMarts())
mart <- useMart("ensembl")
mart
head(listDatasets(mart))
ensembl <- useDataset("hsapiens_gene_ensembl", mart)

values <- c("202763_at", "209310_s_at", "207500_at")
getBM(
  attributes = c("ensembl_gene_id", "affy_hg_u133_plus_2"),
  filters = "affy_hg_u133_plus_2",
  values = values,
  mart = ensembl
)

# Attributes: the entries that can be queried. Among other things they allow
# converting genes of one species to the homologous genes of another.
attributes <- listAttributes(ensembl)
nrow(attributes)
head(attributes)

# Filters: the entries that can be used to restrict a query.
filters <- listFilters(ensembl)
nrow(filters)
head(filters)

# Attributes are stored page by page; selecting a page narrows the search.
attributePages(ensembl)
attributes <- listAttributes(ensembl, page = "feature_page")
nrow(attributes)
R S4 Classes
# S4 classes walkthrough.
library(ALL)
library(GenomicRanges)

# An S3 object is like a list in which every element has its own name,
# whereas an S4 class defines what each object of the class must contain.
data(ALL)
ALL
class(ALL)
isS4(ALL)

# Two ways to view the description of a class.
class?ExpressionSet
?"ExpressionSet-class"

# Naming convention: class names start with an uppercase letter, and the
# constructor has the same name as the class.
ExpressionSet()

# Slots are the smaller components that make up the class.
getClass("ExpressionSet")
ALL@annotation
annotation(ALL)

# When a class has been upgraded and its definition has changed, refresh
# stored objects with updateObject(). `old_object` is a placeholder that is
# not defined in these notes, so the call is shown as a comment:
# old_object <- updateObject(old_object)

# Check that the object is valid, i.e. conforms to its class definition.
validObject(ALL)
R S4 Methods
# S4 methods walkthrough.
library(GenomicRanges)
GenomicRanges::as.data.frame # S4 method
base::as.data.frame # S3 method

# As the listing shows, the code that is selected depends on the type of x.
showMethods("as.data.frame")

# View the code that is dispatched for one specific argument type.
getMethod("as.data.frame", "GenomicRanges")
getMethod("as.data.frame", signature(x = "GenomicRanges"))

# View the help page that corresponds to one specific argument type.
method?"as.data.frame,DataFrame"
method?"as.data.frame,GenomicRanges"
?"as.data.frame,DataFrame-method"
?"as.data.frame,GenomicRanges-method"

# NOTE(review): newer IRanges releases appear to name this class
# "IntegerRanges" rather than "Ranges" -- confirm against the installed
# version if the lookups below fail.
showMethods("findOverlaps")
getMethod("findOverlaps", signature(query = "Ranges", subject = "Ranges"))
?"findOverlaps,Ranges,Ranges-method"

# S4 drawbacks: help pages are hard to find, the source is hard to view
# directly, and debugging is difficult. Even so, S4 is the best choice when
# writing a package because it keeps things easy to manage.
Finally, here is the complete code listing.
# ExpressionSet ----
library(ALL)
data(ALL)
experimentData(ALL)
exprs(ALL)[1:4, 1:4]
head(sampleNames(ALL))
head(featureNames(ALL))
head(pData(ALL))
ALL$sex
ALL[1:10, 1:5]

# featureData holds information about the genes, but it is often empty.
featureData(ALL)
ids <- featureNames(ALL)[1:5]

# The dataset reports hgu95av2 as its chip platform, so load the matching
# annotation package to map probe IDs to Entrez gene IDs.
library(hgu95av2.db)
as.list(hgu95av2ENTREZID[ids])

# pData(ALL) is usually more convenient; phenoData(ALL) carries more content.
phenoData(ALL)

# Sometimes equivalent to varLabels(ALL); sometimes varLabels(ALL) is more
# detailed.
names(pData(ALL))

# SummarizedExperiment ----
library(airway)
data("airway")
airway

# colData() plays the role that pData(ALL) plays for an ExpressionSet.
colData(airway)
airway$cell
colnames(airway)
head(rownames(airway))

# To get an expression matrix use the assay() accessor; assayNames() lists
# the names of all available expression matrices.
assayNames(airway)
assay(airway, "counts")[1:4, 1:4]

# What makes a SummarizedExperiment special is the GRanges associated with
# its rows.
length(rowRanges(airway))
rowRanges(airway)
start(airway)

# Because of the associated GRanges, the expression values of the genes
# "within a region of a chromosome" can be pulled out directly.
gr <- GRanges("1", ranges = IRanges(start = 1, end = 10^7))
subsetByOverlaps(airway, gr)

# GEOquery ----
library(GEOquery)
eList <- getGEO("GSE11675")
length(eList)
eData <- eList[[1]]
eData
names(pData(eData))

# Download the raw supplementary tar archive.
eList2 <- getGEOSuppFiles("GSE11675")
eList2

# biomaRt ----
library(biomaRt)
head(listMarts())
mart <- useMart("ensembl")
mart
head(listDatasets(mart))
ensembl <- useDataset("hsapiens_gene_ensembl", mart)

values <- c("202763_at", "209310_s_at", "207500_at")
getBM(
  attributes = c("ensembl_gene_id", "affy_hg_u133_plus_2"),
  filters = "affy_hg_u133_plus_2",
  values = values,
  mart = ensembl
)

# Attributes: the entries that can be queried. Among other things they allow
# converting genes of one species to the homologous genes of another.
attributes <- listAttributes(ensembl)
nrow(attributes)
head(attributes)

# Filters: the entries that can be used to restrict a query.
filters <- listFilters(ensembl)
nrow(filters)
head(filters)

# Attributes are stored page by page; selecting a page narrows the search.
attributePages(ensembl)
attributes <- listAttributes(ensembl, page = "feature_page")
nrow(attributes)

# R S4 classes ----
library(ALL)
library(GenomicRanges)

# An S3 object is like a list in which every element has its own name,
# whereas an S4 class defines what each object of the class must contain.
data(ALL)
ALL
class(ALL)
isS4(ALL)

# Two ways to view the description of a class.
class?ExpressionSet
?"ExpressionSet-class"

# Naming convention: class names start with an uppercase letter, and the
# constructor has the same name as the class.
ExpressionSet()

# Slots are the smaller components that make up the class.
getClass("ExpressionSet")
ALL@annotation
annotation(ALL)

# When a class has been upgraded and its definition has changed, refresh
# stored objects with updateObject(). `old_object` is a placeholder that is
# not defined in this script, so the call is shown as a comment:
# old_object <- updateObject(old_object)

# Check that the object is valid, i.e. conforms to its class definition.
validObject(ALL)

# R S4 methods ----
library(GenomicRanges)
GenomicRanges::as.data.frame # S4 method
base::as.data.frame # S3 method

# As the listing shows, the code that is selected depends on the type of x.
showMethods("as.data.frame")

# View the code that is dispatched for one specific argument type.
getMethod("as.data.frame", "GenomicRanges")
getMethod("as.data.frame", signature(x = "GenomicRanges"))

# View the help page that corresponds to one specific argument type.
method?"as.data.frame,DataFrame"
method?"as.data.frame,GenomicRanges"
?"as.data.frame,DataFrame-method"
?"as.data.frame,GenomicRanges-method"

# NOTE(review): newer IRanges releases appear to name this class
# "IntegerRanges" rather than "Ranges" -- confirm against the installed
# version if the lookups below fail.
showMethods("findOverlaps")
getMethod("findOverlaps", signature(query = "Ranges", subject = "Ranges"))
?"findOverlaps,Ranges,Ranges-method"

# S4 drawbacks: help pages are hard to find, the source is hard to view
# directly, and debugging is difficult. Even so, S4 is the best choice when
# writing a package because it keeps things easy to manage.