#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Backend-independent access to the links and images of an HTML document.

``createDomTree`` is a small factory that returns a ``TNDomTree`` wrapper
built on either BeautifulSoup or lxml.  Callers only talk to the
``TNDomTree`` interface, so the third-party parser can be swapped without
touching calling code.
"""


# Factory pattern
def createDomTree(htmlStream, type='soup'):
    """Build a DOM-tree wrapper for *htmlStream* using the chosen backend.

    Args:
        htmlStream: The HTML markup to parse.
        type: Backend selector, ``'soup'`` (BeautifulSoup) or ``'lxml'``.
            The name shadows the builtin but is kept because it is part of
            the public keyword interface.

    Returns:
        A ``TNDomTreeWithSoup`` or ``TNDomTreeWithLxml`` instance, or
        ``None`` when *type* names no known backend.
    """
    if type == "soup":
        return TNDomTreeWithSoup(htmlStream)
    if type == "lxml":
        return TNDomTreeWithLxml(htmlStream)
    return None


# Externally visible interface; it shields callers from the third-party
# library that is actually used.
class TNDomTree(object):
    """Common interface shared by every parser backend."""

    # Public
    def __init__(self, htmlStream):
        """Remember the raw markup; subclasses parse it into ``self._tree``."""
        self.htmlStream = htmlStream

    def getLinkList(self):
        """Return every ``<a>`` element of the document."""
        # Behaviour common to all subclasses lives in the base class.
        return self._getElementByTagName('a')

    def getImageList(self):
        """Return every ``<img>`` element of the document."""
        # Behaviour common to all subclasses lives in the base class.
        return self._getElementByTagName('img')

    def elementToString(self, element):
        """Serialise *element* back to markup (backend specific)."""
        # Fail loudly instead of returning None when a subclass forgets
        # to override this method.
        raise NotImplementedError

    def getAttrValueOfElement(self, element, attName):
        """Return attribute *attName* of *element* as a string, or ``""``."""
        raise NotImplementedError

    # Private
    def _getElementByTagName(self, tagName):
        """Return all elements named *tagName*; implemented by subclasses."""
        raise NotImplementedError


# BeautifulSoup backend
class TNDomTreeWithSoup(TNDomTree):
    """``TNDomTree`` implemented on top of BeautifulSoup."""

    def __init__(self, htmlStream):
        """Parse *htmlStream* with BeautifulSoup."""
        # Imported lazily so the module can be loaded (and the lxml backend
        # used) without this library being installed.
        from BeautifulSoup import BeautifulSoup

        TNDomTree.__init__(self, htmlStream)
        self._tree = BeautifulSoup(self.htmlStream)

    def _getElementByTagName(self, tagName):
        """Return all tags named *tagName* found in the parsed tree."""
        return self._tree.findAll(tagName)

    def elementToString(self, element):
        """Return ``str(element)``."""
        return str(element)

    def getAttrValueOfElement(self, element, attName):
        """Return attribute *attName* of *element* as a string, or ``""``."""
        # hasattr() inspects Python attributes of the tag object, not its
        # HTML attributes, so it cannot guard element[attName]; look the
        # attribute up with a default instead.
        return str(element.get(attName, ""))


# lxml backend
class TNDomTreeWithLxml(TNDomTree):
    """``TNDomTree`` implemented on top of ``lxml.html``."""

    def __init__(self, htmlStream):
        """Parse *htmlStream* with ``lxml.html.soupparser.fromstring``."""
        # Imported lazily so the module can be loaded (and the soup backend
        # used) without lxml being installed.
        from lxml.html.soupparser import fromstring

        TNDomTree.__init__(self, htmlStream)
        self._tree = fromstring(self.htmlStream)

    def _getElementByTagName(self, tagName):
        """Return, in document order, every node whose tag equals *tagName*."""
        return [node for node in self._tree.iter() if node.tag == tagName]

    def elementToString(self, element):
        """Serialise *element* with ``lxml.html.tostring``."""
        from lxml.html import tostring

        return tostring(element)

    def getAttrValueOfElement(self, element, attName):
        """Return attribute *attName* of *element* as a string, or ``""``."""
        return str(element.get(attName, ""))


if __name__ == "__main__":
    # NOTE(review): this sample contains no <img> tag, so the loop below
    # prints nothing; the original markup was probably lost when the
    # snippet was published -- confirm against the real test data.
    s1 = '''
    <p>beiai</p>
    <p></p>
    '''
    print(s1)
    domTree = createDomTree(s1, 'soup')
    images = domTree.getImageList()
    for image in images:
        print(domTree.getAttrValueOfElement(image, 'src'))
        print(domTree.elementToString(image))
Why can a common interface be extracted? Essentially, these libraries do similar things and differ only in their specific code. Classes that do similar work should be designed with similar APIs; otherwise, the class design is wrong.