A PHP text collection class
/*
Usage example:
$c = new Collection();
$c->url = 'http://bbs.it-home.org/subject_search?cat=1001&search_text=%E5%B9%B4%E8%BD%BB%E4%BA';
$c->startFlag = '';
$c->endFlag = '
';
$c->init();
$c->regExp = "|
(.*) (.*) |Uis";
$c->parse();
print_rr($c->result);
*/
PHP text collection class file:
<%
/**
 * Module name: PHP text collection class
 */
/**
 * Simple text collector ("scraper").
 *
 * Reads the document at $url into $content, optionally converts its
 * encoding, narrows it to the text block delimited by $startFlag and
 * $endFlag, and applies $regExp to that block.
 *
 * Typical call order: set the public properties, call init(), then parse(),
 * then read $result.
 */
class Collection {
    // Public inputs
    var $url;       // URL of the document to analyze
    var $content;   // content that was read (may also be assigned directly)
    var $regExp;    // regular expression describing the parts to collect
    var $codeFrom;  // encoding of the original text; detected when left empty
    var $codeTo;    // target encoding; no conversion is done when left empty
    var $timeout;   // collection wait time (declared but not used by this class)
    var $startFlag; // marker where collection starts; when empty, collection starts at the beginning of $content
    var $endFlag;   // marker where collection ends; when empty, collection runs to the end of $content
    var $block;     // text block between $startFlag and $endFlag

    // Output
    var $result;    // matches produced by parse()

    /**
     * Initialize the collector: fetch $url (when one is set) and convert
     * the encoding of the content (when $codeTo is set).
     *
     * @return void
     */
    function init() {
        // The check must look at the property; an undefined local variable
        // is always "empty", which made the guard meaningless.
        if (!empty($this->url)) {
            $this->getFile();
        }
        $this->convertEncoding();
    }

    /**
     * Collect the required content: apply $regExp to the current block.
     *
     * The matches are stored in $result in PREG_SET_ORDER layout.
     *
     * @return string the text block that was searched
     */
    function parse() {
        $this->getBlock();
        preg_match_all($this->regExp, $this->block, $this->result, PREG_SET_ORDER);
        return $this->block;
    }

    /**
     * Report an error by printing the message.
     *
     * @param string $msg message to print
     * @return void
     */
    function error($msg) {
        echo $msg;
    }

    /**
     * Read the remote document at $url into $content, normalizing all line
     * endings to "\n".
     *
     * @return bool true on success, false when the document cannot be read
     */
    function getFile() {
        // Reading an empty path can never succeed, so report it directly.
        // "@" keeps PHP's own warning quiet because failure is reported
        // through error() below.
        $raw = empty($this->url) ? false : @file_get_contents($this->url);
        if ($raw === false) {
            $this->error("can't read the url:" . $this->url);
            return false;
        }
        $this->content = str_replace(array("\r\n", "\r"), "\n", $raw);
        return true;
    }

    /**
     * Narrow $content down to the required text block and store it in $block.
     *
     * The block starts at the first occurrence of $startFlag (the marker
     * itself is included) and stops right before the first following
     * occurrence of $endFlag. An empty or missing marker leaves that side
     * of the content untrimmed.
     *
     * @return void
     */
    function getBlock() {
        // Always start from the full content so that an empty $startFlag
        // means "from the beginning" rather than "reuse whatever $block held".
        $block = (string) $this->content;

        if (!empty($this->startFlag)) {
            $start = strpos($block, $this->startFlag);
            if ($start !== false) {
                $block = substr($block, $start);
            }
        }

        if (!empty($this->endFlag)) {
            $end = strpos($block, $this->endFlag);
            // strpos() returns false when the marker is absent; using that
            // as a length would silently truncate the block to nothing.
            if ($end !== false) {
                $block = substr($block, 0, $end);
            }
        }

        $this->block = $block;
    }

    /**
     * Convert $content from $codeFrom to $codeTo.
     *
     * Nothing happens when $codeTo is empty. When $codeFrom is empty the
     * source encoding is detected from the content first.
     *
     * @return void
     */
    function convertEncoding() {
        if (empty($this->codeTo)) {
            return;
        }

        // Only detect when the caller did not name the source encoding.
        if (empty($this->codeFrom)) {
            $detected = mb_detect_encoding((string) $this->content);
            if ($detected === false) {
                $this->error("can't convert Encoding");
                return;
            }
            $this->codeFrom = $detected;
        }

        // Convert into a temporary so a failed conversion does not destroy
        // the content, and an empty string is not mistaken for a failure.
        $converted = mb_convert_encoding((string) $this->content, $this->codeTo, $this->codeFrom);
        if ($converted === false) {
            $this->error("can't convert Encoding");
            return;
        }
        $this->content = $converted;
    }
} // End of class
- ?>
|