PHP example: scraping www.icbase.com search results by page keyword (handling the ASP.NET postback parameters)

Source: Internet
Author: User
This article describes how to scrape product data from www.icbase.com by search keyword, including how to submit the ASP.NET postback parameters needed to page through the results. The code is as follows:


/**
* HOST: www.icbase.com
*/
// Set_time_limit (0 );
// Base function
/**
 * Perform an HTTP GET request with cURL.
 *
 * @param string       $url     Target URL.
 * @param array|string $data    Query parameters; an array is encoded with http_build_query(),
 *                              a string is appended as-is.
 * @param array        $header  Extra request header lines.
 * @param int          $timeout Connect timeout in seconds.
 * @param int          $port    Remote port to connect to.
 * @param string       $reffer  Referer header value; not sent when empty.
 * @param string       $proxy   Proxy host; no proxy is used when empty.
 * @return array Always has key 'result' (the curl_exec() return value); has key 'error'
 *               (a message string) only when cURL reported an error.
 */
function curl_get($url, $data = array(), $header = array(), $timeout = 15, $port = 80, $reffer = '', $proxy = '')
{
    $ch = curl_init();

    if (!empty($data)) {
        $query = is_array($data) ? http_build_query($data) : $data;
        // Compare against false explicitly: a "?" found at offset 0 is still a match.
        $url .= (strpos($url, '?') !== false ? '&' : '?') . $query;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_POST, 0);
    curl_setopt($ch, CURLOPT_PORT, $port);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); // follow redirects and return the final page

    // Logical test instead of bitwise "&", so the option is only set when a referer was given.
    if ($reffer) {
        curl_setopt($ch, CURLOPT_REFERER, $reffer);
    }

    if ($proxy) {
        curl_setopt($ch, CURLOPT_PROXY, $proxy);
        curl_setopt($ch, CURLOPT_PROXYPORT, 1723);
        // FIXME: proxy credentials are hard-coded; move them to configuration.
        curl_setopt($ch, CURLOPT_PROXYUSERPWD, 'andhm001:andhm123');
    }

    $result = array();
    $result['result'] = curl_exec($ch);
    if (0 != curl_errno($ch)) {
        $result['error'] = "error: \n" . curl_error($ch);
    }
    curl_close($ch);

    return $result;
}

The code is as follows:


/**
 * Perform an HTTP POST request with cURL.
 *
 * @param string       $url     Target URL.
 * @param array|string $data    POST body, handed to CURLOPT_POSTFIELDS unchanged.
 * @param array        $header  Extra request header lines; only applied when non-empty.
 * @param int          $timeout Connect timeout in seconds.
 * @param int          $port    Accepted for signature compatibility; currently not applied
 *                              (the CURLOPT_PORT call is disabled below).
 * @return array Always has key 'result' (the curl_exec() return value); has key 'error'
 *               (a message string) only when cURL reported an error.
 */
function curl_post($url, $data = array(), $header = array(), $timeout = 5, $port = 80)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    // curl_setopt($ch, CURLOPT_PORT, $port);

    // Logical test instead of bitwise "&", so an empty header list is never passed to cURL.
    if (!empty($header)) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    }
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $data);

    $result = array();
    $result['result'] = curl_exec($ch);
    if (0 != curl_errno($ch)) {
        $result['error'] = "error: \n" . curl_error($ch);
    }
    curl_close($ch);

    return $result;
}

/**
 * Obtain the HTML source of one search-result list page.
 *
 * Page 1 is fetched with a plain GET. Its ASP.NET hidden postback fields are captured
 * and kept for later calls; any other page is requested by POSTing those fields back
 * together with the pager event arguments, so page 1 must be fetched first.
 *
 * For backward compatibility the captured values are also published as the constants
 * __VIEWSTATE, __PREVIOUSPAGE and __EVENTVALIDATION the first time they are seen.
 *
 * @param string $keywords Search keyword.
 * @param int    $page     Page number; 0 is treated as 1, negative values are rejected.
 * @return bool|string HTML source, or false on a negative page, a transfer error,
 *                     a missing hidden field, or a page > 1 request before page 1.
 */
function getListHtml($keywords, $page = 1)
{
    // Hidden-field values from the most recent page-1 response, keyed by field name.
    static $postbackState = array();

    if ($page < 0) {
        return false;
    }
    $page = intval($page);
    if ($page == 0) {
        $page = 1;
    }

    if ($page == 1) {
        $response = curl_get('http://www.icbase.com/ProResult.aspx', array('prokey' => $keywords));
        if (isset($response['error'])) {
            return false;
        }
        $html = $response['result'];

        // ASP.NET needs these hidden fields echoed back on every postback.
        // NOTE(review): the original patterns were lost; these are rebuilt from the
        // standard Web Forms hidden-input markup. Confirm against the live page.
        $state = array();
        foreach (array('__VIEWSTATE', '__PREVIOUSPAGE', '__EVENTVALIDATION') as $field) {
            $pattern = '/<input[^>]*\bname="' . preg_quote($field, '/') . '"[^>]*\bvalue="([^"]*)"/i';
            if (!preg_match($pattern, $html, $matches)) {
                return false;
            }
            $state[$field] = html_entity_decode($matches[1], ENT_QUOTES);
            if (!defined($field)) {
                define($field, $state[$field]);
            }
        }
        $postbackState = $state;

        return $html;
    }

    if (empty($postbackState)) {
        // Nothing to post back: page 1 has not been fetched successfully yet.
        return false;
    }

    $data = array(
        '__EVENTTARGET'   => 'pager',
        '__EVENTARGUMENT' => $page,
    ) + $postbackState;

    // Encode the keyword so spaces and reserved characters cannot break the URL.
    $response = curl_post('http://www.icbase.com/ProResult.aspx?ProKey=' . urlencode($keywords), $data);
    if (isset($response['error'])) {
        return false;
    }

    return $response['result'];
}

/**
* Collect the detail-page link URLs found in the HTML of a list page.
*
* Runs preg_match_all with the pattern below and returns capture group 1 of
* every match, or an empty array when nothing matches.
*
* NOTE(review): the pattern literal as it appears here has no capture group and
* contains stray spaces (including one after the modifiers). It looks like the
* HTML tag text inside it was stripped when this page was extracted; restore it
* from the original source before relying on this function.
*
* @ Param string $ html source code of the list page
* @ Return array
*/
Function getListHref ($ html)
{
// NOTE(review): damaged pattern, see the function header.
$ Pattern = '/[\ s \ n] *] \/>/isu ';
If (preg_match_all ($ pattern, $ html, $ matches ))
{
Return $ matches [1];
} Else {
// No matching items
Return array ();
}
}

/**
* Get the number of the next page from the HTML of a list page.
*
* Runs preg_match with the pattern below and returns capture group 1 converted
* with intval, or -1 when the pattern does not match.
*
* NOTE(review): the pattern literal as it appears here has no capture group and
* contains stray spaces. It looks like the pager markup inside it was stripped
* when this page was extracted; restore it from the original source before
* relying on this function.
*
* @ Param string $ html source code of the list page
* @ Return number
*/
Function getListNextPage ($ html)
{
// NOTE(review): damaged pattern, see the function header.
$ Pattern = '/ ]>. +> <\/A>/isU ';
If (preg_match ($ pattern, $ html, $ matches ))
{
Return intval ($ matches [1]);
} Else {
Return-1;
}
}

/**
 * Obtain the detail-page hrefs from every result page for a keyword.
 *
 * Fetches page 1, then keeps following the next-page number reported by
 * getListNextPage() and merges the hrefs of each page into one list.
 *
 * @param string $keywords Search keyword.
 * @return bool|array false when $keywords is empty; otherwise the list of hrefs
 *                    (empty when the first page cannot be fetched or has no results).
 */
function getListHrefAll($keywords)
{
    if (empty($keywords)) {
        return false;
    }

    $html = getListHtml($keywords);
    if ($html === false) {
        return array();
    }

    $hrefList = getListHref($html);
    if (empty($hrefList)) {
        // No results
        return array();
    }

    $currentPage = 1;
    $nextPage = getListNextPage($html);
    // Require the page number to advance so a pager that keeps reporting the
    // same page cannot loop forever.
    while ($nextPage > $currentPage) {
        $html = getListHtml($keywords, $nextPage);
        if ($html === false) {
            // Keep what was collected so far when a later page fails.
            break;
        }
        $hrefList = array_merge($hrefList, getListHref($html));
        $currentPage = $nextPage;
        $nextPage = getListNextPage($html);
    }

    return $hrefList;
}

/**
* Get the product information from a detail page.
*
* Returns false when the first argument is empty. When $is_url is truthy the
* argument is normalised to start with a single slash, appended to the host and
* fetched with curl_get; a transfer error terminates the script through Exit.
* Otherwise the argument itself is used as the HTML to parse.
*
* The result array is initialised with empty-string fields and then filled from
* a series of pattern matches. An empty array is returned when the first
* pattern (manufacturer part number) does not match.
*
* NOTE(review): this block is damaged as it appears here and needs restoring
* from the original source before use:
* - the pattern statements for Mfg_part, the PDF url and Img_url are cut off
*   mid-literal and fused with the If line that should follow them;
* - the remaining patterns contain stray spaces and look like they lost their
*   HTML tag text, and their label text appears to be machine translated;
* - the keys assigned later differ in letter case from the keys initialised in
*   the result array, presumably an extraction artifact;
* - the Description test in the Para loop uses a single equals sign.
*
* @ Param string $ url detail page url, or raw html source when $ is_url is 0
* @ Param int $ is_url 1 uses url address 0 to directly process html source code
* @ Return boolean | multitype: string
*/
Function getDetail ($ url, $ is_url = 1)
{
If (empty ($ url ))
{
Return false;
}
$ Host = 'www .icbase.com ';
// Default to treating the argument as raw HTML; replaced below when it is fetched.
$ Html = $ url;
If ($ is_url ){
$ Url = '/'. ltrim ($ url ,'/');
$ Result = curl_get ($ host. $ url );
If (isset ($ result ['error'])
{
// NOTE(review): terminates the whole script, unlike the list functions which return false.
Exit ($ result ['error']);
}
$ Html = $ result ['result'];
}

$ Result = array (
'Sup _ part' => '', // supplier model
'Sup _ id' => '', // supplier id
'Mfg _ part' => '', // manufacturer model
'Mfg _ name' => '', // manufacturer name
'Cat _ name' => '', // category name
'Para' => '', // attribute
'Desc' => '', // description
'PDF _ url' => '', // pdf address
'Sup _ stock' => '', // stock
'Min _ purch' => '', // minimum order quantity
'Price' => '', // price
'IMG _ url' => '', // Image address
'Createtime' => '', // creation time
'Datacode' => '', // batch number
'Package' => '', // encapsulation
'Page _ url' => '', // page address
);

// Mfg_part
// NOTE(review): the next line is truncated and fused with the following If statement.
$ Pattern = '/Product Model <\/td>(. [^ <] +)If (preg_match ($ pattern, $ html, $ matches ))
{
$ Result ['mfg _ part'] = trim ($ matches [1]);
} Else {
// This item is available, indicating that it is not everywhere
Return array ();
}

// Mfg_name
$ Pattern = '/Vendor <\/td> [\ s \ n] *(. +) <\/Td>/isU ';
If (preg_match ($ pattern, $ html, $ matches ))
{
$ Result ['mfg _ name'] = trim ($ matches [1]);
}

// Para
$ Pattern = '/ (. +) <\/Tr> <\/table>/isU ';
If (preg_match ($ pattern, $ html, $ matches ))
{
If (preg_match_all ('/(. +) <\/Td>/isU ', $ matches [1], $ matches ))
{
$ Count = count ($ matches [1]);
$ Count = intval ($ count/2 );
Foreach ($ matches [1] as $ k => $ v)
{
If ($ k> = $ count)
{
Break;
}
If (trim ($ v) = 'Description ')
{
// Desc
$ Result ['desc'] = trim ($ matches [1] [$ count + $ k]);
Continue;
}
$ V = trim ($ v );
$ Result ['para'] [$ v] = trim ($ matches [1] [$ count + $ k]);
}
}
}

// Pai_url
$ Pattern = '/Details <\/td>If (preg_match ($ pattern, $ html, $ matches ))
{
$ Result ['PDF _ url'] = trim ($ matches [1]);
}

// Sup_stock
$ Pattern = '/Inventory quantity <\/td> [\ s \ n] *(\ D +) <\/td>/isU ';
If (preg_match ($ pattern, $ html, $ matches ))
{
$ Result ['Sup _ stock'] = trim ($ matches [1]);
}

// Price
$ Pattern = '/ ] +> (\ D +) \ + <\/td> ] +>. [^ \ D] * ([\ d.] +) <\/td> <\/tr>/isU ';
If (preg_match_all ($ pattern, $ html, $ matches ))
{
Foreach ($ matches [1] as $ k => $ v)
{
$ Result ['price'] [$ v] = '¥'. $ matches [2] [$ k];
}
}

// Img_url
// NOTE(review): the next line is truncated and fused with the following If statement.
$ Pattern = '/Image <\/td>If (preg_match ($ pattern, $ html, $ matches ))
{
$ Result ['IMG _ url'] = trim ($ matches [1]);
}

// Page_url
If ($ is_url)
{
$ Result ['page _ url'] = $ host. $ url;
}

Return $ result;
}

/**
 * Final entry point: scrape every product found for a keyword.
 *
 * Collects all detail-page hrefs with getListHrefAll() and runs getDetail() on
 * each of them, in order.
 *
 * @param string $keywords Search keyword.
 * @return array One getDetail() result per href; empty when there is nothing to fetch.
 */
function getData($keywords)
{
    $hrefList = getListHrefAll($keywords);
    if (!is_array($hrefList)) {
        // getListHrefAll() returns false for an empty keyword; do not iterate it.
        return array();
    }

    $result = array();
    foreach ($hrefList as $href) {
        $result[] = getDetail($href);
    }

    return $result;
}

// Test script: scrape the products for the "keyword" query parameter and dump them.
// A missing parameter is treated as an empty keyword instead of raising a notice.
$keywords = isset($_GET['keyword']) ? trim($_GET['keyword']) : '';
$result = getData($keywords);

print_r($result);

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.