The code is as follows:
/**
* HOST:www.icbase.com
*/
// Lift the execution time limit: the crawl below walks every result page and
// every detail page, which can take longer than the default limit.
// NOTE(review): the excerpt at the very end of this file shows this call
// prefixed with "//" — confirm whether it is meant to stay enabled.
set_time_limit(0);

// Base functions
/**
 * Perform an HTTP GET request through cURL.
 *
 * NOTE(review): the defaults for $timeout and $port were unreadable in the
 * extracted text; 5 and 80 are used here to match Curl_post() — confirm.
 *
 * @param string       $url     Target URL; $data is appended to it as a query string.
 * @param array|string $data    Query parameters (arrays are encoded with http_build_query())
 *                              or an already-encoded query string.
 * @param array        $header  Header lines handed to CURLOPT_HTTPHEADER.
 * @param int          $timeout Connection timeout in seconds (CURLOPT_CONNECTTIMEOUT).
 * @param int          $port    Remote port (CURLOPT_PORT).
 * @param string       $reffer  Referer to send; skipped when empty.
 * @param string       $proxy   Proxy host; when set, proxy port 1723 and fixed credentials are used.
 * @return array 'result' holds the curl_exec() return value; 'error' is present
 *               only when curl_errno() is non-zero.
 */
function Curl_get($url, $data = array(), $header = array(), $timeout = 5, $port = 80, $reffer = '', $proxy = '')
{
    $ch = curl_init();

    if (!empty($data)) {
        $data = is_array($data) ? http_build_query($data) : $data;
        // Compare against false explicitly: strpos() returns 0, which is falsy,
        // when the needle sits at the start of the string.
        $url .= (strpos($url, '?') !== false ? '&' : '?') . $data;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_POST, 0);
    curl_setopt($ch, CURLOPT_PORT, $port);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    // Follow redirects and return the page reached after the jump.
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

    if ($reffer) {
        curl_setopt($ch, CURLOPT_REFERER, $reffer);
    }

    if ($proxy) {
        curl_setopt($ch, CURLOPT_PROXY, $proxy);
        curl_setopt($ch, CURLOPT_PROXYPORT, 1723);
        // NOTE(review): proxy credentials are hard-coded in source; move them
        // to configuration before this is used anywhere shared.
        curl_setopt($ch, CURLOPT_PROXYUSERPWD, "andhm001:andhm123");
    }

    $result = array();
    $result['result'] = curl_exec($ch);
    if (0 != curl_errno($ch)) {
        $result['error'] = "error:\n" . curl_error($ch);
    }
    curl_close($ch);

    return $result;
}
The code is as follows:
/**
 * Perform an HTTP POST request through cURL.
 *
 * @param string       $url     Target URL.
 * @param array|string $data    Request body handed to CURLOPT_POSTFIELDS unchanged
 *                              (per the PHP manual an array is sent as multipart/form-data,
 *                              a string as-is).
 * @param array        $header  Header lines for CURLOPT_HTTPHEADER; skipped when empty.
 * @param int          $timeout Connection timeout in seconds (CURLOPT_CONNECTTIMEOUT).
 * @param int          $port    Remote port (CURLOPT_PORT).
 * @return array 'result' holds the curl_exec() return value; 'error' is present
 *               only when curl_errno() is non-zero.
 */
function Curl_post($url, $data = array(), $header = array(), $timeout = 5, $port = 80)
{
    $ch = curl_init();

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_PORT, $port);
    if (!empty($header)) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    }
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $data);

    $result = array();
    $result['result'] = curl_exec($ch);
    if (0 != curl_errno($ch)) {
        $result['error'] = "error:\n" . curl_error($ch);
    }
    curl_close($ch);

    return $result;
}
/**
 * Fetch the HTML source of one search-result list page.
 *
 * Page 1 is requested with GET. The ASP.NET hidden form fields found in that
 * response are remembered so that later pages can be requested as a postback
 * to the same form.
 *
 * NOTE(review): the three hidden-field patterns were truncated in the text
 * this file was recovered from. They are rebuilt here from the field names
 * that survived, assuming the usual ASP.NET markup
 * (<input type="hidden" name="..." id="..." value="..." />) — verify against
 * the live page. The field names and the 'ProKey' query key are written in
 * their standard casing; the recovered text had them lower-cased.
 *
 * @param string $keywords Search keywords.
 * @param int    $page     Page number; 0 is treated as 1.
 * @return bool|string HTML source, or false when $page is negative, a request
 *                     fails, a hidden field is missing, or a later page is
 *                     requested before page 1 has been fetched.
 */
function getlisthtml($keywords, $page = 1)
{
    // Hidden form fields captured from the most recent page-1 response.
    static $formState = array();

    if ($page < 0) {
        return false;
    }
    $page = max(1, intval($page));

    if ($page == 1) {
        $result = Curl_get('http://www.icbase.com/ProResult.aspx', array('ProKey' => $keywords));
        if (isset($result['error'])) {
            return false;
        }
        $html = $result['result'];

        // Data needed for the ASP.NET postback.
        $captured = array();
        foreach (array('__VIEWSTATE', '__PREVIOUSPAGE', '__EVENTVALIDATION') as $field) {
            $pattern = '/<input[^>]*\bname="' . preg_quote($field, '/') . '"[^>]*\bvalue="([^"]*)"/i';
            if (!preg_match($pattern, $html, $matches)) {
                return false;
            }
            $captured[$field] = $matches[1];
            // The constants are kept for backward compatibility only; they can
            // be defined once per process, so this function reads $formState.
            if (!defined($field)) {
                define($field, $matches[1]);
            }
        }
        $formState = $captured;

        return $html;
    }

    if (empty($formState)) {
        // Page 1 has not been fetched yet, so there is no form state to replay.
        return false;
    }

    $data = array(
        // NOTE(review): 'pager' is the event target as it appears in the recovered text — confirm its casing.
        '__EVENTTARGET' => 'pager',
        '__EVENTARGUMENT' => $page,
        '__VIEWSTATE' => $formState['__VIEWSTATE'],
        '__PREVIOUSPAGE' => $formState['__PREVIOUSPAGE'],
        '__EVENTVALIDATION' => $formState['__EVENTVALIDATION'],
    );
    // Encode the keyword: it is placed directly into the URL here.
    $result = Curl_post('http://www.icbase.com/ProResult.aspx?ProKey=' . urlencode($keywords), $data);
    if (isset($result['error'])) {
        return false;
    }

    return $result['result'];
}
/**
* Collect the link targets found in a list page.
*
* Runs preg_match_all() with $pattern over $html and returns capture group 1
* of every match, or an empty array when nothing matches.
*
* NOTE(review): the pattern literal below contains no capture group as it
* stands, so $matches [1] has nothing to hold. The HTML tag portions of the
* expression appear to have been lost when this code was extracted, and the
* "No matches" line has lost its comment marker. Restore both from the
* original source before relying on this function.
*
* @param string $html HTML source of a list page
* @return array capture group 1 of each match; empty array when there is no match
*/
function Getlisthref ($html)
{
$pattern = '/[\s\n]*]\/>/isu ';
if (Preg_match_all ($pattern, $html, $matches))
{
return $matches [1];
} else {
No matches
return Array ();
}
}
/**
* Read the next page number from a list page.
*
* Runs preg_match() with $pattern over $html and returns capture group 1
* converted with intval(); returns -1 when the pattern does not match.
* Getlisthrefall() keeps paging while this value is greater than 0.
*
* NOTE(review): the pattern literal below contains no capture group as it
* stands, so $matches [1] has nothing to hold. The HTML tag portions of the
* expression appear to have been lost when this code was extracted —
* restore the pattern from the original source before relying on this function.
*
* @param string $html HTML source of a list page
* @return int next page number, or -1 when the pattern does not match
*/
function Getlistnextpage ($html)
{
$pattern = '/]>.+><\/a>/isu ';
if (Preg_match ($pattern, $html, $matches))
{
return Intval ($matches [1]);
} else {
return-1;
}
}
/**
 * Collect the detail-page links from every result page of a search.
 *
 * Fetches the first list page, then keeps requesting the page number reported
 * by Getlistnextpage() until no further page is reported.
 *
 * @param string $keywords Search keywords.
 * @return bool|array false when $keywords is empty; otherwise the merged list
 *                    of links (an empty array when the first page yields none).
 */
function Getlisthrefall($keywords)
{
    if (empty($keywords)) {
        return false;
    }

    $html = getlisthtml($keywords);
    if ($html === false) {
        // The first page could not be fetched, so there is nothing to parse.
        return array();
    }

    $hrefList = Getlisthref($html);
    if (empty($hrefList)) {
        // No results
        return array();
    }

    $currentPage = 1;
    $nextPage = Getlistnextpage($html);
    // Only ever move forward: a page that reports itself (or an earlier page)
    // as "next" would otherwise keep this loop running forever.
    while ($nextPage > $currentPage) {
        $html = getlisthtml($keywords, $nextPage);
        if ($html === false) {
            // Keep what has been collected so far when a later page fails.
            break;
        }
        $currentPage = $nextPage;
        $hrefList = array_merge($hrefList, Getlisthref($html));
        $nextPage = Getlistnextpage($html);
    }

    return $hrefList;
}
/**
* Extract product details from a detail page.
*
* Returns false immediately when $url is empty. When $is_url is truthy, $url
* is normalised to start with a single '/', appended to $host and fetched
* with Curl_get(); if the response carries an error entry the script is
* terminated with that message. When $is_url is falsy, $url itself is used
* as the HTML to parse.
*
* The result array is pre-filled with one entry per field (supplier model,
* supplier ID, manufacturer model, manufacturer name, category name,
* properties, description, PDF address, stock, minimum order quantity, price,
* picture address, creation time, lot number, package, page address). The
* code below then fills mfg_part, mfg_name, the properties map, desc,
* pdf_url, sup_stock, the price map, img_url and, in URL mode, page_url.
* An empty array is returned when the manufacturer-model pattern does not
* match.
*
* Properties: the cells matched inside the properties table are split in
* half; entry $k of the first half is used as the name and entry
* $count + $k as its value. A name equal to the description label is stored
* under desc instead.
*
* Price: each match stores capture group 2, prefixed with the yen sign, under
* the key taken from capture group 1 (presumably the quantity break — confirm).
*
* NOTE(review): this function does not parse as it stands. The "=>" operators
* of the array initialiser, several "//" comment markers, and the HTML tag
* portions of most patterns were lost when the code was extracted; three
* pattern literals run straight into the following "if". The label text
* inside the patterns also appears to have been translated. Restore all of
* these from the original source. $host carries no URL scheme — confirm
* that the request URL is what the site expects.
*
* @param string $url relative URL of the detail page, or the page's HTML source when $is_url is falsy
* @param int $is_url 1 to fetch $url over HTTP, 0 to treat $url as HTML source
* @return bool|array false for an empty $url, an empty array when the manufacturer model is not found, otherwise the detail array
*/
function Getdetail ($url, $is _url = 1)
{
if (empty ($url))
{
return false;
}
$host = ' www.icbase.com ';
$html = $url;
if ($is _url) {
$url = '/'. LTrim ($url, '/');
$result = Curl_get ($host. $url);
if (Isset ($result [' Error ']))
{
Exit ($result [' Error ']);
}
$html = $result [' result '];
}
$result = Array (
' Sup_part ' and ' = ',//supplier model
' sup_id ' = ', '//supplier ID
' Mfg_part ' = ', '//Manufacturer model
' Mfg_name ' = ', '//Manufacturer name
' Cat_name ' + ', '//category name
' Para ' = ', '//Properties
' desc ' and ' = ',//description
' Pdf_url ' and ' = ',//PDF address
' Sup_stock ' and ' = ',//Stock
' Min_purch ' = ' and ',//min. Order Quantity
' Price ' = ', '//Prices
' Img_url ' + ',//Picture address
' Createtime ' = ',//creation time
' Datacode ' and ' = ',//Lot number
' Package ' + = ',//encapsulation
' Page_url ' and ' = ',//page address
);
Mfg_part
$pattern = '/Product model <\/td>(. [^<]+]if (Preg_match ($pattern, $html, $matches))
{
$result [' mfg_part '] = Trim ($matches [1]);
} else {
There's no explanation for the wood.
return Array ();
}
Mfg_name
$pattern = '/Manufacturer <\/td>[\s\n]*(. +) <\/td>/isu ';
if (Preg_match ($pattern, $html, $matches))
{
$result [' mfg_name '] = Trim ($matches [1]);
}
Para
$pattern = '/ (. +) <\/tr><\/table>/isu ';
if (Preg_match ($pattern, $html, $matches))
{
if (Preg_match_all ('/(. +) <\/td>/isu ', $matches [1], $matches))
{
$count = count ($matches [1]);
$count = Intval ($count/2);
foreach ($matches [1] as $k = $v)
{
if ($k >= $count)
{
Break
}
if (Trim ($v) = = ' description ')
{
Desc
$result [' desc '] = Trim ($matches [1][$count + $k]);
Continue
}
$v = Trim ($v);
$result [' Para '] [$v] = Trim ($matches [1][$count + $k]);
}
}
}
Pdf_url
$pattern = '/Detailed information <\/td>if (Preg_match ($pattern, $html, $matches))
{
$result [' pdf_url '] = Trim ($matches [1]);
}
Sup_stock
$pattern = '/Stock Quantity <\/td>[\s\n]*(\d+) <\/td>/isu ';
if (Preg_match ($pattern, $html, $matches))
{
$result [' sup_stock '] = Trim ($matches [1]);
}
Price
$pattern = '/ ]+> (\d+) \+<\/td> ]+>. [^\d]* ([\d.] +) <\/td><\/tr>/isu ';
if (Preg_match_all ($pattern, $html, $matches))
{
foreach ($matches [1] as $k = $v)
{
$result [' Price '] [$v] = ' ¥ '. $matches [2][$k];
}
}
Img_url
$pattern = '/Photo <\/td>if (Preg_match ($pattern, $html, $matches))
{
$result [' img_url '] = Trim ($matches [1]);
}
Page_url
if ($is _url)
{
$result [' page_url '] = $host. $url;
}
return $result;
}
/**
 * Final entry point: search for $keywords and fetch the details of every hit.
 *
 * @param string $keywords Search keywords.
 * @return array One Getdetail() result per link found, in link order; an empty
 *               array when the search yields no links or $keywords is empty.
 */
function GetData($keywords)
{
    $result = array();

    $hrefList = Getlisthrefall($keywords);
    if (!is_array($hrefList)) {
        // Getlisthrefall() returns false for empty keywords; nothing to iterate.
        return $result;
    }

    foreach ($hrefList as $href) {
        $result[] = Getdetail($href);
    }

    return $result;
}
// Test script: read the search keywords from the query string and dump the result.
// A missing or non-string parameter is treated as an empty search.
$keywords = (isset($_GET['keywords']) && is_string($_GET['keywords'])) ? trim($_GET['keywords']) : '';
$result = GetData($keywords);
print_r($result);
http://www.bkjia.com/PHPjc/728095.html www.bkjia.com true http://www.bkjia.com/PHPjc/728095.html techarticle Copy the code as follows: PHP/** * HOST:www.icbase.com *///set_time_limit (0);//Base function function Curl_get ($url, $data = AR Ray (), $header = Array (), $timeout = $port ...