The code is as follows:
<?php
/**
* HOST:www.icbase.com
*/
// Crawling every result page and detail page can take a long time,
// so remove PHP's script execution time limit.
set_time_limit(0);

// Base functions: thin cURL wrappers used by the scraper functions below.
/**
 * Perform an HTTP GET request with cURL.
 *
 * NOTE(review): the default values of $timeout and $port were missing from
 * the source; 5 and 80 mirror the defaults declared by Curl_post() — confirm.
 *
 * @param string       $url     Target URL.
 * @param array|string $data    Query parameters (array) or a ready-made query
 *                              string; appended to $url when not empty.
 * @param array        $header  Raw HTTP header lines passed to CURLOPT_HTTPHEADER.
 * @param int          $timeout Connection timeout in seconds (CURLOPT_CONNECTTIMEOUT).
 * @param int          $port    Remote port (CURLOPT_PORT).
 * @param string       $reffer  Referer header value; not sent when empty.
 * @param string       $proxy   Proxy host; no proxy is used when empty.
 * @return array 'result' holds the curl_exec() return value (response body,
 *               or false on failure); 'error' is set only when cURL reported
 *               an error.
 */
function Curl_get($url, $data = array(), $header = array(), $timeout = 5, $port = 80, $reffer = '', $proxy = '')
{
    $ch = curl_init();

    if (!empty($data)) {
        $data = is_array($data) ? http_build_query($data) : $data;
        // Use '&' when the URL already carries a query string, '?' otherwise.
        $url .= (strpos($url, '?') !== false ? '&' : '?') . $data;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_POST, 0);
    curl_setopt($ch, CURLOPT_PORT, $port);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    // Follow redirects and return the page reached after the jump.
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

    if ($reffer) {
        curl_setopt($ch, CURLOPT_REFERER, $reffer);
    }

    if ($proxy) {
        curl_setopt($ch, CURLOPT_PROXY, $proxy);
        curl_setopt($ch, CURLOPT_PROXYPORT, 1723);
        // NOTE(review): hard-coded proxy credentials — move them to
        // configuration before using this outside a throw-away script.
        curl_setopt($ch, CURLOPT_PROXYUSERPWD, "andhm001:andhm123");
    }

    $result = array();
    $result['result'] = curl_exec($ch);
    if (0 != curl_errno($ch)) {
        $result['error'] = "error:\n" . curl_error($ch);
    }
    curl_close($ch);

    return $result;
}
// The code continues as follows:
/**
 * Perform an HTTP POST request with cURL.
 *
 * @param string       $url     Target URL.
 * @param array|string $data    POST fields handed to CURLOPT_POSTFIELDS as-is.
 * @param array        $header  Raw HTTP header lines; only set when not empty.
 * @param int          $timeout Connection timeout in seconds (CURLOPT_CONNECTTIMEOUT).
 * @param int          $port    Remote port (CURLOPT_PORT).
 * @return array 'result' holds the curl_exec() return value (response body,
 *               or false on failure); 'error' is set only when cURL reported
 *               an error.
 */
function Curl_post($url, $data = array(), $header = array(), $timeout = 5, $port = 80)
{
    $ch = curl_init();

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_PORT, $port);
    if (!empty($header)) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    }
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $data);

    $result = array();
    $result['result'] = curl_exec($ch);
    if (0 != curl_errno($ch)) {
        $result['error'] = "error:\n" . curl_error($ch);
    }
    curl_close($ch);

    return $result;
}
/**
 * Extract the value of a hidden form field whose name and id are both $name.
 *
 * @param string $html HTML source of the page.
 * @param string $name Field name, e.g. "__VIEWSTATE".
 * @return string|null The field value, or null when the field is not found.
 */
function _icbase_hidden_field($html, $name)
{
    $quoted = preg_quote($name, '/');
    // "U" makes quantifiers ungreedy; the source's lower-case "u" looks like
    // a casing artefact of the paste — NOTE(review): confirm.
    $pattern = '/<input\s+type="hidden"\s+name="' . $quoted
        . '"\s+id="' . $quoted . '"\s+value="(.[^"]+)"/isU';

    if (preg_match($pattern, (string) $html, $matches)) {
        return $matches[1];
    }

    return null;
}

/**
 * Get the HTML source of one page of the search result list.
 *
 * Page 1 is fetched with a plain GET. Later pages are requested by POSTing
 * the ASP.NET form state (__VIEWSTATE, __PREVIOUSPAGE, __EVENTVALIDATION)
 * captured from the most recent page-1 response, so page 1 has to be
 * fetched before any other page.
 *
 * For backward compatibility the three values are also published as
 * constants of the same names the first time they are seen.
 *
 * @param string $keywords Search keywords.
 * @param int    $page     Page number; 0 is treated as 1.
 * @return boolean|string HTML source, or false on a negative page number,
 *                        a transport error, or missing form state.
 */
function getlisthtml($keywords, $page = 1)
{
    // Form state captured from the latest page-1 response.
    static $formState = array();

    $fields = array('__VIEWSTATE', '__PREVIOUSPAGE', '__EVENTVALIDATION');

    if ($page < 0) {
        return false;
    }
    $page = $page == 0 ? 1 : intval($page);

    if ($page == 1) {
        $result = Curl_get('http://www.icbase.com/ProResult.aspx', array('ProKey' => $keywords));
        if (isset($result['error'])) {
            return false;
        }
        $html = $result['result'];

        // Data needed later for the ASP.NET postback.
        $state = array();
        foreach ($fields as $field) {
            $value = _icbase_hidden_field($html, $field);
            if ($value === null) {
                return false;
            }
            $state[$field] = $value;
            // Constants cannot be redefined; the first value seen wins.
            // An already-defined constant must not be treated as a failure.
            if (!defined($field)) {
                define($field, $value);
            }
        }
        $formState = $state;

        return $html;
    }

    $state = $formState;
    if (empty($state)) {
        // No page-1 fetch happened in this request: fall back to constants
        // that other code may have defined, otherwise give up.
        foreach ($fields as $field) {
            if (!defined($field)) {
                return false;
            }
            $state[$field] = constant($field);
        }
    }

    $data = array(
        '__EVENTTARGET'     => 'pager',
        '__EVENTARGUMENT'   => $page,
        '__VIEWSTATE'       => $state['__VIEWSTATE'],
        '__PREVIOUSPAGE'    => $state['__PREVIOUSPAGE'],
        '__EVENTVALIDATION' => $state['__EVENTVALIDATION'],
    );

    // Encode the keywords so spaces, '&' and '#' cannot corrupt the URL.
    $result = Curl_post('http://www.icbase.com/ProResult.aspx?ProKey=' . urlencode($keywords), $data);
    if (isset($result['error'])) {
        return false;
    }

    return $result['result'];
}
/**
 * Get the URLs of the detail-page links found on a list page.
 *
 * NOTE(review): the tail of the pattern was lost in the source (an `<img …`
 * fragment appears to have been stripped). This reconstruction assumes each
 * product link is an <a href='…' target="_blank"> directly wrapping an
 * <img> tag — confirm against the live markup.
 *
 * @param string $html HTML source code of the list page.
 * @return array List of href values; empty when nothing matches.
 */
function Getlisthref($html)
{
    if (!is_string($html) || $html === '') {
        // A failed fetch hands us false; there is nothing to scan.
        return array();
    }

    $pattern = '/<a\s+href=\'(.[^\']+)\'\s+target="_blank"\s*>[\s\n]*<img[^>]*>/isU';
    if (preg_match_all($pattern, $html, $matches)) {
        return $matches[1];
    }

    // No matches.
    return array();
}
/**
 * Get the next page number from the pager of a list page.
 *
 * Looks inside the element with id "pager" for the link whose text is ">"
 * (literal or as the entity &gt;) and reads the page number from its
 * __doPostBack('pager','N') call.
 *
 * @param string $html HTML source code of the list page.
 * @return int Next page number, or -1 when there is no next-page link.
 */
function Getlistnextpage($html)
{
    if (!is_string($html) || $html === '') {
        return -1;
    }

    $pattern = '/<div\s+id="pager".+[^>]>.+<a\s+href="javascript\:__doPostBack\(\'pager\',\'(\d+)\'\)"\s*>(?:&gt;|>)<\/a>/isU';
    if (preg_match($pattern, $html, $matches)) {
        return intval($matches[1]);
    }

    return -1;
}
/**
 * Get the detail-page hrefs from every page of the search result list.
 *
 * Starts at page 1 and follows the pager's "next" link until there is none.
 * Each page number is requested at most once, so a pager that keeps pointing
 * at an already-visited page cannot loop forever.
 *
 * @param string $keywords Search keywords.
 * @return boolean|array false when $keywords is empty; otherwise the list of
 *                       hrefs (empty when the first page has no results).
 */
function Getlisthrefall($keywords)
{
    if (empty($keywords)) {
        return false;
    }

    $html = getlisthtml($keywords);
    $hrefList = Getlisthref($html);
    if (empty($hrefList)) {
        // No results (or the first page could not be fetched).
        return array();
    }

    $visited = array(1 => true);
    $nextPage = Getlistnextpage($html);
    while ($nextPage > 0 && !isset($visited[$nextPage])) {
        $visited[$nextPage] = true;

        $html = getlisthtml($keywords, $nextPage);
        if ($html === false) {
            // Fetch failed: keep what has been collected so far.
            break;
        }

        $hrefList = array_merge($hrefList, Getlisthref($html));
        $nextPage = Getlistnextpage($html);
    }

    return $hrefList;
}
/**
 * Get the information shown on a product detail page.
 *
 * NOTE(review): the cell labels matched below ("product model",
 * "manufacturer", "details", "inventory quantity", "picture", "description")
 * are kept as they appear in the source, which looks machine-translated; the
 * live site probably uses different label text — verify before relying on it.
 * The "picture" pattern was truncated in the source and is reconstructed by
 * analogy with the "details" pattern.
 *
 * @param string $url    URL path of the detail page, or already-fetched HTML
 *                       source, depending on $is_url.
 * @param int    $is_url 1: $url is a URL path to fetch; 0: $url is HTML source.
 * @return boolean|array false when $url is empty or the page cannot be
 *                       fetched; an empty array when the product model is not
 *                       found; otherwise the field map built below.
 */
function Getdetail($url, $is_url = 1)
{
    if (empty($url)) {
        return false;
    }

    $host = 'www.icbase.com';
    $html = $url;
    if ($is_url) {
        $url = '/' . ltrim($url, '/');
        $result = Curl_get($host . $url);
        if (isset($result['error'])) {
            // Report the failure to the caller instead of terminating the
            // whole script, so one bad page does not abort a long crawl.
            return false;
        }
        $html = $result['result'];
    }

    $result = array(
        'sup_part'   => '', // supplier model
        'sup_id'     => '', // supplier ID
        'mfg_part'   => '', // manufacturer model
        'mfg_name'   => '', // manufacturer name
        'cat_name'   => '', // category name
        'para'       => '', // properties (name => value) once any are found
        'desc'       => '', // description
        'pdf_url'    => '', // PDF address
        'sup_stock'  => '', // stock
        'min_purch'  => '', // minimum order quantity
        'price'      => '', // prices (quantity break => price) once any are found
        'img_url'    => '', // picture address
        'createtime' => '', // creation time
        'datacode'   => '', // lot number
        'package'    => '', // package
        'page_url'   => '', // page address
    );

    // mfg_part
    $pattern = '/<td>\s*product model\s*<\/td><td>(.[^<]+)</isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['mfg_part'] = trim($matches[1]);
    } else {
        // Without a product model this is not a usable detail page.
        return array();
    }

    // mfg_name
    $pattern = '/<td>\s*manufacturer\s*<\/td>[\s\n]*<td>(.+)<\/td>/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['mfg_name'] = trim($matches[1]);
    }

    // para: the parameter table lists all names first, then all values, so
    // the value for cell $k is found at $k + (half the cell count).
    $pattern = '/<tr\s+style="background-color:\s*#E9E9E9;\s*color:\s*black;\s*font-weight:\s*bold;\s*"\s*>(.+)<\/tr><\/table>/isU';
    if (preg_match($pattern, $html, $matches)) {
        if (preg_match_all('/<td>(.+)<\/td>/isU', $matches[1], $cellMatches)) {
            $cells = $cellMatches[1];
            $half = intval(count($cells) / 2);
            foreach ($cells as $k => $v) {
                if ($k >= $half) {
                    break;
                }
                $name = trim($v);
                $value = trim($cells[$half + $k]);
                if ($name === 'description') {
                    // desc
                    $result['desc'] = $value;
                    continue;
                }
                // Writing array keys into the '' placeholder fails on
                // PHP >= 7.1, so switch to an array on first use.
                if (!is_array($result['para'])) {
                    $result['para'] = array();
                }
                $result['para'][$name] = $value;
            }
        }
    }

    // pdf_url
    $pattern = '/<td>\s*details\s*<\/td><td><a\s+href="(.[^"]+)"/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['pdf_url'] = trim($matches[1]);
    }

    // sup_stock
    $pattern = '/<td>\s*inventory quantity\s*<\/td>[\s\n]*<td>(\d+)<\/td>/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['sup_stock'] = trim($matches[1]);
    }

    // price: one row per quantity break, e.g. "10+" followed by the price.
    $pattern = '/<tr><td.[^>]+>(\d+)\+<\/td><td.[^>]+>.[^\d]*([\d.]+)<\/td><\/tr>/isU';
    if (preg_match_all($pattern, $html, $matches)) {
        $result['price'] = array();
        foreach ($matches[1] as $k => $v) {
            $result['price'][$v] = '¥' . $matches[2][$k];
        }
    }

    // img_url
    $pattern = '/<td>\s*picture\s*<\/td><td><img\s+src="(.[^"]+)"/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['img_url'] = trim($matches[1]);
    }

    // page_url
    if ($is_url) {
        $result['page_url'] = $host . $url;
    }

    return $result;
}
/**
 * Final call function: search by keywords and collect every detail page.
 *
 * @param string $keywords Search keywords.
 * @return array One entry per detail-page link, each being whatever
 *               Getdetail() returned for that link. Empty when the keywords
 *               are empty or the search produced no links.
 */
function GetData($keywords)
{
    $hrefList = Getlisthrefall($keywords);
    if (!is_array($hrefList)) {
        // Getlisthrefall() returns false for empty keywords; there is
        // nothing to iterate in that case.
        return array();
    }

    $result = array();
    foreach ($hrefList as $href) {
        $result[] = Getdetail($href);
    }

    return $result;
}
// Test script: read the search keywords from the query string and dump the result.
// $_GET is case-sensitive, and the parameter may be absent or not a string.
$keywords = (isset($_GET['keywords']) && is_string($_GET['keywords'])) ? trim($_GET['keywords']) : '';
$result = GetData($keywords);
print_r($result);