The code is as follows:
<?php
/**
* HOST: components.arrow.com
*/
// Set_time_limit (0 );
// Base function
/**
 * Perform an HTTP GET request with cURL.
 *
 * @param string       $url     Target URL.
 * @param array|string $data    Query parameters; an array is encoded with
 *                              http_build_query(), a string is appended as-is.
 * @param array        $header  Header lines passed to CURLOPT_HTTPHEADER.
 * @param int          $timeout Connection timeout in seconds (CURLOPT_CONNECTTIMEOUT).
 * @param int          $port    Remote port passed to CURLOPT_PORT.
 * @param string       $reffer  Referer value; not sent when empty.
 * @param string       $proxy   Proxy host; no proxy is configured when empty.
 * @return array 'result' holds the curl_exec() return value; 'error' is present
 *               only when cURL reports a non-zero error number.
 */
function curl_get($url, $data = array(), $header = array(), $timeout = 15, $port = 80, $reffer = '', $proxy = '')
{
    $ch = curl_init();

    if (!empty($data)) {
        $query = is_array($data) ? http_build_query($data) : $data;
        // strpos() may legitimately return 0, so compare against false explicitly.
        $url .= (strpos($url, '?') !== false ? '&' : '?') . $query;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_POST, 0);
    curl_setopt($ch, CURLOPT_PORT, $port);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); // follow redirects and return the final page

    if ($reffer) {
        curl_setopt($ch, CURLOPT_REFERER, $reffer);
    }

    if ($proxy) {
        curl_setopt($ch, CURLOPT_PROXY, $proxy);
        curl_setopt($ch, CURLOPT_PROXYPORT, 1723);
        // NOTE(review): proxy credentials are hard-coded in source; they should be
        // moved to configuration and rotated. Kept here so callers see no change.
        curl_setopt($ch, CURLOPT_PROXYUSERPWD, 'andhm001:andhm123');
    }

    $result = array();
    $result['result'] = curl_exec($ch);
    if (curl_errno($ch) !== 0) {
        $result['error'] = "error: \n" . curl_error($ch);
    }
    curl_close($ch);

    return $result;
}
// The code is as follows:
/**
 * Perform an HTTP POST request with cURL.
 *
 * @param string       $url     Target URL.
 * @param array|string $data    Request body passed to CURLOPT_POSTFIELDS.
 * @param array        $header  Header lines; CURLOPT_HTTPHEADER is set only when non-empty.
 * @param int          $timeout Connection timeout in seconds (CURLOPT_CONNECTTIMEOUT).
 * @param int          $port    Remote port passed to CURLOPT_PORT.
 * @return array 'result' holds the curl_exec() return value; 'error' is present
 *               only when cURL reports a non-zero error number.
 */
function curl_post($url, $data = array(), $header = array(), $timeout = 15, $port = 80)
{
    $ch = curl_init();

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_PORT, $port);
    if (!empty($header)) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    }
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $data);

    $result = array();
    $result['result'] = curl_exec($ch);
    if (curl_errno($ch) !== 0) {
        $result['error'] = "error: \n" . curl_error($ch);
    }
    curl_close($ch);

    return $result;
}
/**
 * Fetch the HTML source of one search-result list page.
 *
 * @param string $keywords Search keyword; it is URL-encoded before being placed in the path.
 * @param int    $start    Offset of the first record to request; negative values are rejected.
 * @return string|false The response body, or false when $start is negative or the request failed.
 */
function getListHtml($keywords, $start = 0)
{
    if ($start < 0) {
        return false;
    }

    $postData = array(
        'search_token' => $keywords,
        'start'        => $start,
        'limit'        => 100,
    );

    // Encode the keyword so spaces or reserved characters cannot break the URL.
    $endpoint = 'http://components.arrow.com/part/search/' . rawurlencode($keywords);
    $response = curl_post($endpoint, http_build_query($postData));
    if (isset($response['error'])) {
        return false;
    }

    return $response['result'];
}
/**
 * Extract the detail-page links from a list page.
 *
 * Looks for anchors that directly follow a <td class="col_mfr_part_num"> cell
 * and collects their href values in document order.
 *
 * @param string $html List page source.
 * @return array List of href strings; empty when nothing matches or $html is not a string.
 */
function getListHref($html)
{
    // getListHtml() may hand over false on failure; treat that as "no links".
    if (!is_string($html) || $html === '') {
        return array();
    }

    $pattern = '/<td\s+class="col_mfr_part_num">\s*<a\s+href="([^"]+)"/isU';
    if (preg_match_all($pattern, $html, $matches)) {
        return $matches[1];
    }

    // No matching items.
    return array();
}
/**
 * Read the start offset of the next page from a list page.
 *
 * The value is taken from the third quoted argument of the inline
 * buildPagination('..', '..', '..', N); script call.
 *
 * @param string $html List page source.
 * @return int The captured offset, or -1 when the call is not found or $html is not a string.
 */
function getListNextPage($html)
{
    if (!is_string($html) || $html === '') {
        return -1;
    }

    // Whitespace between arguments is optional so both compact and spaced markup match.
    $pattern = '/<script\s+language="javascript">\s*buildPagination\(\s*\'\d+\'\s*,\s*\'\d+\'\s*,\s*\'(\d+)\'\s*,\s*\d+\s*\)\s*;\s*<\/script>/isU';
    if (preg_match($pattern, $html, $matches)) {
        return intval($matches[1]);
    }

    return -1;
}
/**
 * Collect the detail-page links from every result page of a search.
 *
 * @param string $keywords Search keyword.
 * @return array|false false when $keywords is empty; otherwise the list of hrefs
 *                     (empty when the first page yields no links).
 */
function getListHrefAll($keywords)
{
    if (empty($keywords)) {
        return false;
    }

    $html = getListHtml($keywords);
    if ($html === false) {
        // First request failed: nothing to collect.
        return array();
    }

    $hrefList = getListHref($html);
    if (empty($hrefList)) {
        // No results.
        return array();
    }

    $currentStart = 0;
    $nextPage = getListNextPage($html);
    // Require the offset to strictly increase so a page that keeps announcing the
    // same (or an earlier) offset cannot trap the loop forever.
    while ($nextPage > $currentStart) {
        $currentStart = $nextPage;
        $html = getListHtml($keywords, $currentStart);
        if ($html === false) {
            break;
        }
        foreach (getListHref($html) as $href) {
            $hrefList[] = $href;
        }
        $nextPage = getListNextPage($html);
    }

    return $hrefList;
}
/**
 * Return the trimmed first capture group of $pattern in $html, or null when there is no match.
 *
 * @param string $pattern PCRE pattern with at least one capture group.
 * @param string $html    Text to search.
 * @return string|null
 */
function arrowMatchFirst($pattern, $html)
{
    if (preg_match($pattern, $html, $matches)) {
        return trim($matches[1]);
    }

    return null;
}

/**
 * Fetch a part detail page and extract its fields.
 *
 * @param string $url Site-relative address of the detail page (the host is prepended here).
 * @return array|false false when $url is empty; an empty array when the request fails
 *                     or the part number cannot be found; otherwise an associative array
 *                     with the keys listed below. 'para' and 'price' become arrays when
 *                     data is found and stay '' otherwise.
 */
function getDetail($url)
{
    if (empty($url)) {
        return false;
    }

    $host = 'http://components.arrow.com';
    $url = $host . $url;

    $response = curl_get($url);
    if (isset($response['error']) || !is_string($response['result'])) {
        return array();
    }
    $html = $response['result'];

    $result = array(
        'sup_part'   => '', // supplier part number
        'sup_id'     => '', // supplier id
        'mfg_part'   => '', // manufacturer part number
        'mfg_name'   => '', // manufacturer name
        'cat_name'   => '', // category name
        'para'       => '', // attributes
        'desc'       => '', // description
        'pdf_url'    => '', // datasheet (pdf) address
        'sup_stock'  => '', // stock
        'min_purch'  => '', // minimum order quantity
        'price'      => '', // price
        'img_url'    => '', // image address
        'createtime' => '', // creation time
        'datacode'   => '', // batch number
        'package'    => '', // package
        'page_url'   => '', // page address
    );

    // mfg_part: a page without a part number is treated as unusable.
    $value = arrowMatchFirst('/<li>\s*<strong>\s*Part No:\s*<\/strong>(.+)<\/li>/isU', $html);
    if ($value === null) {
        return array();
    }
    $result['mfg_part'] = $value;

    // mfg_name
    $value = arrowMatchFirst('/<li>\s*<strong>\s*Manufacturer:\s*<\/strong>(.+)<\/li>/isU', $html);
    if ($value !== null) {
        $result['mfg_name'] = $value;
    }

    // cat_name: the page passes a '|'-separated path to displayCategory().
    $value = arrowMatchFirst('/displayCategory\(\s*\'([^\']+)\'\s*\);/isU', $html);
    if ($value !== null) {
        $result['cat_name'] = str_replace('|', '>', $value);
    }

    // para: name/value rows of the part_specs table.
    if (preg_match('/<table\s+id="part_specs"[^>]*>(.+)<\/table>/isU', $html, $table)) {
        $rowPattern = '/<tr>\s*<td>\s*<strong>(.+)<\/strong>\s*<\/td>\s*<td>(.+)<\/td>\s*<\/tr>/isU';
        if (preg_match_all($rowPattern, $table[1], $rows)) {
            // Collected in a real array: indexing the '' placeholder by string key
            // is an error on modern PHP.
            $params = array();
            foreach ($rows[1] as $k => $name) {
                $name = trim($name);
                $cell = trim($rows[2][$k]);
                // The package row is stored in its own field; compare
                // case-insensitively because the exact label casing is not known.
                if (strcasecmp($name, 'package type') === 0) {
                    $result['package'] = $cell;
                    continue;
                }
                $params[$name] = $cell;
            }
            if (!empty($params)) {
                $result['para'] = $params;
            }
        }
    }

    // desc
    // NOTE(review): the original pattern was truncated after `<div id="part_title">.+`.
    // This reconstruction takes the text content of that div; verify against the
    // live markup and narrow the capture if the div also contains the part title.
    $value = arrowMatchFirst('/<div\s+id="part_title">(.+)<\/div>/isU', $html);
    if ($value !== null) {
        $result['desc'] = trim(strip_tags($value));
    }

    // pdf_url
    $value = arrowMatchFirst('/<li\s+class="datasheet">\s*<strong>\s*Datasheet:\s*<\/strong>\s*<a\s+href="([^"]+)"/isU', $html);
    if ($value !== null) {
        $result['pdf_url'] = $host . $value;
    }

    // sup_stock: strip thousands separators.
    $value = arrowMatchFirst('/<td\s+id="inv_1"\s+class="li_inv">\s*([\d,]+)\s*<\/td>/isU', $html);
    if ($value !== null) {
        $result['sup_stock'] = str_replace(',', '', $value);
    }

    // min_purch
    $value = arrowMatchFirst('/<span\s+id="multiples">\s*<strong>\s*Multiple:\s*<\/strong>(.+)<\/span>/isU', $html);
    if ($value !== null) {
        $result['min_purch'] = $value;
    }

    // price: either a single price printed in the page, or price breaks loaded from a JSON URL.
    $prices = array();
    $value = arrowMatchFirst('/<div\s+id="price_1"\s+class="li_price">([^<]+)<\/div>/isU', $html);
    if ($value !== null && $value !== '') {
        $prices[1] = $value;
    }

    $priceUrl = arrowMatchFirst('/<div\s+id="price_1"\s+class="li_price">\s*<span[^>]+title="([^"]+)"/isU', $html);
    if ($priceUrl !== null) {
        // The attribute value is HTML-escaped; restore plain ampersands before requesting it.
        $priceUrl = str_replace('&amp;', '&', $priceUrl);
        $priceResponse = curl_get($priceUrl);
        if (!isset($priceResponse['error']) && !empty($priceResponse['result'])) {
            $decoded = json_decode($priceResponse['result'], true);
            // NOTE(review): key names ('parts', 'webprice', 'resale', 'minqty', 'price')
            // are taken from the mangled source and their exact casing is unconfirmed;
            // check against a real response.
            if (isset($decoded['parts'][0]['webprice']['resale'])
                && is_array($decoded['parts'][0]['webprice']['resale'])
            ) {
                foreach ($decoded['parts'][0]['webprice']['resale'] as $tier) {
                    if (isset($tier['minqty'], $tier['price'])) {
                        $prices[$tier['minqty']] = $tier['price'];
                    }
                }
            }
        }
    }
    if (!empty($prices)) {
        $result['price'] = $prices;
    }

    // img_url
    // NOTE(review): the original pattern was truncated after `<div id="part_image">[\s\n]*`;
    // an <img src="..."> directly inside the div is assumed. Verify against the live markup.
    $value = arrowMatchFirst('/<div\s+id="part_image">\s*<img\s[^>]*src="([^"]+)"/isU', $html);
    if ($value !== null) {
        $result['img_url'] = $value;
    }

    // page_url
    $result['page_url'] = $url;

    return $result;
}
/**
 * Entry point: search for a keyword and fetch the details of every hit.
 *
 * @param string $keywords Search keyword.
 * @return array One getDetail() result per link found; empty when the keyword
 *               is empty or the search yields no links.
 */
function getData($keywords)
{
    $hrefList = getListHrefAll($keywords);
    // getListHrefAll() returns false for an empty keyword; iterate only over real lists.
    if (!is_array($hrefList)) {
        return array();
    }

    $result = array();
    foreach ($hrefList as $href) {
        $result[] = getDetail($href);
    }

    return $result;
}
// Test script: run a search for the "keyword" query parameter and dump the result.
// A missing or non-string parameter is treated as an empty keyword.
$keywords = (isset($_GET['keyword']) && is_string($_GET['keyword'])) ? trim($_GET['keyword']) : '';
$result = getData($keywords);
print_r($result);