Sharing PHP code that scrapes the components.arrow.com site by search keyword

Source: Internet
Author: User

Copy code. The code is as follows:
<?php
/**
 * Scraper for components.arrow.com.
 *
 * HOST: components.arrow.com
 */
// Uncomment to lift the execution time limit for long crawls:
// set_time_limit(0);
/**
 * Base helper: perform an HTTP GET request with cURL.
 *
 * @param string       $url     Target URL.
 * @param array|string $data    Query parameters; an array is run through http_build_query(),
 *                              a string is appended as-is.
 * @param array        $header  Raw HTTP header lines passed to CURLOPT_HTTPHEADER.
 * @param int          $timeout Connect timeout in seconds.
 * @param int          $port    Remote port passed to CURLOPT_PORT.
 * @param string       $reffer  Referer value; the option is only set when non-empty.
 * @param string       $proxy   Proxy host; when non-empty the proxy options below are applied.
 * @return array 'result' holds the curl_exec() return value; 'error' is present only
 *               when curl_errno() reports a non-zero code.
 */
function curl_get($url, $data = array(), $header = array(), $timeout = 15, $port = 80, $reffer = '', $proxy = '')
{
    $ch = curl_init();

    if (!empty($data)) {
        $data = is_array($data) ? http_build_query($data) : $data;
        // strpos() returns 0 for a match at offset 0, so compare against false explicitly.
        $url .= (strpos($url, '?') !== false ? '&' : '?') . $data;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_POST, 0);
    curl_setopt($ch, CURLOPT_PORT, $port);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); // follow redirects and return the final page

    if ($reffer) {
        curl_setopt($ch, CURLOPT_REFERER, $reffer);
    }

    if ($proxy) {
        curl_setopt($ch, CURLOPT_PROXY, $proxy);
        curl_setopt($ch, CURLOPT_PROXYPORT, 1723);
        // NOTE(review): hard-coded proxy credentials; these should come from configuration.
        curl_setopt($ch, CURLOPT_PROXYUSERPWD, "andhm001:andhm123");
    }

    $result = array();
    $result['result'] = curl_exec($ch);
    if (0 != curl_errno($ch)) {
        $result['error'] = "error: \n" . curl_error($ch);
    }
    curl_close($ch);

    return $result;
}

Copy code. The code is as follows:
/**
 * Base helper: perform an HTTP POST request with cURL.
 *
 * @param string       $url     Target URL.
 * @param array|string $data    Request body passed to CURLOPT_POSTFIELDS.
 * @param array        $header  Raw HTTP header lines; the option is only set when non-empty.
 * @param int          $timeout Connect timeout in seconds.
 * @param int          $port    Remote port passed to CURLOPT_PORT.
 * @return array 'result' holds the curl_exec() return value; 'error' is present only
 *               when curl_errno() reports a non-zero code.
 */
function curl_post($url, $data = array(), $header = array(), $timeout = 15, $port = 80)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    curl_setopt($ch, CURLOPT_PORT, $port);
    if (!empty($header)) {
        curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    }
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $data);

    $result = array();
    $result['result'] = curl_exec($ch);
    if (0 != curl_errno($ch)) {
        $result['error'] = "error: \n" . curl_error($ch);
    }
    curl_close($ch);

    return $result;
}

/**
 * Fetch the HTML source of one search-result list page.
 *
 * @param string $keywords Search keyword.
 * @param int    $start    Offset of the first record to request; negative values are rejected.
 * @return string|false The page HTML, or false when $start is negative or the request failed.
 */
function getListHtml($keywords, $start = 0)
{
    if ($start < 0) {
        return false;
    }

    $postData = array(
        'search_token' => $keywords,
        'start'        => $start,
        'limit'        => 100,
    );

    // The keyword becomes a URL path segment, so it must be percent-encoded
    // (spaces or reserved characters would otherwise produce a malformed URL).
    $searchUrl = 'http://components.arrow.com/part/search/' . rawurlencode($keywords);

    $response = curl_post($searchUrl, http_build_query($postData));
    if (isset($response['error'])) {
        return false;
    }

    return $response['result'];
}

/**
 * Extract the detail-page links from a list page.
 *
 * Looks for anchors that directly follow a <td class="col_mfr_part_num"> cell
 * and collects their href values.
 *
 * @param string $html List page HTML source.
 * @return array List of href strings; empty when nothing matches.
 */
function getListHref($html)
{
    // "U" (ungreedy) matches the other patterns in this file; optional
    // whitespace between the cell and the anchor is tolerated.
    $pattern = '/<td\s+class="col_mfr_part_num">\s*<a\s+href="(.[^>]+)">/isU';

    if (preg_match_all($pattern, $html, $matches)) {
        return $matches[1];
    }

    // No matching items.
    return array();
}

/**
 * Read the start offset of the next page from a list page.
 *
 * The value is taken from the third quoted argument of the inline
 * buildPagination('..', '..', '..', n); script call.
 *
 * @param string $html List page HTML source.
 * @return int The captured number, or -1 when the script call is not found.
 */
function getListNextPage($html)
{
    // Whitespace between arguments is optional so both "a','b" and "a', 'b" match.
    $pattern = '/<script\s+language="javascript">\s*buildPagination\(\s*\'\d+\'\s*,\s*\'\d+\'\s*,\s*\'(\d+)\'\s*,\s*\d+\s*\)\s*;\s*<\/script>/isU';

    if (preg_match($pattern, $html, $matches)) {
        return intval($matches[1]);
    }

    return -1;
}

/**
 * Collect the detail-page links from every result page for a keyword.
 *
 * @param string $keywords Search keyword.
 * @return array|false false when $keywords is empty; otherwise the list of
 *                     hrefs gathered across all pages (possibly empty).
 */
function getListHrefAll($keywords)
{
    if (empty($keywords)) {
        return false;
    }

    $html = getListHtml($keywords);
    if ($html === false) {
        // First page could not be fetched: no results.
        return array();
    }

    $hrefList = getListHref($html);
    if (empty($hrefList)) {
        // No results.
        return array();
    }

    $nextPage = getListNextPage($html);
    while ($nextPage > 0) {
        $html = getListHtml($keywords, $nextPage);
        if ($html === false) {
            // A failed request ends the crawl; keep what has been collected.
            break;
        }

        $hrefList = array_merge($hrefList, getListHref($html));

        $previousPage = $nextPage;
        $nextPage = getListNextPage($html);
        if ($nextPage <= $previousPage) {
            // The offset did not advance; stop rather than loop forever on the same page.
            break;
        }
    }

    return $hrefList;
}

/**
 * Fetch a part detail page and extract its fields.
 *
 * @param string $url Path of the detail page, relative to http://components.arrow.com.
 * @return array|false false when $url is empty; an empty array when the request
 *                     fails or the part number cannot be found; otherwise an
 *                     associative array with the keys initialised below.
 *                     'para' and 'price' are arrays; every other field is a string.
 */
function getDetail($url)
{
    if (empty($url)) {
        return false;
    }

    $host = 'http://components.arrow.com';
    $url  = $host . $url;

    $response = curl_get($url);
    if (isset($response['error'])) {
        return array();
    }
    $html = $response['result'];

    $result = array(
        'sup_part'   => '',      // supplier part number
        'sup_id'     => '',      // supplier id
        'mfg_part'   => '',      // manufacturer part number
        'mfg_name'   => '',      // manufacturer name
        'cat_name'   => '',      // category name
        // 'para' and 'price' are filled by key below, so they must start as
        // arrays: offset assignment on '' no longer converts to an array on PHP 7.1+.
        'para'       => array(), // attributes (name => value)
        'desc'       => '',      // description
        'pdf_url'    => '',      // datasheet address
        'sup_stock'  => '',      // stock
        'min_purch'  => '',      // minimum order quantity
        'price'      => array(), // price (quantity => price)
        'img_url'    => '',      // image address
        'createtime' => '',      // creation time
        'datacode'   => '',      // batch number
        'package'    => '',      // package
        'page_url'   => '',      // page address
    );

    // mfg_part: a page without a part number is treated as unusable.
    $pattern = '/<li>[\s\n]*<strong>Part No:\s*<\/strong>(.+)<\/li>/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['mfg_part'] = trim($matches[1]);
    } else {
        return array();
    }

    // mfg_name
    $pattern = '/<li>[\s\n]*<strong>Manufacturer:\s*<\/strong>(.+)<\/li>/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['mfg_name'] = trim($matches[1]);
    }

    // cat_name: taken from the displayCategory('a|b|c'); script call.
    $pattern = '/displayCategory\(\'(.[^\']+)\'\);/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['cat_name'] = str_replace('|', '>', trim($matches[1]));
    }

    // para: rows of the part_specs table, "Package Type" is stored separately.
    $tablePattern = '/<table\s+id="part_specs"[^>]*>(.+)<\/table>/isU';
    if (preg_match($tablePattern, $html, $tableMatch)) {
        $rowPattern = '/<tr>[\s\n]*<td>\s*<strong>(.+)<\/strong>\s*<\/td>\s*<td>(.+)<\/td>[\s\n]*<\/tr>/isU';
        if (preg_match_all($rowPattern, $tableMatch[1], $rows)) {
            foreach ($rows[1] as $k => $name) {
                $name  = trim($name);
                $value = trim($rows[2][$k]);
                // NOTE(review): the original literal's letter case was lost in the
                // listing, so compare case-insensitively.
                if (strcasecmp($name, 'Package Type') === 0) {
                    $result['package'] = $value;
                    continue;
                }
                $result['para'][$name] = $value;
            }
        }
    }

    // desc
    // NOTE(review): the original pattern was truncated in the published listing
    // (only '<div\s+id="part_title">.+' survived). This reconstruction takes the
    // text content of the part_title div; confirm against the live markup.
    $pattern = '/<div\s+id="part_title"[^>]*>(.+)<\/div>/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['desc'] = trim(strip_tags($matches[1]));
    }

    // pdf_url
    $pattern = '/<li\s+class="datasheet">[\s\n]*<strong>Datasheet:\s*<\/strong>\s*<a\s+href="(.[^"]+)"/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['pdf_url'] = $host . trim($matches[1]);
    }

    // sup_stock: strip thousands separators.
    $pattern = '/<td\s+id="inv_1"\s+class="li_inv">([\d,]+)<\/td>/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['sup_stock'] = str_replace(',', '', trim($matches[1]));
    }

    // min_purch
    $pattern = '/<span\s+id="multiples">[\s\n]*<strong>Multiple:\s*<\/strong>(.+)<\/span>/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['min_purch'] = trim($matches[1]);
    }

    // price: either a single inline price (stored under quantity 1) ...
    $pattern = '/<div\s+id="price_1"\s+class="li_price">(.[^<]+)<\/div>/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['price'][1] = trim($matches[1]);
    }
    // ... or a span whose title attribute holds the URL of a JSON price table.
    $pattern = '/<div\s+id="price_1"\s+class="li_price">[\s\n]*<span.[^>]+title="(.[^"]+)">/isU';
    if (preg_match($pattern, $html, $matches)) {
        // The attribute value is HTML-escaped; restore literal ampersands.
        $priceUrl = str_replace('&amp;', '&', $matches[1]);
        $priceResponse = curl_get($priceUrl);
        $json = $priceResponse['result'];
        if (!empty($json)) {
            $decoded = json_decode($json, true);
            // NOTE(review): key names are copied verbatim from the listing; their
            // original letter case could not be verified.
            $tiers = isset($decoded['parts'][0]['webprice']['resale'])
                ? $decoded['parts'][0]['webprice']['resale']
                : null;
            if (is_array($tiers)) {
                foreach ($tiers as $tier) {
                    if (isset($tier['minqty'], $tier['price'])) {
                        $result['price'][$tier['minqty']] = $tier['price'];
                    }
                }
            }
        }
    }

    // img_url
    // NOTE(review): the original pattern was truncated in the published listing
    // (only '<div\s+id="part_image">[\s\n]*' survived). This reconstruction takes
    // the src of the first <img> in that div; confirm against the live markup.
    $pattern = '/<div\s+id="part_image">[\s\n]*<img\s+[^>]*src="(.[^"]+)"/isU';
    if (preg_match($pattern, $html, $matches)) {
        $result['img_url'] = trim($matches[1]);
    }

    // page_url
    $result['page_url'] = $url;

    return $result;
}

/**
 * Entry point: search by keyword and fetch the details of every hit.
 *
 * @param string $keywords Search keyword.
 * @return array One getDetail() result per link found; empty when the
 *               keyword is empty or the search yields no links.
 */
function getData($keywords)
{
    $hrefList = getListHrefAll($keywords);

    // getListHrefAll() returns false for an empty keyword; never iterate over that.
    if (empty($hrefList)) {
        return array();
    }

    $result = array();
    foreach ($hrefList as $href) {
        $result[] = getDetail($href);
    }

    return $result;
}

// Test Script
// Read the keyword from the query string; a missing or non-string value
// (e.g. ?keyword[]=x) is treated as an empty search.
$keywords = (isset($_GET['keyword']) && is_string($_GET['keyword'])) ? trim($_GET['keyword']) : '';
$result = getData($keywords);

print_r($result);

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.