Collecting Windows app information from the Microsoft Store with PHP
Last Update: 2015-08-07
Source: Internet
Author: User
<span id="Label3"> </p><pre class="brush:php; toolbar: true; auto-links: false;"><?php
/**
 * Test crawler for the new Windows Store app page.
 *
 * Downloads one app page, pulls individual fields out of the HTML with
 * regular expressions and echoes each field as "label:value". Fields that
 * are treated as mandatory abort the script when they cannot be found;
 * optional fields only print a notice.
 *
 * NOTE(review): the patterns below were reconstructed from a copy of the
 * script whose quoting, spacing and letter case had been damaged. Verify
 * them against the live page markup before relying on the results.
 */

# $url = 'http://apps.microsoft.com/windows/en-us/app/fotor/6f797ba2-500d-4dee-9c5a-13c2d818c958';
$url = 'https://www.microsoft.com/en-us/store/apps/adobe-photoshop-express/9wzdncrfj27n';
$url = trim($url);

// get() appends a "[url:...]" marker carrying the final (post-redirect) URL;
// it is parsed back out of $content in the "new url" step below.
$content = html_entity_decode(get($url), ENT_HTML5, 'UTF-8');

// pfn (mandatory)
if (preg_match('/data-pfn="(.*)">/isU', $content, $match)) {
    $pfn = $match[1];
    echo "pfn:" . $pfn . "\n";
} else {
    echo 'pfn error: ', $url, "\n";
    exit();
}

// new url (mandatory): the effective URL that get() appended to the body
if (preg_match('/\[url:(.*)\]/is', $content, $match)) {
    $newurl = $match[1];
    echo "url:" . $newurl . "\n";
} else {
    echo 'get no new url' . "\n";
    exit();
}

// icon and its background colour (mandatory)
if (preg_match('/class="pull-left ph-logo">.*src="(.*)".*style="background-color:(.*);.*"/isU', $content, $match)) {
    $icon = $match[1];
    $backgroundcolor = $match[2];
    echo "icon:" . $icon . "\n";
    echo "backgroundcolor:" . $backgroundcolor . "\n";
} else {
    echo 'get no icon' . "\n";
    exit();
}

// name (mandatory)
if (preg_match('/id="page-title".*itemprop="name">(.*)<\//', $content, $match)) {
    $name = $match[1];
    echo "name:" . $name . "\n";
} else {
    echo 'get no name' . "\n";
    exit();
}

// alias (mandatory): the path segment that follows "apps/" in the final URL
if (preg_match('/apps\/(.*)\//isU', $newurl, $match)) {
    $alias = $match[1];
    echo "alias:" . $alias . "\n";
} else {
    echo 'get no alias' . "\n";
    exit();
}

// rating (mandatory)
// The exact letter case of the class name was lost in the published copy,
// so the pattern is matched case-insensitively.
if (preg_match('/class="srv_ratingsScore win-rating-average">(.*)<\//i', $content, $match)) {
    $rating = $match[1];
    echo "rating:" . $rating . "\n";
} else {
    echo 'get no rating' . "\n";
    exit();
}

// rating num (mandatory): strip thousands separators and the "ratings" suffix
if (preg_match('/class="win-rating-total">(.*)<\//', $content, $match)) {
    $ratingcount = trim(str_replace(array(',', 'ratings'), '', $match[1]));
    echo "rating num:" . $ratingcount . "\n";
} else {
    echo 'get no rating num' . "\n";
    exit();
}

// price (mandatory)
if (preg_match('/class="price srv_price"><span class="header-sub">(.*)<\//', $content, $match)) {
    $price = $match[1];
    echo "price:" . $price . "\n";
} else {
    echo 'get no price' . "\n";
    exit();
}

// category (mandatory): prefer the sub-category meta tag, fall back to the category one
if (preg_match('/<meta name="ms.prod_sbcat" content="(.*)" \/>/isU', $content, $match)) {
    $category = trim($match[1]);
    echo "category:" . $category . "\n";
} elseif (preg_match('/<meta name="ms.prod_cat" content="(.*)" \/>/isU', $content, $match)) {
    $category = trim($match[1]);
    echo "category:" . $category . "\n";
} else {
    echo 'get no category' . "\n";
    exit();
}

// content rating (optional, defaults to "All")
if (preg_match('/content Rating: <a .*>(.*)<\//isU', $content, $match)) {
    $contentRating = trim($match[1]);
    echo "content rating:" . $contentRating . "\n";
} else {
    echo 'get no content rating' . "\n";
    $contentRating = 'All';
}

// publisher (mandatory)
if (preg_match('/publisher<\/dt>.*<div class="content.*" .*>(.+)<\//isU', $content, $match)) {
    $publisher = trim($match[1]);
    echo "publisher:" . $publisher . "\n";
} else {
    echo 'get no publisher' . "\n";
    exit();
}

// works on (optional)
if (preg_match('/works on:(.*)</isU', $content, $match)) {
    $workson = trim($match[1]);
    echo "works on:" . $workson . "\n";
} else {
    echo 'get no works platform' . "\n";
}

// size (optional)
if (preg_match('/approximate size<\/dt>.*<div class="content.*" .*>(.+)<\//isU', $content, $match)) {
    $size = trim($match[1]);
    echo "size:" . $size . "\n";
} else {
    echo 'get no size' . "\n";
}

// supported processors (optional)
if (preg_match('/supported processors<\/dt>.*<div class="content.*" .*>(.+)<\//isU', $content, $match)) {
    $processors = trim($match[1]);
    echo "processors:" . $processors . "\n";
} else {
    echo 'get no processors' . "\n";
}

// age rating (optional)
if (preg_match('/age rating<\/dt>.*<div class="content.*" .*>(.+)<\//isU', $content, $match)) {
    $age = trim($match[1]);
    echo "age:" . $age . "\n";
} else {
    echo 'get no age' . "\n";
}

// languages (optional): each language sits in its own <div> inside the <dd>
if (preg_match('/supported languages<\/dt>.*<dd .*>(.*)<\/dd>/isU', $content, $match)) {
    if (preg_match_all('/<div>([^<].*)<\/div>/', $match[1], $temp)) {
        $languages = implode(",", $temp[1]);
        echo "languages:" . $languages . "\n";
    }
} else {
    echo 'get no languages' . "\n";
}

// features (optional): the <li> items of the "Features" section
if (preg_match('/class="section-title.*">Features.*<ul>(.*)<\/ul>/isU', $content, $match)) {
    if (preg_match_all('/<li class="avoid-break">(.*)<\/li>/isU', $match[1], $temp)) {
        $features = $temp[1];
        echo 'features:';
        print_r($features);
        echo "\n";
    }
} else {
    echo 'get no features' . "\n";
}

// release notes (optional)
if (preg_match('/class="section-title.*">version notes.*<p>(.*)<\/p>/isU', $content, $match)) {
    $releasenotes = $match[1];
    echo "release notes:" . $releasenotes . "\n";
} else {
    echo 'no version notes' . "\n";
}

// screenshots (mandatory)
// NOTE(review): the capture group of this pattern was lost in the published
// copy (only 'class="media-img ratio-16-9">.*' survived) although $match[1]
// is read below. The <img ... src="..."> capture is an assumed
// reconstruction - confirm against the page markup.
if (preg_match_all('/class="media-img ratio-16-9">.*<img.*src="(.*)"/isU', $content, $match)) {
    $screenshots = $match[1];
    echo "screenshots:";
    print_r($screenshots);
    echo "\n";
} else {
    echo 'get no screenshots' . "\n";
    exit();
}

// description (mandatory)
if (preg_match('/<div class="showmore m-t-pdp">.*<p.*>(.*)<\//isU', $content, $match)) {
    $description = $match[1];
    echo 'description:' . $description . "\n";
} else {
    echo "get no description content\n";
    exit();
}

exit();

/**
 * Fetch a URL with cURL, following redirects, using a desktop Chrome
 * user agent string.
 *
 * @param string $url Address to download.
 *
 * @return string The response body with "[url:<effective url>]" appended,
 *                where the effective URL is the 'url' entry reported by
 *                curl_getinfo() after the transfer. If curl_exec() fails it
 *                yields false, which concatenates as an empty string, so
 *                only the marker is returned.
 */
function get($url)
{
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11');
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);

    $output = curl_exec($ch);
    $curlinfo = curl_getinfo($ch);
    $lasturl = $curlinfo['url'];
    curl_close($ch);

    return $output . "[url:$lasturl]";
}</pre> <p><p><br></p></p> <p><p>Finished station: www.topwindata.com, Windows 101 released, traffic doubled, but still only about 1000 ip.</p></p><p><p>PHP collects information from Windows apps</p></p></span>