#!/usr/bin/perl
#####
# Overview of web-crawling-related Perl modules.
# Note: the code below is just for overview purposes.
#####

#####
# HTTP::Thin
#####
use 5.12.1;
use HTTP::Request::Common;
use HTTP::Thin;

say HTTP::Thin->new()->request(GET 'http://example.com')->as_string;

#####
# HTTP::Tiny
#####
use HTTP::Tiny;

my $response = HTTP::Tiny->new->get('http://example.com/');
die "Failed!\n" unless $response->{success};

print "$response->{status} $response->{reason}\n";
while (my ($k, $v) = each %{ $response->{headers} }) {
    for (ref $v eq 'ARRAY' ? @$v : $v) {
        print "$k: $_\n";
    }
}
print $response->{content} if length $response->{content};

# new
$http = HTTP::Tiny->new(%attributes);
# Valid attributes include:
# - agent
# - cookie_jar
# - default_headers
# - local_address
# - keep_alive
# - max_redirect
# - max_size
# - https_proxy
# - proxy
# - no_proxy
# - timeout
# - verify_SSL
# - SSL_options

# get / head / put / post / delete
$response = $http->get($url);
$response = $http->get($url, \%options);
$response = $http->head($url);

# post_form
$response = $http->post_form($url, $form_data);
$response = $http->post_form($url, $form_data, \%options);

# request
$response = $http->request($method, $url);
$response = $http->request($method, $url, \%options);
# Basic authentication via userinfo in the URL:
$http->request('GET', 'http://user:pwd@example.com/');
# or, with a literal '@' in the username percent-escaped as %40:
$http->request('GET', 'http://mars%40:pwd@example.com/');

# www_form_urlencode
$params   = $http->www_form_urlencode($data);
$response = $http->get("http://example.com/query?$params");

# SSL support, e.g.:
#   SSL_options => { SSL_ca_file => $file_path },
# proxy support: see the proxy / https_proxy / no_proxy attributes above

#####
# WWW::Mechanize
# Stateful, programmatic web browsing, used for automating interaction with websites.
#####
use WWW::Mechanize;

my $mech = WWW::Mechanize->new();
$mech->get($url);

$mech->follow_link(n => 3);
$mech->follow_link(text_regex => qr/download this/i);
$mech->follow_link(url => 'http://host.com/index.html');

$mech->submit_form(
    form_number => 3,
    fields      => {
        username => 'banana',
        password => 'lost-and-alone',
    },
);

$mech->submit_form(
    form_name => 'search',
    fields    => { query => 'pot of gold', },
    button    => 'Search Now',
);

# Testing web applications
use Test::More;
like($mech->content(), qr/$expected/, "Got expected content");

# Page traversal
$mech->back();

# Finer control over the page
$mech->find_link(n => $number);
$mech->form_number($number);
$mech->form_name($name);
$mech->field($name, $value);
$mech->set_fields(%field_values);
$mech->set_visible(@criteria);
$mech->click($button);

# WWW::Mechanize is a subclass of LWP::UserAgent, e.g.:
$mech->add_header($name => $value);

# Method categories in the WWW::Mechanize documentation:
# - Page-fetching methods
# - Status methods
# - Content-handling methods
# - Link methods
# - Image methods
# - Form methods
# - Field methods
# - Miscellaneous methods
# - Overridden LWP::UserAgent methods
# - Inherited unchanged LWP::UserAgent methods

# Now it's easy to implement a spider project for future integration use.
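#####
# A minimal single-page spider sketch, assuming WWW::Mechanize is installed
# and that $start_url (a placeholder) points at a reachable page. It fetches
# one page and prints the text and absolute URL of each link found, which is
# the basic building block of a crawler; recursion and politeness (delays,
# robots.txt handling) are left out.
#####
use strict;
use warnings;
use WWW::Mechanize;

my $start_url = 'http://example.com/';                 # placeholder URL
my $spider    = WWW::Mechanize->new( autocheck => 1 ); # autocheck makes HTTP errors fatal

$spider->get($start_url);
for my $link ( $spider->links() ) {                    # links() returns WWW::Mechanize::Link objects
    printf "%s => %s\n", $link->text // '(no text)', $link->url_abs;
}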
Mars
Learn web crawling with Perl