Main code:
1Gecco (matchurl= "Https://github.com/{user}/{project}", pipelines= "Consolepipeline")2 Public classMygithubImplementsHtmlbean {3 4 Private Static Final LongSerialversionuid = -7127412585200687225l;5 6 @Request7 PrivateHttpRequest request;8 9@RequestParameter ("User")Ten PrivateString user; One A@RequestParameter ("Project") - PrivateString project; - the @Text -@HtmlField (csspath= ". Repository-meta-content") - PrivateString title; - + @Text -@HtmlField (csspath= ". Pagehead-actions li:nth-child (2). Social-count") + Private intStar; A at @Text -@HtmlField (csspath= ". Pagehead-actions li:nth-child (3). Social-count") - Private intFork; - -@Href (click=false) -@HtmlField (csspath= "Ul.numbers-summary > Li:nth-child (4) > a") in PrivateString contributors; - to@HtmlField (csspath= ". Entry-content") + PrivateString Readme; - the PublicHttpRequest getrequest () { * returnrequest; $ }Panax Notoginseng - Public voidsetrequest (HttpRequest request) { the This. Request =request; + } A the PublicString Getreadme () { + returnReadme; - } $ $ Public voidSetreadme (String readme) { - This. Readme =Readme; - } the - PublicString GetUser () {Wuyi returnuser; the } - Wu Public voidsetUser (String user) { - This. user =user; About } $ - PublicString Getproject () { - returnproject; - } A + Public voidSetproject (String project) { the This. Project =project; - } $ the PublicString GetTitle () { the returntitle; the } the - Public voidSettitle (String title) { in This. title =title; the } the About Public intGetstar () { the returnStar; the } the + Public voidSetstar (intstar) { - This. Star =Star; the }Bayi the Public intgetfork () { the returnFork; - } - the Public voidSetfork (intfork) { the This. Fork =Fork; the } the - PublicString getcontributors () { the returncontributors; the } the 94 Public voidSetcontributors (String contributors) { the This. Contributors =contributors; the } the 98 Public Static voidMain (string[] args) { About geccoengine.create () -. 
Classpath ("Com.geccocrawler.gecco.demo")101 //page address to start crawling102. Start ("Https://github.com/xtuhcy/gecco")103 //turn on several crawler threads, preferably not more than the start request number104. Thread (2) the //the interval between the time a single crawler crawls a request106. Interval (2000)107 . Run ();108 }109 the}
Java crawler Gecco