1. The JavaScript "encryption" is the most annoying part :-(
1). It evals an immediately-invoked function that depends on no external variables — naïve; Node.js can evaluate it directly.
2). For HTTP request authentication, try the Referer header first; cookies are not as important as you might think.
3). curl and other command-line tools are very handy for handling text.
4). Doing the same in Python only takes a few more lines.
2. Using requests is much more efficient than using lxml by itself.
3. The progressbar library is overkill, so I wrote my own...
4. argparse is a prerequisite for writing Python command-line programs!
5. string.Template is also very useful!
6. The main code follows. Apart from the standard library plus lxml and requests, all remaining modules are in my all-purpose winterpy repository — in fact, the main code is in there too.
#!/usr/bin/env python3
# vim:fileencoding=utf-8
"""Download a web novel from www.feisuzw.com chapter by chapter into one text file."""

import sys
import argparse
import base64
from functools import partial
from string import Template
from urllib.parse import unquote

from lxml.html import fromstring
import requests

from htmlutils import extractText  # project-local helper (winterpy)
from termutils import foreach      # project-local helper (winterpy)

# One shared HTTP session so keep-alive / cookies carry across all requests.
session = requests.Session()


def main(index, filename='$name-$author.txt', start=0):
    """Fetch the novel's table-of-contents page, then download every chapter.

    index    -- URL of the novel's index (front) page
    filename -- string.Template pattern for the output file;
                supports $name (novel title) and $author
    start    -- 0-based position of the first chapter to download
    """
    r = session.get(index)
    r.encoding = 'gb18030'  # the site serves GB18030-encoded pages
    doc = fromstring(r.text, base_url=index)
    doc.make_links_absolute()

    name = doc.xpath('//div[@class="info"]/p[1]/a/text()')[0]
    author = doc.xpath('//div[@class="info"]/p[1]/span/text()')[0].split()[-1]
    nametmpl = Template(filename)
    fname = nametmpl.substitute(name=name, author=author)
    with open(fname, 'w') as f:
        sys.stderr.write('Downloading to file %s.\n' % fname)
        links = doc.xpath('//div[@class="chapterlist"]/ul/li/a')
        try:
            foreach(links, partial(gather_content, f.write), start=start)
        except KeyboardInterrupt:
            sys.stderr.write('\n')
            sys.exit('Interrupted.')

    sys.stderr.write('Download complete.\n')
    return True


def gather_content(write, i, l):
    """Download one chapter and append it to the output via *write*.

    write -- the output file's write method
    i     -- chapter index supplied by foreach (unused here)
    l     -- the lxml <a> element linking to the chapter page

    Returns the chapter title (used by foreach for progress display).
    """
    # Equivalent shell pipeline for one chapter:
    #   curl -XPOST -F bookid=2747 -F chapterid=2098547 \
    #     'http://www.feisuzw.com/skin/hongxiu/include/fe1sushow.php' \
    #     --referer http://www.feisuzw.com/Html/2747/2098547.html \
    #   | tail +4 | base64 -d | sed 's/&#&/\\u/g' | ascii2uni -qaF | ascii2uni -qaJ
    url = l.get('href')
    # Chapter URLs look like http://host/Html/<bookid>/<chapterid>.html
    _, _, _, _, bookid, chapterid = url.split('/')
    chapterid = chapterid.split('.', 1)[0]
    r = session.post(
        'http://www.feisuzw.com/skin/hongxiu/include/fe1sushow.php',
        data={
            'bookid': bookid,
            'chapterid': chapterid,
        },
        headers={'Referer': url},  # the endpoint checks the Referer
    )
    text = r.content[3:]  # strip UTF-8 BOM
    # Payload is base64; '&#&' marks \uXXXX escape sequences.
    text = base64.decodebytes(text).replace(b'&#&', br'\u')
    text = text.decode('unicode_escape')
    text = unquote(text)
    # NOTE(review): the original replace targets were eaten by the page scrape;
    # presumably they strip '<p>' and turn '</p>' into newlines — verify
    # against a live chapter payload.
    text = text.replace('<p>', '').replace('</p>', '\n')

    title = l.text
    write(title)
    write('\n\n')
    write(text)
    write('\n')
    return title


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Download a novel from the Feisu Chinese web-novel site')
    parser.add_argument('url', help='URL of the novel front page')
    parser.add_argument('name', default='$name-$author.txt', nargs='?',
                        help='filename template to save to '
                             '(supports $name and $author)')
    parser.add_argument('-s', '--start', default=1, type=int, metavar='N',
                        help='chapter position to start downloading from '
                             '(1-based)')
    args = parser.parse_args()
    main(args.url, args.name, args.start - 1)