<!doctype html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Document</title>
  <script>
    /**
     * Simulated web crawler: scans the serialized markup of the current
     * page's <body> and logs the href value of every <a> opening tag
     * to the console.
     *
     * Takes no arguments and returns nothing; the only output is one
     * console.log call per link found.
     */
    function Geturls() {
      // Matches an <a ...> opening tag; capture group 1 is the href value
      // (quotes excluded). The "g" flag is required: it makes exec() resume
      // from reg.lastIndex on each call instead of restarting at index 0,
      // which would otherwise loop forever on the first match.
      var reg = /<a\s+[^>]*?href=['"]([^'"]+?)['"][^>]*?>/g;

      // Holds the result of the latest exec() call; null means "no more matches".
      var arr = null;

      // Serialized contents of the <body> element.
      var html = document.body.innerHTML;

      // Repeatedly search html for text that satisfies reg, stopping once
      // exec() reports that nothing further was found.
      while ((arr = reg.exec(html)) !== null) {
        // arr: ["<a ....>", "http://..."]
        // Output the href captured by the first group of this match.
        console.log(arr[1]);
      }
    }
  </script>
</head>
<body>
  <!-- NOTE(review): this link has no rel attribute in the original, so it loads
       nothing. It is not an <a> tag, so the crawler pattern skips it; it is
       presumably here as a negative test case. Add rel="stylesheet" if the
       stylesheet is actually meant to be applied. -->
  <link href="Index.css">
  <a class="header" href="http://tedu.cn">go to Tedu</a>
  <button type="button" onclick="Geturls()">Start crawler</button>
</body>
</html>
JavaScript: a simulated web crawler