newlisp跟蹤jobtracker狀態,newlispjobtracker

來源:互聯網
上載者:User

newlisp跟蹤jobtracker狀態,newlispjobtracker

基本思路:通過newlisp定時下載jobtracker頁面,用Regex(正規表示式)解析html中的table元素,然後取得最新的mapreduce狀態。

每次獲得狀態資料後,存入mysql資料庫,然後用tableau將mapreduce叢集狀態以報表呈現。


這是jobtracker網站的資料



這是Tableau繪製的報表


這樣就可以用資料視覺化的方式展示Hadoop叢集的計算壓力狀態。


下面是newlisp代碼,主要就是用Regex解析html,用mysql模組寫入資料庫。

#!/usr/bin/newlisp

;; Poll a Hadoop JobTracker status page and append the values of its cluster
;; summary table to a MySQL table.
;;
;; Usage: ./job.lsp <jobtracker-host> [port]        (port defaults to "80")
;;
;; Globals set along the way (kept because later steps read them):
;;   args-length, domain, port, url, page-content, all-tables, summary-table,
;;   all-tds, all-summary-values, mysql-* settings, insert-summary-sql.

(load "mysql.lsp")

;; Print `message` and terminate with a non-zero exit status so a scheduler
;; can distinguish a failed poll from a successful one.
(define (fail-and-exit message)
  (println message)
  (exit 1))

;; Echo the command line and make sure it has 3 or 4 entries
;; (interpreter, script, host, optional port). Sets `args-length`.
(define (check-args)
  (print "args: ")
  (println (main-args))
  (set 'args-length (length (main-args)))
  ;; The usage text promises "3 or 4"; enforce both bounds so that extra
  ;; arguments cannot leave `port` unset.
  (if (or (< args-length 3) (> args-length 4))
      (fail-and-exit "the number of args must be 3 or 4, e.g. ./job.lsp jobtracker.bigdata.cn 8080")))

;; Derive `domain`, `port` and `url` from the command line.
;; Relies on `args-length` having been set by check-args.
(define (parse-args)
  (set 'domain (main-args 2))
  (set 'port (if (= 4 args-length) (main-args 3) "80"))
  (set 'url (string "http://" domain ":" port "/jobtracker.jsp"))
  (println (string "job tracker site is located at " url)))

;; Download the JobTracker page into `page-content` and parse it.
;; Returns the list of summary values produced by extract-tables.
(define (access-job-tracker-site)
  (set 'page-content (get-url url))
  ;; get-url reports failures as a string starting with "ERR:" instead of
  ;; signalling an error, so it has to be checked explicitly.
  (if (or (not (string? page-content)) (starts-with page-content "ERR:"))
      (fail-and-exit (string "failed to download " url ": " page-content)))
  (extract-tables page-content))

;; Return `table` when it is the cluster summary table (recognised by the
;; "Running Map Tasks" header text), otherwise nil.
(define (extract-summary-table table)
  (if (regex "Running Map Tasks" table)
      table))

;; Return the text content of one "<td>...</td>" fragment. When the cell
;; wraps its value in a link, the link text is returned instead.
;; Surrounding whitespace is stripped.
(define (get-number td)
  (let (text td
        cell (regex "<td>([\\s\\S]*?)</td>" td))
    ;; [\s\S] rather than "." so cells that span several lines still match;
    ;; index 3 of a regex result is the first captured group.
    (if cell (set 'text (cell 3)))
    (if (find "<a href" text)
        (let (anchor (regex "<a[^>]*>([\\s\\S]*?)</a>" text))
          (if anchor (set 'text (anchor 3)))))
    (replace "^\\s+|\\s+$" text "" 0)))

;; Map a list of "<td>...</td>" fragments to their cell texts, in order.
(define (remove-td tds)
  (map get-number tds))

;; Collect every <td> cell of `table` into `all-tds` and store the extracted
;; texts in `all-summary-values` (also the return value). A non-string
;; `table` yields an empty list instead of an error.
(define (parse-summary-table table)
  (set 'all-tds
       (if (string? table)
           (find-all "<td>[\\s\\S]*?</td>" table)
           '()))
  (set 'all-summary-values (remove-td all-tds)))

;; Find the summary table inside `html-content` and parse it.
;; When several tables match, the last one wins (as in the original loop).
(define (extract-tables html-content)
  (set 'all-tables (find-all "<table\\b[^>]*>[\\s\\S]*?</table>" html-content))
  (set 'summary-table nil)
  (dolist (table all-tables)
    (if (extract-summary-table table)
        (set 'summary-table table)))
  (if (not summary-table)
      (fail-and-exit (string "no summary table (\"Running Map Tasks\") found at " url)))
  (parse-summary-table summary-table))

;; Return `text` unchanged when it is a plain decimal number, otherwise the
;; SQL keyword NULL. The values come from scraped HTML and are placed into
;; the statement unquoted, so anything non-numeric must not pass through.
;; NOTE(review): assumes the target columns accept NULL - confirm against
;; the table definition.
(define (sql-number text)
  (if (and (string? text) (regex "^-?[0-9]+(\\.[0-9]+)?$" text))
      text
      "NULL"))

;; Insert one row built from the first 14 entries of `all-summary-values`
;; (plus now() as collect_time) into the job tracker summary table.
(define (write-summary-to-mysql all-summary-values)
  (println all-summary-values)
  (if (or (not (list? all-summary-values))
          (< (length all-summary-values) 14))
      (fail-and-exit (string "expected at least 14 summary values, got: " all-summary-values)))
  (set 'mysql-instance (Mysql))
  (println "mysql-instance: " mysql-instance)
  ;; NOTE(review): connection settings and credentials are hard-coded;
  ;; consider moving them to a config file or the environment.
  (set 'mysql-host "10.100.10.10")
  (set 'mysql-port 3306)
  (set 'mysql-user "user")
  (set 'mysql-pwd "123456")
  (set 'mysql-db "bigdata_data_market")
  (set 'job-tracker-summary-table "hadoop_job_tracker_summary")
  (:connect mysql-instance mysql-host mysql-user mysql-pwd mysql-db mysql-port)
  (:query mysql-instance "SET character_set_client = utf8;")
  ;; Column names (including their spelling) are reproduced exactly as in
  ;; the original statement because they must match the existing table.
  (set 'insert-summary-sql
       (format "insert into %s (collect_time,running_map_tasks,running_reduce_tasks,total_submissions,nodes,occupied_map_slots,occupied_reduce_slots,reserved_map_slots,reserved_reduce_slots,map_task_capacity,reduce_task_capacity,average_tasks_per_node,blacked_listed_nodes,gray_listed_nodes,exclueded_nodes) values (now(),%s)"
               job-tracker-summary-table
               (join (map sql-number (slice all-summary-values 0 14)) ",")))
  (println insert-summary-sql)
  (:query mysql-instance insert-summary-sql))

;; main logic starts now
(check-args)
(parse-args)
(access-job-tracker-site)
(write-summary-to-mysql all-summary-values)
(exit)






相關文章

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.