#!/usr/bin/env python
import os
import os.path
import sys
def find_import(line):
    """Extract the fully qualified names imported by one source line.

    Handles plain imports (``import a.b.C;``) and brace-selector imports
    (``import a.b.{C, D}``), which are expanded to one name per selector.

    Args:
        line: A single line of Java/Scala source text.

    Returns:
        A list of imported names, or ``None`` when the line is not an
        import statement.
    """
    import_cmd = "import"
    line = line.strip()
    if not line.startswith(import_cmd):
        return None
    rest = line[len(import_cmd):]
    # The keyword must be followed by whitespace; otherwise identifiers such
    # as "important" or "importer" would be mistaken for import statements.
    if not rest[:1].isspace():
        return None
    rest = rest.strip().strip(';').strip()
    parts = rest.split("{")
    if len(parts) == 1:
        return parts
    # "a.b.{C, D}" -> head "a.b." combined with each selector in the braces.
    head = parts[0]
    selectors = parts[1].strip("}").split(",")
    return ["%s%s" % (head, selector.strip())
            for selector in selectors if selector.strip()]
def import2path(roots, import_name):
    """Locate the source file that corresponds to an imported name.

    The dotted name is turned into a relative path (``a.b.C`` -> ``a/b/C``)
    and looked up under each root, trying ``.java`` before ``.scala``.

    Args:
        roots: Iterable of source root directories, searched in order.
        import_name: Dotted name as returned by ``find_import``.

    Returns:
        The path of the first existing file, or ``None`` if no root
        contains a matching ``.java`` or ``.scala`` file.
    """
    spath = import_name.replace('.', '/')
    for root in roots:
        for extension in ("java", "scala"):
            fpath = os.path.join(root, "%s.%s" % (spath, extension))
            if os.path.isfile(fpath):
                return fpath
    return None
def file_info(fpath):
    """Summarise one source file: non-import line count and imported names.

    Blank lines are ignored. Every remaining line is passed to
    ``find_import``; lines it recognises contribute their names to the
    import list and are excluded from the line count.

    Args:
        fpath: Path of the source file to read.

    Returns:
        A tuple ``(line_count, imports)`` where ``line_count`` is the number
        of non-blank lines that are not import statements and ``imports`` is
        the list of imported names in file order.
    """
    # The context manager closes the file even if reading raises.
    with open(fpath, "r") as source:
        stripped = [raw.strip() for raw in source]
    lines = [line for line in stripped if line != ""]

    imports = []
    import_line_count = 0
    for line in lines:
        import_array = find_import(line)
        if import_array is not None:
            # Count import *lines*, not imported names: a single brace
            # import such as "import a.{B, C}" yields several names but is
            # still only one line of source.
            import_line_count += 1
            imports.extend(import_array)
    return (len(lines) - import_line_count, imports)
def collect_file_info(collected, roots, entry_name):
    """Record line counts for an entry and everything it transitively imports.

    ``collected`` is updated in place. A name that resolves to a file is
    stored as ``collected[file_path] = line_count``; a name that cannot be
    resolved under any root is stored as ``collected[name] = None``.

    Args:
        collected: Dict accumulating results across calls (mutated).
        roots: Source root directories passed on to ``import2path``.
        entry_name: Dotted name to start the traversal from.

    Returns:
        None.
    """
    # An explicit stack replaces recursion so that long import chains cannot
    # hit Python's recursion limit. Pushing imports in reverse keeps the
    # visiting order identical to a depth-first recursive walk.
    pending = [entry_name]
    while pending:
        name = pending.pop()
        if name in collected:
            continue
        fpath = import2path(roots, name)
        if fpath is None:
            collected[name] = None
            continue
        if fpath in collected:
            continue
        line_count, imports = file_info(fpath)
        collected[fpath] = line_count
        pending.extend(reversed(imports))
def collect_ref_info(roots, entry_names):
    """Collect reference information for several entry names.

    Args:
        roots: Source root directories to search.
        entry_names: Iterable of dotted names to start from.

    Returns:
        The dict filled in by ``collect_file_info`` for every entry name;
        all entries share the same dict, so each file is processed once.
    """
    collect_info = {}
    for entry_name in entry_names:
        collect_file_info(collect_info, roots, entry_name)
    return collect_info
def show_files_with_lines(files, title):
    """Print a titled listing of files with line counts, plus totals.

    Args:
        files: Iterable of ``(name, line_count)`` pairs.
        title: Text placed in the header line.

    Returns:
        None. Output is written to standard output.
    """
    print("===============%s =================" % title)
    lines_total = 0
    files_total = 0
    for name, line_count in files:
        lines_total += line_count
        files_total += 1
        print("%s:%s" % (name, line_count))
    print("=============== total lines =%d,total files =%d ================="
          % (lines_total, files_total))
def show_files(files, title):
    """Print a titled listing with one entry per line.

    Args:
        files: Iterable of items to print.
        title: Text placed in the header line.

    Returns:
        None. Output is written to standard output.
    """
    print("===============%s =================" % title)
    for entry in files:
        print(entry)
def _read_nonblank_lines(path):
    """Return the stripped, non-empty lines of the text file at *path*."""
    with open(path) as source:
        stripped = [raw.strip() for raw in source]
    return [line for line in stripped if line != ""]


if __name__ == "__main__":
    # argv[1]: file listing source roots, one per line.
    # argv[2]: file listing entry names, one per line.
    if len(sys.argv) < 3:
        sys.exit("usage: %s ROOTS_FILE ENTRY_NAMES_FILE" % sys.argv[0])

    roots = _read_nonblank_lines(sys.argv[1])
    entry_names = _read_nonblank_lines(sys.argv[2])
    ref_info = collect_ref_info(roots, entry_names)

    # Resolved files carry a line count; unresolved names are stored as None.
    in_files = [item for item in ref_info.items() if item[1] is not None]
    out_files = [item[0] for item in ref_info.items() if item[1] is None]

    spark_not_found = sorted(
        f for f in out_files if f.startswith("org.apache.spark."))
    hadoop_files = sorted(
        f for f in out_files if f.startswith("org.apache.hadoop."))
    # Everything unresolved that is neither a Spark nor a Hadoop name.
    other_files = sorted(
        set(out_files) - set(spark_not_found) - set(hadoop_files))

    show_files_with_lines(in_files, "spark source")
    show_files(spark_not_found, "Spark import name not file name")
    show_files(hadoop_files, "Hadoop ref")
    show_files(other_files, "others ref")
# Finds all import reference relationships between Java classes (Python implementation).