CODE:
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
Created on 2014-8-19
@author: guaguastd
@name: job_title_standard.py

Read the job titles from a connections CSV file, expand common
abbreviations, and print two frequency tables: one of whole titles and one
of the individual word tokens the titles are made of.
'''

import os
import csv
from collections import Counter
from operator import itemgetter

# Specify CSV directory
CSV_FILE = os.path.join(r"E:", "\\", "Eclipse", "LinkedIn", "Dfile",
                        "My_connections.csv")

# Header of the job-title column in the CSV file. DictReader keys are
# case-sensitive, so adjust this if your export uses a different header.
JOB_TITLE_FIELD = 'Job Title'

# Define a set of transforms that converts the first item
# to the second item
TRANSFORMS = (
    ('Sr.', 'Senior'),
    ('Sr', 'Senior'),
    ('Jr.', 'Junior'),
    ('Jr', 'Junior'),
    ('CEO', 'Chief Executive Officer'),
    ('COO', 'Chief Operating Officer'),
    ('CTO', 'Chief Technology Officer'),
    ('CFO', 'Chief Finance Officer'),
    ('VP', 'Vice President'),
)


def read_job_titles(csv_file):
    """Return the raw job-title cell of every row in *csv_file*.

    The file is closed as soon as the rows have been read.  A row that is
    shorter than the header yields ``None`` for the missing cell; a file
    without a job-title column raises ``KeyError``.
    """
    with open(csv_file) as handle:
        reader = csv.DictReader(handle, delimiter=',', quotechar='"')
        return [row[JOB_TITLE_FIELD] for row in reader]


def split_titles(raw_titles):
    """Split combined titles such as "President/CEO" into separate titles.

    Only '/' is treated as a separator; variations like "President & CEO"
    or "President and CEO" are left as a single title.  Missing or blank
    cells are skipped, and empty fragments (e.g. from a trailing '/') are
    dropped so they are not counted as a title.
    """
    titles = []
    for raw in raw_titles:
        if not raw or not raw.strip():
            continue
        for part in raw.split('/'):
            part = part.strip()
            if part:
                titles.append(part)
    return titles


def expand_abbreviations(title, transforms=TRANSFORMS):
    """Replace common/known abbreviations in *title*.

    Each (abbreviation, expansion) pair is applied in order with a plain,
    case-sensitive substring replace.
    NOTE: because matching is by substring, an abbreviation embedded in a
    longer word (e.g. "SVP") is expanded as well.
    """
    for short, full in transforms:
        title = title.replace(short, full)
    return title


def tokenize(titles):
    """Return every whitespace-separated word of *titles*, commas stripped."""
    return [word.strip(',') for title in titles for word in title.split()]


def frequency_rows(items, min_freq=1, min_length=0):
    """Count *items* and return (item, count) pairs, most frequent first.

    Pairs whose count is below *min_freq* or whose item is shorter than
    *min_length* characters are left out.
    """
    ranked = sorted(Counter(items).items(), key=itemgetter(1), reverse=True)
    return [(item, freq) for item, freq in ranked
            if freq >= min_freq and len(item) >= min_length]


def print_frequency_table(label, rows):
    """Print *rows* of (item, count) as a left-aligned two-column table."""
    # Imported here so the pure helpers above stay usable (and testable)
    # without the third-party prettytable package installed.
    from prettytable import PrettyTable

    table = PrettyTable(field_names=[label, 'Freq'])
    table.align = 'l'
    for item, freq in rows:
        table.add_row([item, freq])
    print(table)


def main(csv_file=CSV_FILE):
    """Print the title and token frequency tables for *csv_file*."""
    titles = [expand_abbreviations(title)
              for title in split_titles(read_job_titles(csv_file))]

    # Print out a table of titles sorted by frequency
    print_frequency_table('Title', frequency_rows(titles))

    # Print out a table of tokens sorted by frequency; tokens of one or
    # two characters are ignored.
    print_frequency_table('Token',
                          frequency_rows(tokenize(titles), min_length=3))


if __name__ == '__main__':
    main()
RESULT:
+-----------------------------------+------+
| Title                             | Freq |
+-----------------------------------+------+
| Senior Software Developer         | 1    |
| Sales Manager                     | 1    |
| Software Manager                  | 1    |
| Online Marketing Manager          | 1    |
| Senior Consultant                 | 1    |
| Chief Executive Officer & Founder | 1    |
| Director                          | 1    |
| S                                 | 1    |
| Student                           | 1    |
| Senior Software Engineer          | 1    |
| ???                               | 1    |
+-----------------------------------+------+

+------------+------+
| Token      | Freq |
+------------+------+
| Manager    | 3    |
| Senior     | 3    |
| Software   | 3    |
| Marketing  | 1    |
| Founder    | 1    |
| Consultant | 1    |
| Executive  | 1    |
| Sales      | 1    |
| Developer  | 1    |
| Director   | 1    |
| Chief      | 1    |
| Officer    | 1    |
| Student    | 1    |
| Online     | 1    |
| ???        | 1    |
| Engineer   | 1    |
+------------+------+