Core code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Look up the location of an IPv4 address in a plain-text range table.

Every line of the data file is read as ``<first-ip> <last-ip> <address ...>``:
two dotted-quad addresses delimiting an inclusive range, followed by the
location text (its whitespace-separated pieces are concatenated).
"""
import io
from bisect import bisect

# _list1 holds the range start addresses (sorted, for bisect);
# _list2 holds the matching (start, end, address) records.
_list1, _list2 = [], []
# Separate name for the "already loaded" flag: sharing the name ``_init``
# with the loader function would shadow one with the other.
_initialized = False


def ip2int(ip_str):
    """Return the dotted-quad string *ip_str* as an integer."""
    value = 0
    for part in ip_str.split('.'):
        value = (value << 8) + int(part)
    return value


def _init(path='ipdata.txt'):
    """Load the range table from *path* once; later calls are no-ops.

    Lines with fewer than two fields are skipped.
    """
    global _initialized
    if _initialized:
        return
    records = []
    # NOTE(review): the data file is assumed to be UTF-8 text -- confirm.
    with io.open(path, encoding='utf-8') as data:
        for line in data:
            fields = line.split()
            if len(fields) < 2:
                continue
            first, last = ip2int(fields[0]), ip2int(fields[1])
            records.append((first, last, ''.join(fields[2:])))
    # bisect needs ascending start addresses; do not rely on file order.
    records.sort()
    _list2[:] = records
    _list1[:] = [record[0] for record in records]
    _initialized = True


def ip_from(ip):
    """Return the location text for *ip*, or an "unknown" message.

    The message is returned when *ip* falls before the first range or in a
    gap between ranges.
    """
    _init()
    i = ip2int(ip)
    # idx - 1 is the last range whose start address is <= i.
    idx = bisect(_list1, i)
    if idx > 0:
        frm, to, addr = _list2[idx - 1]
        if frm <= i <= to:
            return addr
    return u'unknown IP address %s' % ip


if __name__ == '__main__':
    print(ip_from('115.238.54.106'))
    print(ip_from('220.181.29.160'))
    print(ip_from('115.238.54.107'))
    print(ip_from('8.8.8.8'))
Download the code package: http://xiazai.jb51.net/201105/yuanma/ipaddress.7z
Next, here is a more complete implementation:
#!/usr/bin/env python
# coding: utf-8
"""Query the "pure" (CZ88) IP database QQWry.Dat from Python.

QQWry.Dat layout::

    +-------------+
    | File header |  (8 bytes)
    +-------------+
    | Record area |  (variable length)
    +-------------+
    | Index area  |  (size determined by the file header)
    +-------------+

File header: 4-byte offset of the first index entry + 4-byte offset of the
last index entry.

Record area: each IP record is ``IP address [country info] [area info]``.

    The country info has three representations:

    * a plain string (5th byte of the record is neither 0x01 nor 0x02);
    * redirect mode 1 (5th byte is 0x01): the next 3 bytes are the offset
      at which the country info is stored;
    * redirect mode 2 (5th byte is 0x02).

    The area info has two representations: a plain string or a redirect.

    Final rule: a country record in redirect mode 1 is not followed by an
    area record.

Index area: each index entry is a 4-byte start IP address + a 3-byte offset
of the IP record.

    The IP in an index entry and the IP in the record it points to delimit
    an IP range; the looked-up information applies to every IP in it.
"""

import os
import socket
import sys
from struct import pack, unpack

_PY2 = sys.version_info[0] == 2


def _console(text):
    """Return *text* ready for printing.

    On Python 2 under Windows the UTF-8 byte string is re-encoded to gb2312
    to avoid garbled console output; everywhere else it is returned as is.
    """
    if _PY2 and sys.platform == 'win32':
        return text.decode('utf-8').encode('gb2312')
    return text


def _default_db_path():
    """Return the path of ``qqwry.dat`` next to this script."""
    local_dir = os.path.dirname(__file__)
    cur_path = os.path.normpath(os.path.join(os.getcwd(), local_dir))
    return cur_path + '/qqwry.dat'


class IPInfo(object):
    """Collection of query functions over a QQWry.Dat database."""

    def __init__(self, dbname):
        """Read the whole database into memory and parse its 8-byte header.

        Raises whatever ``open`` raises for an unreadable file and
        ``struct.error`` when the file is shorter than the header.
        """
        self.dbname = dbname
        # Binary mode is required: text mode on Windows rewrites \r\n to \n
        # and shifts every offset in the file.
        with open(dbname, 'rb') as f:
            self.img = f.read()
        # The database is little-endian throughout, so the byte order is
        # forced instead of relying on the host's native order.
        (self.firstIndex, self.lastIndex) = unpack('<II', self.img[:8])
        # Each index entry is 7 bytes long. Floor division keeps the count
        # an integer on Python 3 as well.
        self.indexCount = (self.lastIndex - self.firstIndex) // 7 + 1

    def getString(self, offset=0):
        """Return the NUL-terminated gb2312 string stored at *offset*.

        The result is a native ``str`` (UTF-8 bytes on Python 2). When the
        bytes cannot be decoded, ``'Unknown'`` is returned.
        """
        end = self.img.find(b'\0', offset)
        if end < 0:
            # No terminator: read up to the end of the data.
            end = len(self.img)
        raw = self.img[offset:end]
        try:
            text = raw.decode('gb2312')
        except UnicodeDecodeError:
            return 'Unknown'
        return text.encode('utf-8') if _PY2 else text

    def getLong3(self, offset=0):
        """Return the 3-byte little-endian integer stored at *offset*."""
        # Pad to 4 bytes so the value can be unpacked as a 32-bit integer.
        return unpack('<I', self.img[offset:offset + 3] + b'\0')[0]

    def getAreaAddr(self, offset=0):
        """Return the area string at *offset*, following redirects."""
        byte = ord(self.img[offset:offset + 1])
        if byte == 1 or byte == 2:
            # Redirect marker: the next 3 bytes are the real offset.
            return self.getAreaAddr(self.getLong3(offset + 1))
        return self.getString(offset)

    def getAddr(self, offset, ip=0):
        """Return ``(country, area)`` for the record body at *offset*.

        *offset* points just past the 4-byte IP of the record. *ip* is
        accepted for compatibility and not used.
        """
        o = offset
        byte = ord(self.img[o:o + 1])

        if byte == 1:
            # Redirect mode 1: [IP][0x01][absolute offset of country+area].
            return self.getAddr(self.getLong3(o + 1))

        if byte == 2:
            # Redirect mode 2:
            # [IP][0x02][absolute offset of country][area info].
            cArea = self.getAreaAddr(self.getLong3(o + 1))
            # Skip the marker and the 3-byte offset to reach the area info.
            o += 4
            aArea = self.getAreaAddr(o)
            return (cArea, aArea)

        # Simplest form: [IP][country string][area info]. A mode-1 redirect
        # may also land here when its target holds the two fields directly.
        cArea = self.getString(o)
        # The decoded string's length differs from the stored byte length,
        # so locate the terminator in the raw data instead of using len().
        o = self.img.find(b'\0', o) + 1
        # The area field may itself be a redirect, so resolve it the same
        # way as in redirect mode 2.
        aArea = self.getAreaAddr(o)
        # NOTE(review): these literal fix-ups are reproduced as they appear
        # in the listing, which looks machine-translated -- confirm the
        # intended strings against the original source.
        if aArea == "?":
            aArea = "Telecom"
        if aArea == "letter":
            aArea = ""
        if aArea == "[":
            aArea = "Unicom"
        return (cArea, aArea)

    def find(self, ip, l, r):
        """Binary-search the index for *ip* (an integer in host order).

        Returns the position of the last index entry in ``[l, r)`` whose
        start address is ``<= ip``; *r* is an exclusive upper bound.
        """
        lo, hi = l, r
        while hi - lo > 1:
            mid = (lo + hi) // 2
            o = self.firstIndex + mid * 7
            start = unpack('<I', self.img[o:o + 4])[0]
            # Strict comparison: an address equal to an entry's start
            # belongs to that entry, not to the one before it.
            if ip < start:
                hi = mid
            else:
                lo = mid
        return lo

    def getIPAddr(self, ip):
        """Return ``(country, area)`` for the dotted-quad string *ip*."""
        ip = unpack('!I', socket.inet_aton(ip))[0]
        # Exclusive upper bound, so the last index entry is reachable.
        i = self.find(ip, 0, self.indexCount)
        o = self.firstIndex + i * 7
        # Index entry: 4-byte start IP + 3-byte offset of the IP record.
        o2 = self.getLong3(o + 4)
        # Skip the 4-byte IP at the head of the record.
        return self.getAddr(o2 + 4)

    def output(self, first, last):
        """Print the index entries numbered ``first`` to ``last - 1``."""
        for i in range(first, last):
            o = self.firstIndex + i * 7
            ip = socket.inet_ntoa(
                pack('!I', unpack('<I', self.img[o:o + 4])[0]))
            offset = self.getLong3(o + 4)
            (c, a) = self.getAddr(offset + 4)
            print("%s %d %s/%s" % (ip, offset, c, a))


def getIP(ip):
    """Return country and area of *ip* concatenated into one string.

    The database is read from ``qqwry.dat`` next to this script.
    """
    i = IPInfo(_default_db_path())
    (c, a) = i.getIPAddr(ip)
    return c + a


def main():
    """Look up ``sys.argv[1]``: an IP, or a file with one IP per line."""
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s <ip-or-file>\n' % sys.argv[0])
        sys.exit(1)
    i = IPInfo(_default_db_path())
    target = sys.argv[1]
    if os.path.exists(target):
        with open(target, "r") as lines:
            for line in lines:
                line = line.replace("\r", "").replace("\n", "")
                (c, a) = i.getIPAddr(line)
                print('%s %s/%s' % (line, _console(c), _console(a)))
    else:
        (c, a) = i.getIPAddr(target)
        print('%s %s/%s' % (target, _console(c), _console(a)))


if __name__ == '__main__':
    main()
Querying the "pure" (CZ88) IP database QQWry.dat with a Python script (Demon's modified version)
I needed to read an IPv6 database whose format is similar to that of the pure IP database QQWry.dat from Python, so I searched the internet and found a Python script on LinuxTOY. It had a few small problems, so I fixed them.
#!/usr/bin/env python
# coding: utf-8

# from: http://linuxtoy.org/files/pyip.py
# Blog: http://linuxtoy.org/archives/python-ip.html
# Modified by Demon
# Blog: http://demon.tw/programming/python-qqwry-dat.html

"""Query the "pure" (CZ88) IP database QQWry.Dat from Python.

QQWry.Dat layout::

    +-------------+
    | File header |  (8 bytes)
    +-------------+
    | Record area |  (variable length)
    +-------------+
    | Index area  |  (size determined by the file header)
    +-------------+

File header: 4-byte offset of the first index entry + 4-byte offset of the
last index entry.

Record area: each IP record is ``IP address [country info] [area info]``.

    The country info has three representations:

    * a plain string (5th byte of the record is neither 0x01 nor 0x02);
    * redirect mode 1 (5th byte is 0x01): the next 3 bytes are the offset
      at which the country info is stored;
    * redirect mode 2 (5th byte is 0x02).

    The area info has two representations: a plain string or a redirect.

    Final rule: a country record in redirect mode 1 is not followed by an
    area record.

Index area: each index entry is a 4-byte start IP address + a 3-byte offset
of the IP record.

    The IP in an index entry and the IP in the record it points to delimit
    an IP range; the looked-up information applies to every IP in it.
"""

import socket
import sys
from struct import pack, unpack

_PY2 = sys.version_info[0] == 2


def _console(text):
    """Return *text* ready for printing.

    On Python 2 under Windows the UTF-8 byte string is re-encoded to gb2312
    to avoid garbled console output; everywhere else it is returned as is.
    """
    if _PY2 and sys.platform == 'win32':
        return text.decode('utf-8').encode('gb2312')
    return text


class IPInfo(object):
    """Collection of query functions over a QQWry.Dat database."""

    def __init__(self, dbname):
        """Read the whole database into memory and parse its 8-byte header.

        Raises whatever ``open`` raises for an unreadable file and
        ``struct.error`` when the file is shorter than the header.
        """
        self.dbname = dbname
        # Demon's note: open in binary mode. Text mode on Windows converts
        # \r\n to \n (see http://demon.tw/programming/python-open-mode.html),
        # and ``open`` is preferred over the ``file`` builtin.
        with open(dbname, 'rb') as f:
            self.img = f.read()
        # Demon's note: unpack uses the machine's byte order by default, but
        # the database is little-endian throughout, so the order is forced.
        (self.firstIndex, self.lastIndex) = unpack('<II', self.img[:8])
        # Each index entry is 7 bytes long. Floor division keeps the count
        # an integer on Python 3 as well.
        self.indexCount = (self.lastIndex - self.firstIndex) // 7 + 1

    def getString(self, offset=0):
        """Return the NUL-terminated gb2312 string stored at *offset*.

        The result is a native ``str`` (UTF-8 bytes on Python 2). When the
        bytes cannot be decoded, ``'Unknown'`` is returned.
        """
        end = self.img.find(b'\0', offset)
        if end < 0:
            # No terminator: read up to the end of the data.
            end = len(self.img)
        raw = self.img[offset:end]
        try:
            text = raw.decode('gb2312')
        except UnicodeDecodeError:
            return 'Unknown'
        return text.encode('utf-8') if _PY2 else text

    def getLong3(self, offset=0):
        """Return the 3-byte little-endian integer stored at *offset*."""
        # Pad to 4 bytes so the value can be unpacked as a 32-bit integer.
        return unpack('<I', self.img[offset:offset + 3] + b'\0')[0]

    def getAreaAddr(self, offset=0):
        """Return the area string at *offset*, following redirects."""
        byte = ord(self.img[offset:offset + 1])
        if byte == 1 or byte == 2:
            # Redirect marker: the next 3 bytes are the real offset.
            return self.getAreaAddr(self.getLong3(offset + 1))
        return self.getString(offset)

    def getAddr(self, offset, ip=0):
        """Return ``(country, area)`` for the record body at *offset*.

        *offset* points just past the 4-byte IP of the record. *ip* is
        accepted for compatibility and not used.
        """
        o = offset
        byte = ord(self.img[o:o + 1])

        if byte == 1:
            # Redirect mode 1: [IP][0x01][absolute offset of country+area].
            return self.getAddr(self.getLong3(o + 1))

        if byte == 2:
            # Redirect mode 2:
            # [IP][0x02][absolute offset of country][area info].
            cArea = self.getAreaAddr(self.getLong3(o + 1))
            # Skip the marker and the 3-byte offset to reach the area info.
            o += 4
            aArea = self.getAreaAddr(o)
            return (cArea, aArea)

        # Simplest form: [IP][country string][area info]. A mode-1 redirect
        # may also land here when its target holds the two fields directly.
        cArea = self.getString(o)
        # The decoded string's length differs from the stored byte length,
        # so locate the terminator in the raw data instead of using len().
        o = self.img.find(b'\0', o) + 1
        # The area field may itself be a redirect, so resolve it the same
        # way as in redirect mode 2.
        aArea = self.getAreaAddr(o)
        return (cArea, aArea)

    def find(self, ip, l, r):
        """Binary-search the index for *ip* (an integer in host order).

        Returns the position of the last index entry in ``[l, r)`` whose
        start address is ``<= ip``; *r* is an exclusive upper bound.
        """
        lo, hi = l, r
        while hi - lo > 1:
            mid = (lo + hi) // 2
            o = self.firstIndex + mid * 7
            # Demon's note: little-endian is forced here as well.
            start = unpack('<I', self.img[o:o + 4])[0]
            # Strict comparison: an address equal to an entry's start
            # belongs to that entry, not to the one before it.
            if ip < start:
                hi = mid
            else:
                lo = mid
        return lo

    def getIPAddr(self, ip):
        """Return ``(country, area)`` for the dotted-quad string *ip*."""
        ip = unpack('!I', socket.inet_aton(ip))[0]
        # Exclusive upper bound, so the last index entry is reachable.
        i = self.find(ip, 0, self.indexCount)
        o = self.firstIndex + i * 7
        # Index entry: 4-byte start IP + 3-byte offset of the IP record.
        o2 = self.getLong3(o + 4)
        # Skip the 4-byte IP at the head of the record.
        return self.getAddr(o2 + 4)

    def output(self, first, last):
        """Print the index entries numbered ``first`` to ``last - 1``."""
        for i in range(first, last):
            o = self.firstIndex + i * 7
            ip = socket.inet_ntoa(
                pack('!I', unpack('<I', self.img[o:o + 4])[0]))
            offset = self.getLong3(o + 4)
            (c, a) = self.getAddr(offset + 4)
            print("%s %d %s/%s" % (ip, offset, c, a))


def main():
    """Look up the IP given as ``sys.argv[1]`` in ``QQWry.Dat``."""
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s <ip>\n' % sys.argv[0])
        sys.exit(1)
    i = IPInfo('QQWry.Dat')
    (c, a) = i.getIPAddr(sys.argv[1])
    print('%s %s/%s' % (sys.argv[1], _console(c), _console(a)))


if __name__ == '__main__':
    main()

# Changelog
# Date: 2009-05-29
# 1. Following a reader's suggestion in the comments below the tool post,
#    changed "o += len(cArea) + 1":
#    http://linuxtoy.org/archives/python-ip.html#comment-113960
#    By that point the string has already been converted to UTF-8, so its
#    length no longer matches the stored byte length.