Python: using regular expressions to find floating-point numbers in strings or files (code example)
When Python and numpy are used to process large amounts of data, reading and writing that data is a recurring task, so several small functions were written to make it easier:
# -*- coding: utf-8 -*-
# ----------------------------------------------------------------------
# FileName: gettxtdata.py
# Function: read numeric (floating-point) data from strings and files.
#           Mainly provides functions similar to MATLAB's dlmread and
#           dlmwrite, plus the loadtxtdata and savetxtdata functions.
# Date:     2013-1-10
# Author:   wu xuping
# ----------------------------------------------------------------------
import os
import re
import tempfile

import numpy

# A '#' starts a comment that runs to the end of its line.  MULTILINE makes
# '$' match at every line end, so comments on any line of a multi-line
# string are removed, not only one on the last line.
_COMMENT_RE = re.compile(r'#.*$', re.MULTILINE)

# Any character that cannot occur in a float literal.  IGNORECASE keeps the
# upper-case exponent marker 'E' as well as 'e'.
_NON_NUMERIC_RE = re.compile(r'[^0-9.e+-]', re.IGNORECASE)

# A run of '.', 'e', '+', '-' that is directly followed by whitespace or by
# the end of the text cannot be the tail of a valid number.  Matching the
# whole run at once removes it regardless of its length.
_DANGLING_RE = re.compile(r'[.e+-]+(?=\s|$)', re.IGNORECASE)


def _clean_numeric_text(text):
    """Blank out everything in *text* that cannot belong to a float.

    Comments, non-numeric characters and dangling '.', 'e', '+', '-' runs
    are replaced with spaces (so neighbouring numbers never get glued
    together).  Line breaks are non-numeric characters too, so the result
    is always a single line.  Returns the stripped result, which is an
    empty string when no numeric text is left.
    """
    text = _COMMENT_RE.sub(' ', text)
    text = _NON_NUMERIC_RE.sub(' ', text)
    text = _DANGLING_RE.sub(' ', text)
    return text.strip()


def _parse_row(cleaned):
    """Convert one cleaned, whitespace-separated line to a 1-D float array.

    numpy.genfromtxt returns a 0-d array when the line holds a single
    value; numpy.atleast_1d turns that into a length-1 array so callers
    can always index and slice the result.  Tokens that still are not
    valid numbers come back from genfromtxt as nan.
    """
    return numpy.atleast_1d(numpy.genfromtxt([cleaned]))


def StringToDoubleArray(String):
    """Extract all floating-point numbers found in a string.

    Every character that cannot be part of a floating-point number is
    replaced with a space, and a comment starting with '#' is removed up
    to the end of its line.

    Args:
        String: the text to scan; it may span several lines.

    Returns:
        A one-dimensional numpy array (float64) with the numbers in the
        order they appear; an empty array of shape (0,) when the text
        contains no numbers.
    """
    cleaned = _clean_numeric_text(String)
    if not cleaned:
        return numpy.empty([0], numpy.float64)
    return _parse_row(cleaned)


def GetDoubleListFromString(String):
    """Extract the floating-point numbers of a string line by line.

    The string is split at line feeds and each line is cleaned the same
    way as in StringToDoubleArray.  Lines without any numbers (blank
    lines, comment-only lines) are skipped.

    Args:
        String: the text to scan.

    Returns:
        A list of one-dimensional numpy arrays, one per line that contains
        numbers.  The arrays may have different lengths.
    """
    rows = []
    for line in String.split('\n'):
        cleaned = _clean_numeric_text(line)
        if cleaned:
            rows.append(_parse_row(cleaned))
    return rows


def GetDoubleListFromFile(FileName):
    """Extract the floating-point numbers of a text file line by line.

    Args:
        FileName: path of the text file to read.

    Returns:
        A list of one-dimensional numpy arrays as produced by
        GetDoubleListFromString; each element corresponds to one line of
        the file that contains numbers, and the elements may have
        different lengths.
    """
    with open(FileName, 'r') as handle:
        return GetDoubleListFromString(handle.read())


def dlmread(FileName, dtype=numpy.float64):
    """Load numeric data from a text file into a two-dimensional array.

    Any non-numeric character (';', ',', spaces, tabs, ...) acts as a
    separator, and comments starting with '#' are ignored.  This reader
    places the fewest requirements on the layout of the file, but it is
    slower than loadtxtdata because every line is cleaned with regular
    expressions.

    Args:
        FileName: path of the text file to read.
        dtype: element type of the returned array.

    Returns:
        A two-dimensional numpy array with one row per data line.  When
        the lines hold different amounts of numbers, every row is
        truncated to the shortest one.  A file without any numbers yields
        an empty array of shape (0, 0).
    """
    rows = GetDoubleListFromFile(FileName)
    if not rows:
        # min() of an empty sequence would raise; report "no data" instead.
        return numpy.empty([0, 0], dtype=dtype)
    width = min(row.size for row in rows)
    return numpy.array([row[:width] for row in rows], dtype=dtype)


def loadtxtdata(filename, delimiter=""):
    """Load numeric data from a delimited text file.

    Each character of *delimiter*, as well as ',' and ';', is replaced
    with a space before the text is handed to numpy.genfromtxt.  Unlike
    dlmread no other clean-up is done, so the file has to be well formed;
    in exchange this reader is faster.

    Args:
        filename: path of the text file to read.
        delimiter: additional separator characters; every character of
            the string is treated as a separator on its own.

    Returns:
        The array produced by numpy.genfromtxt for the file content.
    """
    with open(filename, 'r') as handle:
        text = handle.read()
    for separator in delimiter + ",;":
        text = text.replace(separator, " ")
    return numpy.genfromtxt(text.split('\n'))


def savetxtdata(filename, X, fmt='%.8e', delimiter=' ', newline='\n'):
    """Save array data to a text file using numpy.savetxt.

    Args:
        filename: path of the file to write.
        X: the data to save.
        fmt: format string applied to every value.
        delimiter: string written between columns.
        newline: string written between rows.

    Returns:
        True once the file has been written.
    """
    numpy.savetxt(filename, X, fmt=fmt, delimiter=delimiter, newline=newline)
    return True


def dlmwrite(filename, X, fmt='%.8e', delimiter=' ', newline='\n'):
    """Save array data to a text file; same behaviour as savetxtdata.

    Args:
        filename: path of the file to write.
        X: the data to save.
        fmt: format string applied to every value.
        delimiter: string written between columns.
        newline: string written between rows.

    Returns:
        True once the file has been written.
    """
    return savetxtdata(filename, X, fmt=fmt, delimiter=delimiter,
                       newline=newline)


# ----------------------------------------------------------------------
# Test program
# ----------------------------------------------------------------------
if __name__ == '__main__':
    # Generate random data.
    data = numpy.random.randn(3, 4)
    # Use a temporary file so the demo runs on any platform.
    descriptor, filename = tempfile.mkstemp(suffix='.txt')
    os.close(descriptor)
    try:
        # Write the file, then read it back with every reader.
        dlmwrite(filename, data)
        x = GetDoubleListFromFile(filename)
        print(x)
        print(dlmread(filename))
        y = StringToDoubleArray('79l890joj')
        print(y)
        z = loadtxtdata(filename)
        print(z)
    finally:
        os.remove(filename)
This code has only been tried with Python 2.7. If you want to use it with Python 3.x, please test it yourself first.