Using regular expressions in Python to extract floating-point numbers from strings or files

Source: Internet
Author: User

Using regular expressions in Python to extract floating-point numbers from strings or files

When processing large amounts of data with Python and numpy, I wrote several small functions to make reading and writing the data easier:

# -*- coding: utf-8 -*-
# ----------------------------------------------------------------------
# FileName: gettxtdata.py
# Function: read numeric data (floating-point numbers) from strings and files.
#           Mainly provides functions similar to MATLAB's dlmread and dlmwrite,
#           plus the loadtxtdata and savetxtdata functions.
# Date: 2013-1-10
# Author: wu xuping
# ----------------------------------------------------------------------
import re
from io import StringIO

import numpy

# A comment runs from '#' to the end of its line. MULTILINE is required so
# that '$' matches at every line end, not only at the end of the whole string.
_COMMENT = re.compile(r'#.*$', re.MULTILINE)
# Every character that cannot be part of a floating-point literal
# (this includes newlines, so cleaned text always ends up on a single line).
_NON_NUMERIC = re.compile(r'[^0-9.e+-]', re.IGNORECASE)
# A '.', 'e', '+' or '-' directly followed by whitespace, or an 'e', '+' or
# '-' at the very end of the text, cannot terminate a valid number.
_DANGLING = re.compile(r'[.e+-](?=\s)|[e+-]$', re.IGNORECASE)


# ----------------------------------------------------------------------
def _clean_numeric_text(text):
    """Return *text* reduced to whitespace-separated numeric tokens.

    Comments are removed, every non-numeric character is replaced with a
    space, and dangling '.', 'e', '+', '-' characters are blanked out.
    The result is stripped, so it is empty when nothing numeric remains.
    """
    text = _COMMENT.sub(' ', text)
    text = _NON_NUMERIC.sub(' ', text)
    # Removing one dangling character can expose another one (for example
    # "3e+-" -> "3e+ " -> "3e  " -> "3   "), so repeat until nothing changes.
    # Each pass turns at least one non-space character into a space, which
    # guarantees termination.
    while True:
        cleaned = _DANGLING.sub(' ', text)
        if cleaned == text:
            return text.strip()
        text = cleaned


# ----------------------------------------------------------------------
def StringToDoubleArray(String):
    """Extract the floating-point numbers contained in a string.

    All characters that cannot belong to a floating-point literal are
    replaced with spaces, and comments (from '#' to the end of the line)
    are discarded. Line breaks are treated like any other separator.

    Parameters
    ----------
    String : str
        Arbitrary text.

    Returns
    -------
    numpy.ndarray
        One-dimensional float64 array; empty when no numeric text is found.
        Tokens that survive the cleanup but are still not valid numbers are
        passed to ``numpy.genfromtxt`` unchanged.
    """
    cleaned = _clean_numeric_text(String)
    if not cleaned:
        return numpy.empty([0], numpy.float64)
    # genfromtxt returns a 0-d array for a single value; callers index the
    # result, so always hand back at least one dimension.
    return numpy.atleast_1d(numpy.genfromtxt(StringIO(cleaned)))


# ----------------------------------------------------------------------
def GetDoubleListFromString(String):
    """Convert a multi-line string into a list of per-line number arrays.

    The string is split into lines and every line is processed exactly like
    ``StringToDoubleArray`` processes its input. Lines that contain no
    numeric text are skipped.

    Parameters
    ----------
    String : str
        Text with one data row per line.

    Returns
    -------
    list of numpy.ndarray
        One one-dimensional array per line that contained numeric text.
        The arrays may have different lengths.
    """
    DoubleList = []
    for line in String.splitlines():
        row = StringToDoubleArray(line)
        if row.size > 0:
            DoubleList.append(row)
    return DoubleList


# ----------------------------------------------------------------------
def GetDoubleListFromFile(FileName):
    """Read a text file and return its numbers as a list of arrays.

    Parameters
    ----------
    FileName : str
        Path of the text file to read.

    Returns
    -------
    list of numpy.ndarray
        One array per line of the file that contained numeric text. Each
        element (each line of the file) may hold a different number of
        values.
    """
    with open(FileName, 'r') as handle:
        content = handle.read()
    return GetDoubleListFromString(content)


# ----------------------------------------------------------------------
def dlmread(FileName, dtype=numpy.float64):
    """Load numeric data from a text file into a two-dimensional array.

    Any non-numeric character acts as a delimiter, and comments starting
    with '#' are ignored. This makes few demands on the layout of the file,
    but it is slower than ``loadtxtdata``.

    Parameters
    ----------
    FileName : str
        Path of the text file to read.
    dtype : data-type, optional
        Element type of the returned array.

    Returns
    -------
    numpy.ndarray
        Two-dimensional array with one row per numeric line. When the lines
        hold different numbers of values, every row is truncated to the
        shortest one. A file without numeric lines gives an array of shape
        (0, 0).
    """
    rows = GetDoubleListFromFile(FileName)
    if not rows:
        return numpy.empty((0, 0), dtype=dtype)
    # Only the columns present in every row can form a rectangular array.
    min_columns = min(row.size for row in rows)
    return numpy.array([row[:min_columns] for row in rows], dtype=dtype)


# ----------------------------------------------------------------------
def loadtxtdata(filename, delimiter=""):
    """Load numeric data from a delimited text file.

    The characters ',' and ';', whitespace, and every character of
    *delimiter* are treated as separators. The text is otherwise handed to
    ``numpy.genfromtxt`` as is, so the file must be laid out cleanly; in
    exchange this is faster than ``dlmread``.

    Parameters
    ----------
    filename : str
        Path of the text file to read.
    delimiter : str, optional
        Additional separator characters; each character is handled
        individually.

    Returns
    -------
    numpy.ndarray
        The array produced by ``numpy.genfromtxt``.
    """
    with open(filename, 'r') as handle:
        text = handle.read()
    # Map every separator character to a space in a single pass.
    separators = {ord(char): ' ' for char in delimiter + ",;"}
    return numpy.genfromtxt(StringIO(text.translate(separators)))


# ----------------------------------------------------------------------
def savetxtdata(filename, X, fmt='%.8e', delimiter=' ', newline='\n'):
    """Save an array to a text file.

    Thin wrapper around ``numpy.savetxt``.

    Parameters
    ----------
    filename : str
        Path of the file to write.
    X : array_like
        Data to save.
    fmt : str, optional
        Format applied to each value.
    delimiter : str, optional
        String separating the columns.
    newline : str, optional
        String separating the rows.

    Returns
    -------
    bool
        Always True.
    """
    numpy.savetxt(filename, X, fmt=fmt, delimiter=delimiter, newline=newline)
    return True


# ----------------------------------------------------------------------
def dlmwrite(filename, X, fmt='%.8e', delimiter=' ', newline='\n'):
    """Save an array to a text file (alias of ``savetxtdata``).

    Takes the same parameters as ``savetxtdata`` and always returns True.
    """
    return savetxtdata(filename, X, fmt=fmt, delimiter=delimiter,
                       newline=newline)


# ----------------------------------------------------------------------
# Test program
# ----------------------------------------------------------------------
if __name__ == '__main__':
    # Generate random data
    data = numpy.random.randn(3, 4)
    filename = 'd:/x.txt'
    # Write the file
    dlmwrite(filename, data)
    x = GetDoubleListFromFile(filename)
    print(x)
    print(dlmread(filename))
    y = StringToDoubleArray('79l890joj')
    print(y)
    z = loadtxtdata(filename)
    print(z)

I have only tested this with Python 2.7. If you want to use it with Python 3.x, please test it yourself.




Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of the page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.