Python for Data Analysis, Chapter 4: Basic Use of NumPy

Source: Internet
Author: User
Tags: arithmetic, random seed, scalar

This program accompanies Chapter 4 of "Python for Data Analysis" and introduces the basic usage of NumPy. (Chapter 3 covers the basic use of IPython.)

Topics covered: scientific computing, common functions, array processing, linear algebra operations, the random module, ...

# -*- coding: utf-8 -*-
# Python for Data Analysis, Chapter 4: NumPy Basics
# Arrays and vectorized computation
import time

import numpy as np

# Start timing: record the wall-clock time at which the script begins.
start = time.time()

# Create an array.
# A nested tuple works the same as a nested list. The dtype is optional; when
# it is omitted NumPy infers it from the data.
data = np.array([[1, 2, 3, 4, 5, 6],
                 [7, 8, 9, 10, 11, 12]], dtype=np.int32)
print('data:', data)
print('data.shape:', data.shape)  # same as np.shape(data): (2, 6), a tuple
print('data.dtype:', data.dtype)  # type of the elements: int32
print('type(data):', type(data))  # type of the whole array: numpy.ndarray
print('↑---------------------class1\n\n')

# Quickly create some special-format arrays.
print(np.ones(shape=(2, 3)))  # 2x3 matrix whose elements are all 1
print(np.zeros(shape=(3, 2)))  # 3x2 matrix whose elements are all 0
print(np.empty(shape=(3, 5)))  # 3x5 matrix, uninitialized (arbitrary values)
# np.arange mirrors the built-in range() but returns an ndarray:
# array([0, 1, 2])
print(np.arange(0, 3))
print(np.eye(5))  # 5x5 identity matrix, equivalent to np.identity(5)
print('↑---------------------class2\n\n')

# Data types of an ndarray.
# Use .astype() to convert between data types. Here int -> float; note that
# float -> int truncates the fractional part.
print(data.dtype)
data = data.astype(np.float64)
print(data.dtype)
# String to float.
data = np.array(['1.23', '2.34', '3.45'])
data = data.astype(np.float64)
print(data)
print(data.dtype)
print('↑---------------------class3\n\n')

# Array-with-scalar and array-with-array arithmetic.
# 1. Array and scalar: the scalar is applied to every element of the array
#    using the ordinary arithmetic rules.
data = np.arange(1, 6)
print('data:', data)
data = data + 2
print('data+2:', data)
# 2. Array and array with the same shape: every arithmetic operation is
#    applied element by element.
data1 = np.arange(1, 6)
data2 = np.ones(5).astype(np.int32)
print('\ndata1:', data1)
print('data2:', data2)
print('data1 * data2:', data1 * data2)
print('data1 + data2:', data1 + data2)
# 3. Array and array with different shapes: NumPy's broadcasting mechanism
#    kicks in. Illustrated explanation of broadcasting:
#    http://baijiahao.baidu.com/s?id=1580719686756248184&wfr=spider&for=pc
data1 = np.arange(1, 6).reshape(-1, 1)  # column vector, shape (5, 1)
data2 = np.arange(1, 6)  # shape (5,)
print('\ndata1:', data1)
print('data2:', data2)
print('data1 + data2:', data1 + data2)  # broadcast to shape (5, 5)
print('↑---------------------class4\n\n')

# Basic indexing and slicing.
# Create a slice.
data = np.arange(1, 10)
slice_data = data[2:5]  # slice_data is a slice of data
# ★★★★★ An array slice is a view (reference) of the original array: any
# modification made through the slice shows up in the original array.
print(data)  # [1 2 3 4 5 6 7 8 9]
print(slice_data)  # [3 4 5]
slice_data[1] = 100
print(slice_data)  # [3 100 5]
print(data)  # [1 2 3 100 5 6 7 8 9]
# This is completely different from list slicing: a slice of a list is a copy
# of the original list.
# list(...) is required on Python 3, where range() is an immutable sequence
# rather than a list.
origin_list = list(range(1, 10))
slice_list = origin_list[2:5]
print(origin_list)  # [1, 2, 3, 4, 5, 6, 7, 8, 9]
print(slice_list)  # [3, 4, 5]
slice_list[1] = 100
print(slice_list)  # [3, 100, 5]
print(origin_list)  # [1, 2, 3, 4, 5, 6, 7, 8, 9]
# An array slice can also be copied, like a list slice, instead of referencing
# the original data: call .copy() on it.
data = np.arange(1, 10)
slice_data = data[2:5].copy()  # a copy of the slice, not a view
print(data)  # [1 2 3 4 5 6 7 8 9]
print(slice_data)  # [3 4 5]
slice_data[1] = 100
print(slice_data)  # [3 100 5]
print(data)  # [1 2 3 4 5 6 7 8 9]
# Indexing a one-dimensional array with an integer yields a scalar.
# Indexing an array with two or more dimensions with a single integer yields
# a lower-dimensional array, so data[i, j] and data[i][j] are equivalent.
# A deterministic array is used here instead of np.empty(): uninitialized
# memory may contain NaN, and NaN == NaN is False.
data_2d = np.arange(9).reshape(3, 3)
print(data_2d[1, 2] == data_2d[1][2])
print('↑---------------------class5\n\n')

# Three ways of indexing.
# 1. Slice index: the slice is a view of the original array (data reference).
data = np.arange(1, 6)  # array([1, 2, 3, 4, 5])
data_slice = data[1:3]  # array([2, 3])
print(data_slice)
# Slice indexes on a higher-dimensional array can be mixed with integer
# indexes.
data_nd = np.arange(1, 10).reshape(3, 3)
data_slice = data_nd[1, 1:]  # array([5, 6])
print(data_slice)
data_slice = data_nd[2, :]  # array([7, 8, 9]); ':' selects the whole axis
print(data_slice)
print()
# 2. Boolean index: the result is a copy of the data.
name = np.array(['a', 'b', 'a', 'b', 'c', 'd'])
data = np.arange(1, 19).reshape(6, 3)
data_slice = data[name == 'a']  # rows whose label is 'a' (rows 0 and 2)
print(data)
print(data_slice)
# The subset produced by a boolean index is a copy rather than a view, so
# changing the subset does not affect the original array.
data_slice[1] = 1
print(data)
print(data_slice)
data[data > 10] = 10  # assigning through a boolean mask modifies data itself
print(data)
data[(data == 2) | (data == 7)] = 1  # combine masks with | (or) and & (and)
print(data)
print()
# 3. Fancy indexing: index with an array (or list) of integers.
data = np.arange(1, 10).reshape(3, 3)
data_slice = data[[1, 2, 0]]  # rows 1, 2, 0 in that order
print(data)
print(data_slice)
data_slice[1] = 1  # the subset produced by fancy indexing is also a copy
print(data)
print(data_slice)
# Fancy indexing can also select a rectangular region of the original array;
# this can equivalently be written data[np.ix_([0, 1], [0, 1])].
data_slice = data[[0, 1]][:, [0, 1]]
print(data_slice)
print('↑---------------------class6\n\n')

# Transposing data and swapping axes.
data = np.arange(1, 10).reshape(3, 3)
print(data)
data_t = data.T  # transpose; can also be written data.transpose()
print(data_t)
# The transpose is a view of the original array (no data is copied), so this
# assignment to row 1 of data_t also overwrites column 1 of data.
data_t[1] = 1
print(data_t)
print(data)
print('↑---------------------class7\n\n')

# Universal functions (ufuncs): take an array and apply a function to the
# data element by element.
# Examples: abs, square, sqrt, sign, ceil, floor, rint, mod, exp, and so on.
data = np.arange(10)
data2 = np.sqrt(data)
print(data)
print(data2)
print('↑---------------------class8\n\n')

# Conditional logic as an array operation: np.where replaces the element-wise
# "x if condition else y".
a = np.array([1, 2, 3, 4, 5])
b = np.array([6, 7, 8, 9, 0])
condition = np.array([True, False, True, True, False])
# Pure-Python version. The loop variables get their own names so they do not
# shadow the arrays; under Python 2 the comprehension variables leaked and the
# arrays had to be re-created before the next step.
result1 = [(x if c else y) for x, y, c in zip(a, b, condition)]
print(result1)
# Vectorized version: take from a where condition is True, otherwise from b.
result2 = np.where(condition, a, b)
print(result2)
# np.where also works on higher-dimensional arrays, which
# "x if condition else y" cannot do.
data = np.arange(9).reshape(3, 3)
data2 = np.where(data < 5, 5, data)  # replace every element below 5 with 5
print(data)
print(data2)
print('↑---------------------class9\n\n')

# Array statistics: aggregate functions computed over a whole array or along
# one of its axes.
# sum, mean, std, var, min, max, argmin (index of the minimum), argmax,
# cumsum (cumulative sum of the elements), cumprod (cumulative product).
data = np.arange(9).reshape(3, -1)
mean = data.mean()  # mean of the whole array
print(mean)
mean2 = data.mean(1)  # mean along axis 1, i.e. one mean per row
print(mean2)
cumsum = data.cumsum()
print(cumsum)  # without an axis, a multi-dimensional array is flattened to 1-D
print('↑--------------------class10\n\n')

# Operations on boolean arrays.
# The elements of a boolean array are True and False; in arithmetic True
# counts as 1 and False as 0.
data = np.array([True, False, True, True])
print(data.sum())  # number of True values: 3
# .any() checks whether at least one element is True;
# .all() checks whether every element is True.
print(data.any())
print(data.all())
# When .any() / .all() are applied to a non-boolean array, every non-zero
# element is treated as True.
data = np.array([-1, 1])
print(data.all())
print('↑--------------------class11\n\n')

# Sorting arrays.
data = np.random.randn(6)
print(data)
# The two spellings below give the same result:
# data = np.sort(data)  # np.sort() returns a sorted copy; re-assign to keep it
data.sort()  # the .sort() method sorts the array in place, no assignment
print(data)
# A multi-dimensional array can be sorted along one axis.
data = np.random.randn(3, 3)
print(data)
data.sort(1)  # sort along axis 1, i.e. within each row
print(data)
print('↑--------------------class12\n\n')

# Set operations.
# Unique elements of an array.
data = [1, 2, 3, 3, 3]
# np.unique does not modify its input; it returns a new sorted array of the
# unique values, so the result has to be assigned.
data = np.unique(data)
print(data)
# intersect1d(x, y) returns the sorted common elements (intersection);
# union1d(x, y) returns the sorted union; setxor1d(x, y) returns the symmetric
# difference (elements in exactly one of the two inputs).
# setdiff1d(x, y) is the set difference: the elements of x that are not in y
# (note that it is asymmetric).
data1 = [1, 2, 3]
data2 = [1, 3, 5]
delta = np.setdiff1d(data1, data2)
print(delta)
# np.isin(x, y) is element-wise: for every element of x it reports whether
# that element is in y. It supersedes the deprecated np.in1d.
in_or_not = np.isin(data1, data2)
print(in_or_not)
print('↑--------------------class13\n\n')

# Saving and loading data.
data = np.arange(1, 10).reshape(3, 3)
np.save('data.npy', data)  # save the array in binary format, as a .npy file
data2 = np.load('data.npy')  # load it back: the inverse of the previous line
print(data2)
data = np.random.randn(3, 3)
np.savetxt('data.txt', data, delimiter=',')  # save the data as plain text
data2 = np.loadtxt('data.txt', delimiter=',')  # read the text file into data2
print(data2)
print('↑--------------------class14\n\n')

# linear algebra operations of matrices
# NumPy put the usual linear algebra operations in the NUMPY.LINALG library
Data1 = Np.arange (1, ten). Reshape (3, 3)
Data2 = Np.eye (3) + Np.ones ((3, 3))
Print (DATA1)
Print (DATA2)
Print (")
Multiply = Np.dot (data1, data2)
Print (multiply)
Print (")
INV = NP.LINALG.INV (data2) # inverse matrix
Print (INV)
Print (Np.dot (data2, INV))
Det = Np.linalg.det (data1) # determinant
Print (")
Print (DET) # non-full rank matrix
LAMDA, x = Np.linalg.eig (data2) # Find eigenvalues (LAMDA) and eigenvectors (x)
Print (x)
Print (LAMDA)
x = x.t # transpose and take out each line is the eigenvector of each characteristic value corresponding
Print (Np.dot (data2, x[0]))
Print (lamda[0] * x[0]) # This line is the same as the previous row, verifying that a x = Lamda x
Print (")
# Linalg also traces, QR decomposition, SVD decomposition, solving linear equations, least squares and other functions
Print (' ↑--------------------class15\n\n ')

# The random module, np.random.
# np.random.seed() sets the random seed so that random results can be
# reproduced.
# Note: seeding once fixes the random sequence for the whole program, but
# successive draws still differ from one another. Re-seeding with the same
# value before every draw makes every draw return the same number.
np.random.seed(0)
for i in range(5):
    print(np.random.random())  # .random() draws one float from [0, 1)
print()
for i in range(5):
    np.random.seed(0)  # re-seed before each draw: the same value every time
    print(np.random.randn())
# randn() draws from the standard normal distribution; the arguments give the
# shape of the result.
print(np.random.randn(3, 3))
for i in range(5):
    # Normal distribution with a given mean (loc) and standard deviation
    # (scale).
    print(np.random.normal(loc=50, scale=10))
print(np.random.permutation(10))  # random permutation of 0..9
# There are generators for other distributions as well: beta() draws from the
# beta distribution; chisquare() from the chi-square distribution; gamma()
# from the gamma distribution; uniform() from a uniform distribution
# ([0, 1) by default).
print('↑--------------------class16\n\n')

# End: report the total running time, measured from the `start` timestamp
# taken at the top of the script.
print("----------that's all-----------------\n Total time is %.5f s" % (time.time() - start))

Python for Data Analysis, Chapter 4: Basic Use of NumPy

Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.