Python programming: getting started with pandas
I finally found some time to learn pandas. These notes cover the part I have learned so far, and I will keep adding to them later.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Build a Series from a plain Python list. No index is supplied, so pandas
# assigns the default integer index; the NaN entry makes the dtype float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])
print(s)
"""
0 1.0
1 3.0
2 5.0
3 NaN
4 6.0
5 8.0
dtype: float64
"""
# Reading data
# Load a CSV file into a DataFrame.
# By default the first row of the file is used as the column names;
# pass header=None when the data itself starts on the first line.
data = pd.read_csv("data.csv")
print(data)
"""
id name age score
0 1 tom 12 98
1 2 tom 12 98
2 3 tom 12 98
3 4 tom 12 98
4 5 tom 12 98
5 6 tom 12 98
6 7 tom 12 98
7 8 tom 12 98
8 9 tom 12 98
9 10 tom 12 98
"""
# Preview the leading rows (five rows when no count is given)
print(data.head())
"""
id name age score
0 1 tom 12 98
1 2 tom 12 98
2 3 tom 12 98
3 4 tom 12 98
4 5 tom 12 98
"""
# Preview the trailing rows
print(data.tail())
"""
id name age score
5 6 tom 12 98
6 7 tom 12 98
7 8 tom 12 98
8 9 tom 12 98
9 10 tom 12 98
"""
# Show the column labels
print(data.columns)
# Index (['id', 'name', 'age', 'score'], dtype = 'object')
# Show the row index (row labels)
print(data.index)
# RangeIndex (start = 0, stop = 10, step = 1)
# Show the shape as (number of rows, number of columns)
print(data.shape)
# (10, 4)
# Indexing and calculations
# Column labels are strings (object dtype); row labels default to integers.
# Select rows by label with .loc -- both ends of the range are included
print(data.loc[3:6])
"""
id name age score
3 4 tom 12 98
4 5 tom 12 98
5 6 tom 12 98
6 7 tom 12 98
"""
# Select rows with a plain slice -- positional, so the end is excluded
print(data[3:6])
"""
id name age score
3 4 tom 12 98
4 5 tom 12 98
5 6 tom 12 98
"""
# Select several columns by passing a list of column labels
columns = ["name", "age"]
print(data[columns])
"""
name age
0 tom 12
1 tom 12
2 tom 12
3 tom 12
4 tom 12
5 tom 12
6 tom 12
7 tom 12
8 tom 12
9 tom 12
"""
# Column labels as a Python list
print(data.columns.tolist())
# ['id', 'name', 'age', 'score']
# Column labels as a NumPy array
print(data.columns.values)
# ['id' 'name' 'age' 'score']
print(data.columns.values.dtype)
# object
# Select rows and columns together in a single .loc call
print(data.loc[3:6, ["name", "age"]])
"""
name age
3 tom 12
4 tom 12
5 tom 12
6 tom 12
"""
# A single column label returns a Series
print(data["name"])
"""
0 tom
1 tom
2 tom
3 tom
4 tom
5 tom
6 tom
7 tom
8 tom
9 tom
Name: name, dtype: object
"""
# Maximum value of a column
print(data["id"].max())
# 10
# Element-wise arithmetic: every value in the "id" column is divided by the
# number of rows, producing a new float64 Series of the same length.
# NOTE(review): despite its name, age_average is computed from "id" and is
# not a single mean value -- confirm the intended calculation.
num = len(data)
age_average = data["id"] / num
print(age_average.head())
"""
0 0.1
1 0.2
2 0.3
3 0.4
4 0.5
Name: id, dtype: float64
"""
# Sort by "id" in descending order.
# sort_values() returns a new, sorted DataFrame and leaves `data` untouched
# (inplace defaults to False), so the returned frame must be used directly;
# printing `data` afterwards would still show the original order.
print(data.sort_values("id", ascending=False).head())
View comments