Python Pandas usage experience

Source: Internet
Author: User

Function Prototypes:
https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.fillna.html#pandas.DataFrame.fillna

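pad / ffill: fill each missing value with the previous non-missing value (forward fill).
backfill / bfill: fill each missing value with the next non-missing value (backward fill).
None: do not propagate neighbouring values; replace each missing value with the explicitly specified value instead.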

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21st
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# coding: utf-8
# Fill the NaN cells of a small 3x3 frame in each of the four directions.
#
# DataFrame.bfill() / DataFrame.ffill() are used in place of
# fillna(method=...), because the `method` keyword of fillna is deprecated
# in recent pandas releases.  'backfill' and 'pad' are alternative spellings
# of 'bfill' and 'ffill', so those sections print the same results.
import pandas as pd

df = pd.DataFrame([[1, None, 2],
                   [None, 3, None],
                   [None, 4, 5]])

print('origin')
print(df)
#      0    1    2
# 0  1.0  NaN  2.0
# 1  NaN  3.0  NaN
# 2  NaN  4.0  5.0

# Back-fill along each row, then keep the first column: this yields the
# first non-missing value of every row.
print('left')
data = df.bfill(axis=1).iloc[:, 0]
print(data)
# 0    1.0
# 1    3.0
# 2    4.0

# Back-fill down each column, then keep the last column.
print('up')
data = df.bfill().iloc[:, -1]
print(data)
# 0    2.0
# 1    5.0
# 2    5.0

# Equivalent to the legacy fillna(method='bfill', axis=1).
print('left')
data = df.bfill(axis=1)
print(data)
#      0    1    2
# 0  1.0  2.0  2.0
# 1  3.0  3.0  NaN
# 2  4.0  4.0  5.0

# Equivalent to the legacy fillna(method='bfill').
print('up')
data = df.bfill()
print(data)
#      0    1    2
# 0  1.0  3.0  2.0
# 1  NaN  3.0  5.0
# 2  NaN  4.0  5.0

# Equivalent to the legacy fillna(method='ffill', axis=1).
print('right')
data = df.ffill(axis=1)
print(data)
#      0    1    2
# 0  1.0  1.0  2.0
# 1  NaN  3.0  3.0
# 2  NaN  4.0  5.0

# Equivalent to the legacy fillna(method='ffill').
print('down')
data = df.ffill()
print(data)
#      0    1    2
# 0  1.0  NaN  2.0
# 1  1.0  3.0  2.0
# 2  1.0  4.0  5.0

# Legacy spelling: fillna(method='backfill', axis=1) -- same as 'bfill'.
print('left')
data = df.bfill(axis=1)
print(data)
#      0    1    2
# 0  1.0  2.0  2.0
# 1  3.0  3.0  NaN
# 2  4.0  4.0  5.0

# Legacy spelling: fillna(method='backfill') -- same as 'bfill'.
print('up')
data = df.bfill()
print(data)
#      0    1    2
# 0  1.0  3.0  2.0
# 1  NaN  3.0  5.0
# 2  NaN  4.0  5.0

# Legacy spelling: fillna(method='pad', axis=1) -- same as 'ffill'.
print('right')
data = df.ffill(axis=1)
print(data)
#      0    1    2
# 0  1.0  1.0  2.0
# 1  NaN  3.0  3.0
# 2  NaN  4.0  5.0

# Legacy spelling: fillna(method='pad') -- same as 'ffill'.
print('down')
data = df.ffill()
print(data)
#      0    1    2
# 0  1.0  NaN  2.0
# 1  1.0  3.0  2.0
# 2  1.0  4.0  5.0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# Replace NaN with a scalar, with per-column values, and with a cap on how
# many cells are filled in each column.
import pandas as pd
import numpy as np

df = pd.DataFrame([[np.nan, 2, np.nan, 0],
                   [3, 4, np.nan, 1],
                   [np.nan, np.nan, np.nan, 5],
                   [np.nan, 3, np.nan, 4]],
                  columns=list('ABCD'))
print(df)
#      A    B   C  D
# 0  NaN  2.0 NaN  0
# 1  3.0  4.0 NaN  1
# 2  NaN  NaN NaN  5
# 3  NaN  3.0 NaN  4

# Every NaN becomes 0.
print(df.fillna(0))
#      A    B    C  D
# 0  0.0  2.0  0.0  0
# 1  3.0  4.0  0.0  1
# 2  0.0  0.0  0.0  5
# 3  0.0  3.0  0.0  4

# Forward fill; ffill() replaces the deprecated fillna(method='ffill').
# Column C has no valid value to propagate, so it stays NaN.
print(df.ffill())
#      A    B   C  D
# 0  NaN  2.0 NaN  0
# 1  3.0  4.0 NaN  1
# 2  3.0  4.0 NaN  5
# 3  3.0  3.0 NaN  4

# A dict supplies a different replacement value for each column.
values = {'A': 0, 'B': 1, 'C': 2, 'D': 3}
print(df.fillna(value=values))
#      A    B    C  D
# 0  0.0  2.0  2.0  0
# 1  3.0  4.0  2.0  1
# 2  0.0  1.0  2.0  5
# 3  0.0  3.0  2.0  4

# limit=1 fills at most one NaN per column.
print(df.fillna(value=values, limit=1))
#      A    B    C  D
# 0  0.0  2.0  2.0  0
# 1  3.0  4.0  NaN  1
# 2  NaN  1.0  NaN  5
# 3  NaN  3.0  NaN  4

If a column of the imported DataFrame contains dictionaries serialized as JSON strings, use data.join(data['A10'].apply(json.loads).apply(pd.Series)) to split each dictionary into separate columns.

 1 
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Load a tab-separated log file and expand the JSON text stored in one of
# its columns into separate DataFrame columns.
import pandas as pd
import json

filename = 'Top5.txt'
# The file has no header row, so pandas numbers the columns 0..N-1.
data = pd.read_csv(filename, sep="\t", header=None)

# Sample record from the article (it appears wrapped across lines here):
# test model.8.10 modelname 810 8101 2018-03-28 04:21:13 2018-03-28 04:21:13
# 1 0 2018-04-02 14:50:54 {"cell_info": "LTE plmn:46000 earfcn:38400 (B39) cell Identity
# : 197539969 pci:141 tac:37884 rssi:-65 rsrp:-95 rsrq:-11 sinr*10:133 EMM state:registered
# service State:normal reg DOMAIN:CS_PS Lte_tx_power tx = 9 Lte_rx_chain0 rssi=-64 rsrp=-94
# sinr=133 lte_rx_chain1 rssi=-69 rsrp=-99 sinr=118 ", "log_from": "Com.android.phone",
# "reg_at_time": "31112", "rat": "+", "reg_during_time": "3554", "hplmn": "46002"} 2018-04-02

# Rename the positional columns to A0, A1, ... so they can be addressed by name.
data.columns = ['A' + str(i) for i in range(data.shape[1])]
print(data.columns)
# Index(['A0', 'A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A9', 'A10', 'A11'],
#       dtype='object')

print(data['A10'])
# 0    {"cell_info": "LTE plmn:46000 earfcn:38400 (B39 ...

# Parse each JSON string into a dict, turn every dict into a row of columns
# (one column per key), and attach those columns to the original frame.
data = data.join(data['A10'].apply(json.loads).apply(pd.Series))

print(data.columns)
# Output recorded by the original article:
# Index(['A0', 'A1', 'A2', 'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A9', 'A10',
#        'A11', 'cell_info', 'hplmn', 'log_from', 'rat', 'reg_at_time',
#        'reg_during_time'],
#       dtype='object')

Python Pandas usage experience

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.