2018.03.26 - Common string methods in Python pandas
import numpy as np
import pandas as pd
# Common string methods: strip / lstrip / rstrip.
# The sample values and column labels carry leading/trailing spaces so that
# the effect of each call is visible in the printed output.
s = pd.Series([' jack ', 'jill', ' jease ', 'feank'])
df = pd.DataFrame(
    np.random.randn(3, 2),
    columns=[' Column A', ' Column B'],
    index=range(3),
)
print(s)
print(df.columns)

print('----')
print(s.str.lstrip().values)  # remove whitespace on the left only
print(s.str.rstrip().values)  # remove whitespace on the right only
# The .str accessor also works on an Index, so column labels can be cleaned
# in one vectorised call; strip() removes whitespace on both sides.
df.columns = df.columns.str.strip()
print(df.columns)
Result:
0     jack 
1      jill
2    jease 
3     feank
dtype: object
Index([' Column A', ' Column B'], dtype='object')
----
['jack ' 'jill' 'jease ' 'feank']
[' jack' 'jill' ' jease' 'feank']
Index(['Column A', 'Column B'], dtype='object')
# Common string methods: replace (substitute one substring for another).
df = pd.DataFrame(
    np.random.randn(3, 2),
    columns=[' Columns A', '  Columns B'],
    index=range(3),
)
print(df.columns)

# Replace every space with a dash. regex=False makes the pattern a literal
# string regardless of the pandas version's default.
df.columns = df.columns.str.replace(' ', '-', regex=False)
print(df.columns)

# n limits the number of replacements per label: only the first '-' is replaced.
df.columns = df.columns.str.replace('-', 'hehe', n=1, regex=False)
print(df.columns)
Result:
Index([' Columns A', '  Columns B'], dtype='object')
Index(['-Columns-A', '--Columns-B'], dtype='object')
Index(['heheColumns-A', 'hehe-Columns-B'], dtype='object')
# Common string methods: split and rsplit (break each string into a list).
# The sample deliberately mixes strings, a list and NaN: elements that are not
# strings come back as NaN instead of raising.
s = pd.Series(['a,b,c', '1,2,3', ['a,,,c'], np.nan])
print(s)
print('----')
print(s.str.split(','))
print('----')

# Plain [] on the result indexes the Series itself (row label 0) ...
print(s.str.split(',')[0])
# ... whereas .str[] or .str.get() indexes inside every list element.
print(s.str.split(',').str[0])      # first item of each list
print(s.str.split(',').str.get(1))  # second item of each list

# expand=True returns a DataFrame with one column per piece;
# n limits the number of splits performed.
print(s.str.split(',', expand=True, n=1))
# rsplit works like split but starts from the right-hand end of the string.
print(s.str.rsplit(',', expand=True, n=1))

print('dataframe: ')
df = pd.DataFrame({
    'key1': ['a,b,c', '1,2,3', [',,,']],
    'key2': ['a-b-c', '1-2-c', [',-,-,']],
})
print(df['key2'])
print(df['key2'].str.split('-'))
Result:
0     a,B,c
1     1,2,3
2    [a, c]
3       NaN
dtype: object
----
0    [a, B, c]
1    [1, 2, 3]
2          NaN
3          NaN
dtype: object
----
['a', 'B', 'c']
0      a
1      1
2    NaN
3    NaN
dtype: object
0      B
1      2
2    NaN
3    NaN
dtype: object
     0    1
0    a  B,c
1    1  2,3
2  NaN  NaN
3  NaN  NaN
     0    1
0  a,B    c
1  1,2    3
2  NaN  NaN
3  NaN  NaN
dataframe: 
0      a-B-c
1      1-2-c
2    [,-,-,]
Name: key2, dtype: object
0    [a, B, c]
1    [1, 2, c]
2          NaN
Name: key2, dtype: object
# String indexing through the .str accessor.
s = pd.Series(['A', 'b', 'C', 'bbhello', '123', np.nan, 'hj'])
df = pd.DataFrame({
    'key1': list('abcdef'),
    'key2': ['hee', 'fv', 'w', 'hjja', '123', np.nan],
})
print(s, '\n-----')
print(s.str[0])   # first character of every string
print(s.str[:2])  # first two characters of every string
print('-----')
# .str is indexed and sliced the same way as an ordinary Python string;
# missing values stay NaN.
print(df['key2'].str[0])
Result:
0          A
1          b
2          C
3    bbhello
4        123
5        NaN
6         hj
dtype: object 
-----
0      A
1      b
2      C
3      b
4      1
5    NaN
6      h
dtype: object
0      A
1      b
2      C
3     bb
4     12
5    NaN
6     hj
dtype: object
-----
0      h
1      f
2      w
3      h
4      1
5    NaN
Name: key2, dtype: object