slice(start, stop)
slice() 函数只能处理字符型数据。
start 从0开始,取值范围前闭后开。
from pandas import read_csv
# Load the phone-number sample data into a DataFrame.
df = read_csv('/users/bakufu/desktop/4.6/data.csv')
Out[65]:
tel
0 18922254812
1 13522255003
2 13422259938
3 18822256753
4 18922253721
5 13422259313
6 13822254373
7 13322252452
8 18922257681
# Cast the 'tel' column to str with astype() and assign it back to the same
# column; the string slicing below only works on string data.
df['tel'] = df.tel.astype(str)
Out[68]:
0 18922254812
1 13522255003
2 13422259938
3 18822256753
4 18922253721
5 13422259313
6 13822254373
7 13322252452
8 18922257681
Name: tel, dtype: object
# Extract the carrier prefix: characters 0-2 (the stop index is exclusive).
bands = df['tel'].str.slice(start=0, stop=3)
Out[70]:
0 189
1 135
2 134
3 188
4 189
5 134
6 138
7 133
8 189
Name: tel, dtype: object
# Extract the area code: characters 3-6 (the stop index is exclusive).
areas = df['tel'].str.slice(start=3, stop=7)
Out[72]:
0 2225
1 2225
2 2225
3 2225
4 2225
5 2225
6 2225
7 2225
8 2225
Name: tel, dtype: object
# Extract the number segment: characters 7-10 (the stop index is exclusive).
nums = df['tel'].str.slice(start=7, stop=11)
Out[74]:
0 4812
1 5003
2 9938
3 6753
4 3721
5 9313
6 4373
7 2452
8 7681
Name: tel, dtype: object
# Attach the three extracted Series to df as new columns, in this order:
# bands, areas, nums.
df['bands'], df['areas'], df['nums'] = bands, areas, nums
Out[76]:
tel bands areas nums
0 18922254812 189 2225 4812
1 13522255003 135 2225 5003
2 13422259938 134 2225 9938
3 18822256753 188 2225 6753
4 18922253721 189 2225 3721
5 13422259313 134 2225 9313
6 13822254373 138 2225 4373
7 13322252452 133 2225 2452
8 18922257681 189 2225 7681
split(sep, n, expand=False)
参数说明:
sep:用于分割字符串的分隔符;n:最多分割的次数;expand:是否将分割结果展开为多列。
expand返回值:expand=True 时返回 DataFrame,expand=False 时返回 Series。
from pandas import read_csv
# Load the sample data whose 'name' column is split below.
df = read_csv('/users/bakufu/desktop/4.7/data.csv')
屏幕快照 2018-07-01 19.52.26.png
# Split 'name' at the first space only (n=1) and expand the two parts into
# separate DataFrame columns (expand=True). n and expand are passed by keyword:
# pandas 2.0+ accepts only the pattern positionally and raises TypeError for
# the old positional form split(' ', 1, True).
newDF = df['name'].str.split(' ', n=1, expand=True)
# Give the two resulting columns readable names.
newDF.columns = ['band', 'name']
屏幕快照 2018-07-01 19.52.00.png
dataframe[condition]
import pandas
# The data file uses '|' as its field separator, so pass it explicitly.
df = pandas.read_csv('/users/bakufu/desktop/4.8/data.csv', sep='|')
屏幕快照 2018-07-02 06.06.22.png
# Keep only the rows with more than 10000 comments.
newDF = df[df['comments'] > 10000]
屏幕快照 2018-07-02 06.09.18.png
# Keep only the rows whose comment count is between 1000 and 10000.
newDF = df[df['comments'].between(1000, 10000)]
屏幕快照 2018-07-02 06.10.39.png
# Keep only the rows whose title is missing (null).
newDF = df[df['title'].isnull()]
屏幕快照 2018-07-02 06.11.48.png
# Keep only the rows whose title is present; ~ inverts the boolean mask.
newDF = df[~df['title'].isnull()]
屏幕快照 2018-07-02 06.19.15.png
# Keep only the rows whose title contains '台电'; na=False makes missing
# titles count as non-matching instead of producing NaN in the mask.
newDF = df[df['title'].str.contains('台电', na=False)]
屏幕快照 2018-07-02 06.35.20.png
# Keep only the rows whose title does NOT contain '台电'. With na=False,
# missing titles are non-matching, so the inverted mask keeps them.
newDF = df[~df['title'].str.contains('台电', na=False)]
屏幕快照 2018-07-02 06.35.47.png
# Combine two conditions with & (element-wise AND); each comparison must be
# parenthesized because & binds tighter than >= and <=.
newDF = df[(df['comments'] >= 1000) & (df['comments'] <= 10000)]
屏幕快照 2018-07-02 06.36.41.png