分箱(binning):可以形象地理解为把苹果按大小分级后分别装箱,即把连续的数值按区间划分到若干个类别中。
import string

import numpy as np
import pandas as pd
from pandas import Series,DataFrame
# Simulate 20 exam scores: random integers from 35 up to (but excluding) 100
score_list = np.random.randint(low=35, high=100, size=20)
score_list
array([93, 35, 83, 44, 56, 62, 37, 86, 44, 82, 49, 91, 49, 82, 53, 89, 47,
56, 38, 86])
# Grade boundaries; pd.cut turns them into the right-closed intervals
# (0, 59], (59, 70], (70, 80] and (80, 100]
bins = [0, 59, 70, 80, 100]
# Assign every score to its interval (the result is a Categorical)
score_cut = pd.cut(score_list, bins=bins)
score_cut
[(80, 100], (0, 59], (80, 100], (0, 59], (0, 59], ..., (80, 100], (0, 59], (0, 59], (0, 59], (80, 100]]
Length: 20
Categories (4, interval[int64]): [(0, 59] < (59, 70] < (70, 80] < (80, 100]]
# Count how many scores fall into each bin, most populated bin first.
# The top-level pd.value_counts helper is deprecated in recent pandas, so wrap
# the Categorical in a Series and use the method form instead; empty bins are
# still reported with a count of 0.
pd.Series(score_cut).value_counts()
(0, 59] 11
(80, 100] 8
(59, 70] 1
(70, 80] 0
dtype: int64
# Build a DataFrame whose single 'score' column holds the simulated scores
df = DataFrame({'score': score_list})
df
| | score |
---|---|
0 | 93 |
1 | 35 |
2 | 83 |
3 | 44 |
4 | 56 |
5 | 62 |
6 | 37 |
7 | 86 |
8 | 44 |
9 | 82 |
10 | 49 |
11 | 91 |
12 | 49 |
13 | 82 |
14 | 53 |
15 | 89 |
16 | 47 |
17 | 56 |
18 | 38 |
19 | 86 |
# Fill a 'student' column with one random 3-character string per row.
# pd.util.testing.rands is no longer available in pandas 2.x, so the strings
# are drawn with NumPy from ASCII letters and digits instead.
id_chars = list(string.ascii_letters + string.digits)
df['student'] = [
    ''.join(np.random.choice(id_chars, size=3))
    for _ in range(len(df))  # one string per existing row rather than a hard-coded 20
]
df
| | score | student |
---|---|---|
0 | 93 | 8c1 |
1 | 35 | cHy |
2 | 83 | 6xy |
3 | 44 | 6gY |
4 | 56 | tc5 |
5 | 62 | r5T |
6 | 37 | 3z3 |
7 | 86 | vsy |
8 | 44 | F6h |
9 | 82 | hgC |
10 | 49 | xA9 |
11 | 91 | iLZ |
12 | 49 | BVK |
13 | 82 | E9C |
14 | 53 | rbE |
15 | 89 | hSL |
16 | 47 | AIt |
17 | 56 | Gdk |
18 | 38 | AFX |
19 | 86 | JhU |
# Apply the same binning to the DataFrame's 'score' column; the result is a
# categorical Series that shares df's index
pd.cut(df['score'], bins=bins)
0 (80, 100]
1 (0, 59]
2 (80, 100]
3 (0, 59]
4 (0, 59]
5 (59, 70]
6 (0, 59]
7 (80, 100]
8 (0, 59]
9 (80, 100]
10 (0, 59]
11 (80, 100]
12 (0, 59]
13 (80, 100]
14 (0, 59]
15 (80, 100]
16 (0, 59]
17 (0, 59]
18 (0, 59]
19 (80, 100]
Name: score, dtype: category
Categories (4, interval[int64]): [(0, 59] < (59, 70] < (70, 80] < (80, 100]]
# Labelled binning: name the four intervals (in ascending order) instead of
# showing the raw interval, and store the result as a new column
df['Categories'] = pd.cut(
    df['score'],
    bins=bins,
    labels=['low', 'ok', 'good', 'great'],
)
df
| | score | student | Categories |
---|---|---|---|
0 | 93 | 8c1 | great |
1 | 35 | cHy | low |
2 | 83 | 6xy | great |
3 | 44 | 6gY | low |
4 | 56 | tc5 | low |
5 | 62 | r5T | ok |
6 | 37 | 3z3 | low |
7 | 86 | vsy | great |
8 | 44 | F6h | low |
9 | 82 | hgC | great |
10 | 49 | xA9 | low |
11 | 91 | iLZ | great |
12 | 49 | BVK | low |
13 | 82 | E9C | great |
14 | 53 | rbE | low |
15 | 89 | hSL | great |
16 | 47 | AIt | low |
17 | 56 | Gdk | low |
18 | 38 | AFX | low |
19 | 86 | JhU | great |