import pandas as pd
import numpy as np
import os
os.getcwd()
'D:\\Jupyter\\notebook\\Python数据清洗实战\\数据清洗之数据统计'
os.chdir('D:\\Jupyter\\notebook\\Python数据清洗实战\\数据')
df = pd.read_csv('online_order.csv', encoding='gbk', dtype={'cunstomer':str, 'order':str})
df.head(5)
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th></th>
<th>customer</th>
<th>order</th>
<th>total_items</th>
<th>discount%</th>
<th>weekday</th>
<th>hour</th>
<th>Food%</th>
<th>Fresh%</th>
<th>Drinks%</th>
<th>Home%</th>
<th>Beauty%</th>
<th>Health%</th>
<th>Baby%</th>
<th>Pets%</th>
</tr>
</thead>
<tbody>
<tr>
<th>0</th>
<td>0</td>
<td>0</td>
<td>45</td>
<td>23.03</td>
<td>4</td>
<td>13</td>
<td>9.46</td>
<td>87.06</td>
<td>3.48</td>
<td>0.00</td>
<td>0.00</td>
<td>0.00</td>
<td>0.0</td>
<td>0.0</td>
</tr>
<tr>
<th>1</th>
<td>0</td>
<td>1</td>
<td>38</td>
<td>1.22</td>
<td>5</td>
<td>13</td>
<td>15.87</td>
<td>75.80</td>
<td>6.22</td>
<td>2.12</td>
<td>0.00</td>
<td>0.00</td>
<td>0.0</td>
<td>0.0</td>
</tr>
<tr>
<th>2</th>
<td>0</td>
<td>2</td>
<td>51</td>
<td>18.08</td>
<td>4</td>
<td>13</td>
<td>16.88</td>
<td>56.75</td>
<td>3.37</td>
<td>16.48</td>
<td>6.53</td>
<td>0.00</td>
<td>0.0</td>
<td>0.0</td>
</tr>
<tr>
<th>3</th>
<td>1</td>
<td>3</td>
<td>57</td>
<td>16.51</td>
<td>1</td>
<td>12</td>
<td>28.81</td>
<td>35.99</td>
<td>11.78</td>
<td>4.62</td>
<td>2.87</td>
<td>15.92</td>
<td>0.0</td>
<td>0.0</td>
</tr>
<tr>
<th>4</th>
<td>1</td>
<td>4</td>
<td>53</td>
<td>18.31</td>
<td>2</td>
<td>11</td>
<td>24.13</td>
<td>60.38</td>
<td>7.78</td>
<td>7.72</td>
<td>0.00</td>
<td>0.00</td>
<td>0.0</td>
<td>0.0</td>
</tr>
</tbody>
</table>
</div>
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30000 entries, 0 to 29999
Data columns (total 14 columns):
customer 30000 non-null int64
order 30000 non-null object
total_items 30000 non-null int64
discount% 30000 non-null float64
weekday 30000 non-null int64
hour 30000 non-null int64
Food% 30000 non-null float64
Fresh% 30000 non-null float64
Drinks% 30000 non-null float64
Home% 30000 non-null float64
Beauty% 30000 non-null float64
Health% 30000 non-null float64
Baby% 30000 non-null float64
Pets% 30000 non-null float64
dtypes: float64(9), int64(4), object(1)
memory usage: 3.2+ MB
df.columns
Index(['customer', 'order', 'total_items', 'discount%', 'weekday', 'hour',
'Food%', 'Fresh%', 'Drinks%', 'Home%', 'Beauty%', 'Health%', 'Baby%',
'Pets%'],
dtype='object')
pd.pivot_table(data=df, index='weekday', values='total_items', aggfunc=[np.mean, np.sum],
margins=True, margins_name='总计商品情况')
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead tr th {
text-align: left;
}
.dataframe thead tr:last-of-type th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr>
<th></th>
<th>mean</th>
<th>sum</th>
</tr>
<tr>
<th></th>
<th>total_items</th>
<th>total_items</th>
</tr>
<tr>
<th>weekday</th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<th>1</th>
<td>30.662177</td>
<td>191240</td>
</tr>
<tr>
<th>2</th>
<td>31.868612</td>
<td>158387</td>
</tr>
<tr>
<th>3</th>
<td>31.869796</td>
<td>150043</td>
</tr>
<tr>
<th>4</th>
<td>32.251899</td>
<td>131620</td>
</tr>
<tr>
<th>5</th>
<td>31.406619</td>
<td>79710</td>
</tr>
<tr>
<th>6</th>
<td>32.154814</td>
<td>74149</td>
</tr>
<tr>
<th>7</th>
<td>32.373837</td>
<td>167049</td>
</tr>
<tr>
<th>总计</th>
<td>31.739933</td>
<td>952198</td>
</tr>
</tbody>
</table>
</div>
pd.pivot_table(data=df, index='weekday', columns='customer', values='total_items', aggfunc=[np.mean, np.sum],
margins=True, margins_name='总计商品情况', fill_value=0)
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead tr th {
text-align: left;
}
.dataframe thead tr:last-of-type th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr>
<th></th>
<th colspan="10" halign="left">mean</th>
<th>...</th>
<th colspan="10" halign="left">sum</th>
</tr>
<tr>
<th>customer</th>
<th>0</th>
<th>1</th>
<th>2</th>
<th>3</th>
<th>4</th>
<th>5</th>
<th>6</th>
<th>7</th>
<th>8</th>
<th>9</th>
<th>...</th>
<th>10230</th>
<th>10231</th>
<th>10232</th>
<th>10233</th>
<th>10234</th>
<th>10235</th>
<th>10236</th>
<th>10237</th>
<th>10238</th>
<th>总计商品情况</th>
</tr>
<tr>
<th>weekday</th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<th>1</th>
<td>0</td>
<td>32.538462</td>
<td>0</td>
<td>28.571429</td>
<td>15.818182</td>
<td>0</td>
<td>40.000000</td>
<td>40</td>
<td>26.333333</td>
<td>9.000000</td>
<td>...</td>
<td>13</td>
<td>15</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>191240</td>
</tr>
<tr>
<th>2</th>
<td>0</td>
<td>42.333333</td>
<td>0</td>
<td>9.500000</td>
<td>13.400000</td>
<td>21</td>
<td>40.500000</td>
<td>0</td>
<td>54.500000</td>
<td>18.500000</td>
<td>...</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>158387</td>
</tr>
<tr>
<th>3</th>
<td>0</td>
<td>0.000000</td>
<td>0</td>
<td>0.000000</td>
<td>22.333333</td>
<td>0</td>
<td>38.333333</td>
<td>0</td>
<td>0.000000</td>
<td>0.000000</td>
<td>...</td>
<td>39</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>150043</td>
</tr>
<tr>
<th>4</th>
<td>48</td>
<td>12.333333</td>
<td>0</td>
<td>41.500000</td>
<td>12.666667</td>
<td>0</td>
<td>44.000000</td>
<td>0</td>
<td>28.500000</td>
<td>0.000000</td>
<td>...</td>
<td>31</td>
<td>8</td>
<td>2</td>
<td>5</td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>54</td>
<td>2</td>
<td>131620</td>
</tr>
<tr>
<th>5</th>
<td>38</td>
<td>36.000000</td>
<td>0</td>
<td>28.000000</td>
<td>3.000000</td>
<td>0</td>
<td>0.000000</td>
<td>0</td>
<td>37.000000</td>
<td>0.000000</td>
<td>...</td>
<td>0</td>
<td>15</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>4</td>
<td>0</td>
<td>10</td>
<td>0</td>
<td>79710</td>
</tr>
<tr>
<th>6</th>
<td>0</td>
<td>0.000000</td>
<td>26</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0</td>
<td>0.000000</td>
<td>0</td>
<td>0.000000</td>
<td>26.333333</td>
<td>...</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>74149</td>
</tr>
<tr>
<th>7</th>
<td>0</td>
<td>0.000000</td>
<td>0</td>
<td>45.000000</td>
<td>24.500000</td>
<td>0</td>
<td>46.000000</td>
<td>0</td>
<td>0.000000</td>
<td>20.333333</td>
<td>...</td>
<td>0</td>
<td>22</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>167049</td>
</tr>
<tr>
<th>总计商品情况</th>
<td>44</td>
<td>31.150000</td>
<td>26</td>
<td>27.782609</td>
<td>17.103448</td>
<td>21</td>
<td>40.769231</td>
<td>40</td>
<td>35.250000</td>
<td>17.750000</td>
<td>...</td>
<td>83</td>
<td>60</td>
<td>2</td>
<td>5</td>
<td>1</td>
<td>4</td>
<td>1</td>
<td>64</td>
<td>2</td>
<td>952198</td>
</tr>
</tbody>
</table>
<p>8 rows × 20480 columns</p>
</div>
# 不同折扣下的样本(订单)数量
pd.crosstab(index=df['weekday'], columns=df['discount%'])
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>discount%</th>
<th>-65.15</th>
<th>-63.64</th>
<th>-47.26</th>
<th>-39.84</th>
<th>-26.15</th>
<th>-23.98</th>
<th>-23.89</th>
<th>-20.59</th>
<th>-19.79</th>
<th>-18.06</th>
<th>...</th>
<th>95.49</th>
<th>95.79</th>
<th>96.65</th>
<th>97.12</th>
<th>98.02</th>
<th>98.36</th>
<th>98.39</th>
<th>98.54</th>
<th>98.6</th>
<th>100.0</th>
</tr>
<tr>
<th>weekday</th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<th>1</th>
<td>1</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>...</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>0</td>
<td>1</td>
<td>0</td>
<td>71</td>
</tr>
<tr>
<th>2</th>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>2</td>
<td>1</td>
<td>1</td>
<td>...</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>78</td>
</tr>
<tr>
<th>3</th>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>1</td>
<td>0</td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>...</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>59</td>
</tr>
<tr>
<th>4</th>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>...</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>59</td>
</tr>
<tr>
<th>5</th>
<td>0</td>
<td>0</td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>...</td>
<td>1</td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>52</td>
</tr>
<tr>
<th>6</th>
<td>0</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>...</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>25</td>
</tr>
<tr>
<th>7</th>
<td>0</td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>...</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>0</td>
<td>1</td>
<td>0</td>
<td>0</td>
<td>52</td>
</tr>
</tbody>
</table>
<p>7 rows × 3579 columns</p>
</div>
pd.crosstab(index=df['weekday'], columns=df['discount%'], margins=True, normalize='index')
<div>
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
<table border="1" class="dataframe">
<thead>
<tr style="text-align: right;">
<th>discount%</th>
<th>-65.15</th>
<th>-63.64</th>
<th>-47.26</th>
<th>-39.84</th>
<th>-26.15</th>
<th>-23.98</th>
<th>-23.89</th>
<th>-20.59</th>
<th>-19.79</th>
<th>-18.06</th>
<th>...</th>
<th>95.49</th>
<th>95.79</th>
<th>96.65</th>
<th>97.12</th>
<th>98.02</th>
<th>98.36</th>
<th>98.39</th>
<th>98.54</th>
<th>98.6</th>
<th>100.0</th>
</tr>
<tr>
<th>weekday</th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<th>1</th>
<td>0.000160</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000160</td>
<td>...</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000160</td>
<td>0.000000</td>
<td>0.000160</td>
<td>0.000000</td>
<td>0.011384</td>
</tr>
<tr>
<th>2</th>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000201</td>
<td>0.000402</td>
<td>0.000201</td>
<td>0.000201</td>
<td>...</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000201</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.015694</td>
</tr>
<tr>
<th>3</th>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000212</td>
<td>0.000212</td>
<td>0.000000</td>
<td>0.000212</td>
<td>0.000000</td>
<td>0.000000</td>
<td>...</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.012532</td>
</tr>
<tr>
<th>4</th>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>...</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.014457</td>
</tr>
<tr>
<th>5</th>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000394</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>...</td>
<td>0.000394</td>
<td>0.000394</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000394</td>
<td>0.020489</td>
</tr>
<tr>
<th>6</th>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000434</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>...</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000434</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.010841</td>
</tr>
<tr>
<th>7</th>
<td>0.000000</td>
<td>0.000194</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>...</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000194</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.000194</td>
<td>0.000000</td>
<td>0.000000</td>
<td>0.010078</td>
</tr>
<tr>
<th>All</th>
<td>0.000033</td>
<td>0.000033</td>
<td>0.000033</td>
<td>0.000033</td>
<td>0.000033</td>
<td>0.000033</td>
<td>0.000033</td>
<td>0.000100</td>
<td>0.000033</td>
<td>0.000067</td>
<td>...</td>
<td>0.000033</td>
<td>0.000033</td>
<td>0.000033</td>
<td>0.000033</td>
<td>0.000033</td>
<td>0.000033</td>
<td>0.000033</td>
<td>0.000033</td>
<td>0.000033</td>
<td>0.013200</td>
</tr>
</tbody>
</table>
<p>8 rows × 3579 columns</p>
</div>
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。