# Python绘制histogram直方图

01 纯Python实现histogram

>>> a = (0, 1, 1, 1, 2, 3, 7, 7, 23)

>>> def count_elements(seq) -> dict:
...     """Tally elements from seq."""
...     hist = {}
...     for i in seq:
...         hist[i] = hist.get(i, 0) + 1
...     return hist

>>> counted = count_elements(a)

>>> counted

{0: 1, 1: 3, 2: 1, 3: 1, 7: 2, 23: 1}

>>> from collections import Counter

>>> recounted = Counter(a)

>>> recounted

Counter({0: 1, 1: 3, 3: 1, 2: 1, 7: 2, 23: 1})

>>> recounted.items() == counted.items()

True

def ascii_histogram(seq) -> None:
    """Print a horizontal frequency-table/histogram plot of ``seq``.

    Each distinct element is printed on its own line, in sorted order,
    followed by one ``+`` per occurrence of that element.

    Args:
        seq: Iterable of integers to tally (the ``d`` format code below
            requires integer values).

    Returns:
        None. The plot is written to standard output.
    """
    counted = count_elements(seq)
    for k in sorted(counted):
        # Right-align the value in a 5-character column so the bars line up.
        print('{0:5d} {1}'.format(k, '+' * counted[k]))

>>> import random

>>> random.seed(1)

>>> vals = [1,3,4,6,8,9,10]

>>> # vals 里面的数字将会出现5到15次
>>> freq = (random.randint(5, 15) for _ in vals)

>>> data = []

>>> for f, v in zip(freq, vals):

...     data.extend([v] * f)

>>> ascii_histogram(data)

    1 +++++++
    3 ++++++++++++++
    4 ++++++
    6 +++++++++
    8 ++++++
    9 ++++++++++++
   10 ++++++++++++

02 使用Numpy实现histogram

>>> hist, bin_edges = np.histogram(d)

>>> hist

array([ 1,  0,  3,  4,  4, 10, 13,  9,  2,  4])

>>> bin_edges

array([3.217,5.199,7.181,9.163,11.145,13.127,15.109,17.091,

19.073,21.055,23.037])

>>> hist.size, bin_edges.size

(10, 11)

>>> # 取a的最小值和最大值
>>> first_edge, last_edge = a.min(), a.max()
>>> n_equal_bins = 10  # NumPy的默认设置，10个分箱
>>> bin_edges = np.linspace(start=first_edge, stop=last_edge,
...                         num=n_equal_bins + 1, endpoint=True)

...

>>> bin_edges

array([ 0. ,  2.3,  4.6,  6.9,  9.2, 11.5, 13.8, 16.1, 18.4, 20.7, 23. ])

>>> bcounts = np.bincount(a)

>>> hist, _ = np.histogram(a, range=(0, a.max()), bins=a.max() + 1)

>>> np.array_equal(hist, bcounts)

True

>>># Reproducing collections.Counter

>>> dict(zip(np.unique(a), bcounts[bcounts.nonzero()]))

{0: 1, 1: 3, 2: 1, 3: 1, 7: 2, 23: 1}

02 使用Matplotlib和Pandas可视化Histogram

import matplotlib.pyplot as plt

# Interface of the matplotlib.axes.Axes.hist() method.
# NOTE(review): `d` and `np` are not defined in this snippet; presumably they
# are the sample data and NumPy import from the earlier NumPy section.
n, bins, patches = plt.hist(x=d, bins='auto', color='#0504aa',
                            alpha=0.7, rwidth=0.85)
plt.grid(axis='y', alpha=0.75)
plt.xlabel('Value')
plt.ylabel('Frequency')
plt.title('My Very Own Histogram')
plt.text(23, 45, r'$\mu=15, b=3$')
maxfreq = n.max()
# Set the upper limit of the y-axis: round up to the next multiple of 10,
# or add a full 10 of headroom when maxfreq is already a multiple of 10.
plt.ylim(ymax=np.ceil(maxfreq / 10) * 10 if maxfreq % 10 else maxfreq + 10)

pandas.DataFrame.hist()的用法与Series.hist()类似，但它会为DataFrame中的每一列数据分别生成一个直方图。

03 在Pandas中的其它工具

>>> ages = pd.Series(

... [1,1,3,5,8,10,12,15,18,18,19,20,25,30,40,51,52])

>>> bins = (0, 10, 13, 18, 21, np.inf)  # 边界
>>> labels = ('child', 'preteen', 'teen', 'military_age', 'adult')  # 标签

>>> groups = pd.cut(ages, bins=bins, labels=labels)

>>> groups.value_counts()

child           6
adult           5
teen            3
military_age    2
preteen         1
dtype: int64

>>> pd.concat((ages, groups), axis=1).rename(columns={0: 'age', 1: 'group'})

    age         group
0     1         child
1     1         child
2     3         child
3     5         child
4     8         child
5    10         child
6    12       preteen
7    15          teen
8    18          teen
9    18          teen
10   19  military_age
11   20  military_age
12   25         adult
13   30         adult
14   40         adult
15   51         adult
16   52         adult

∞∞∞∞∞

IT派 -

• 发表于:
• 原文链接https://kuaibao.qq.com/s/20180809B07UQ000?refer=cp_1026
• 腾讯「云+社区」是腾讯内容开放平台帐号（企鹅号）传播渠道之一，根据《腾讯内容开放平台服务协议》转载发布内容。
• 如有侵权，请联系 yunjia_community@tencent.com 删除。

2020-11-30

2020-11-30

2020-11-30

2020-11-30

2020-11-30

2020-11-30

2018-06-27

2018-06-25

2018-06-15

2018-06-11

2020-11-30

2020-11-30