import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
需要以下数据的同学可关注公众号HsuHeinrich,回复【数据可视化】自动获取~
# Load the Big Mac index data set (TidyTuesday, 2020-12-22) straight from GitHub.
data_url = (
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-12-22/big-mac.csv"
)
df_mac_raw = pd.read_csv(data_url)
# Bare expression: shows the first rows when run as a notebook cell.
df_mac_raw.head()

image-20240129181456701
date:日期;iso_a3:国家的三个字母的ISO代码;currency_code:国际货币代码;name:国家名称;dollar_price:美元价格
# Countries/regions to emphasise in the chart, identified by their iso_a3 code.
HIGHLIGHTS = ["EUZ", "CHE", "DNK", "SWE", "BRA", "ARG", "GBR", "USA"]

# Calendar year of every observation, derived from its date.
observation_years = pd.DatetimeIndex(df_mac_raw["date"]).year

# Attach the year and keep only the variables used further down.
df_mac_raw = df_mac_raw.assign(year=observation_years)[
    ["date", "year", "iso_a3", "currency_code", "name", "dollar_price"]
]
# Build df_mac_indexed_2008: yearly mean prices per country plus each price's
# difference from that country's own 2008 price (the reference year).

# Mean dollar price per country and year.
df_mac = (
    df_mac_raw.groupby(["iso_a3", "name", "year"])
    .agg(price=("dollar_price", "mean"))
    .reset_index()
)

# Keep only the countries that have exactly 21 yearly rows.
group_sizes = df_mac.groupby("iso_a3").size()
keep = group_sizes[group_sizes == 21].index.tolist()
df_mac = df_mac[df_mac["iso_a3"].isin(keep)]

# Keep only the countries that have a 2008 row to index against.
# .copy() makes this an independent frame, so the column assignments below do
# not write into a filtered slice of df_mac (SettingWithCopyWarning).
countries = df_mac[df_mac["year"] == 2008]["iso_a3"].tolist()
df_mac_indexed_2008 = df_mac[df_mac["iso_a3"].isin(countries)].copy()
df_mac_indexed_2008["ref_year"] = 2008

# Helper frame holding only iso_a3 and the 2008 price (renamed price_index).
# A plain boolean filter selects the same rows as the former groupby().apply()
# without depending on whether apply() hands the grouping column to the callback.
df_price_index = (
    df_mac_indexed_2008.loc[df_mac_indexed_2008["year"] == 2008, ["iso_a3", "price"]]
    .rename(columns={"price": "price_index"})
    .reset_index(drop=True)
)

# Attach the 2008 price to every row and compute the difference to it.
df_mac_indexed_2008 = pd.merge(df_mac_indexed_2008, df_price_index, on="iso_a3")
df_mac_indexed_2008["price_rel"] = (
    df_mac_indexed_2008["price"] - df_mac_indexed_2008["price_index"]
)
# Add a "group" column: highlighted countries keep their own iso_a3 code,
# every other country is labelled "other".
is_highlighted = df_mac_indexed_2008["iso_a3"].isin(HIGHLIGHTS)
group_labels = df_mac_indexed_2008["iso_a3"].where(is_highlighted, "other")

# Store the column as an ordered categorical: highlighted codes in alphabetical
# order, with "other" as the last level.
group_levels = [*sorted(HIGHLIGHTS), "other"]
df_mac_indexed_2008["group"] = pd.Categorical(
    group_labels,
    categories=group_levels,
    ordered=True,
)
# Shared styling constants.

# Grey palette; the number in each name is the approximate lightness in percent
# (10 = almost black, 98 = almost white).
GREY10 = "#1a1a1a"
GREY30 = "#4d4d4d"
GREY40 = "#666666"
GREY50 = "#7f7f7f"
GREY60 = "#999999"
GREY75 = "#bfbfbf"
GREY91 = "#e8e8e8"
GREY98 = "#fafafa"

# One colour per highlighted country, listed in alphabetical order of its code.
_COUNTRY_COLORS = {
    "ARG": "#7F3C8D",
    "BRA": "#11A579",
    "CHE": "#3969AC",
    "DNK": "#F2B701",
    "EUZ": "#E73F74",
    "GBR": "#80BA5A",
    "SWE": "#E68310",
    "USA": GREY50,
}
COLOR_SCALE = list(_COUNTRY_COLORS.values())

# x positions of the vertical reference lines: every fifth year, 2000 through 2020.
VLINES = np.arange(start=2000, stop=2025, step=5)
# Draw the base chart: background, reference lines and one line per country.

# Figure with a single axes.
fig, ax = plt.subplots(figsize=(14, 8.5))

# Same near-white background for the figure and the plotting area.
fig.patch.set_facecolor(GREY98)
ax.set_facecolor(GREY98)

# Light vertical reference lines, drawn beneath everything else (zorder=0).
for h in VLINES:
    ax.axvline(h, color=GREY91, lw=0.6, zorder=0)

# Light horizontal reference lines at every whole number from -4 to 3.
ax.hlines(y=np.arange(-4, 4), xmin=2000, xmax=2020, color=GREY91, lw=0.6)

# Darker baseline at y=0, i.e. no change relative to the reference year.
ax.hlines(y=0, xmin=2000, xmax=2020, color=GREY60, lw=0.8)

# Dotted marker for the reference year, with a text label next to it.
ax.axvline(2008, color=GREY40, ls="dotted")
ax.text(2008.15, -3.35, "2008", fontname="Montserrat",
        fontsize=14, fontweight=500, color=GREY40, ha="left")

# Split the data into highlighted countries and all the others.
df_highlight = df_mac_indexed_2008[df_mac_indexed_2008["group"] != "other"]
df_others = df_mac_indexed_2008[df_mac_indexed_2008["group"] == "other"]

# Background lines: one thin, semi-transparent grey line per remaining country.
# sort=False keeps the countries in their order of appearance.
for group, data in df_others.groupby("iso_a3", sort=False):
    ax.plot("year", "price_rel", c=GREY75, lw=1.2, alpha=0.5, data=data)

# Highlighted lines. COLOR_SCALE lists its colours in alphabetical order of the
# country codes, so key it by code: each country keeps its own colour even if
# another highlighted country is absent from the data.
highlight_colors = dict(zip(sorted(HIGHLIGHTS), COLOR_SCALE))
for group, data in df_highlight.groupby("iso_a3", sort=False):
    ax.plot("year", "price_rel", color=highlight_colors[group], lw=1.8, data=data)

output_10_0
# Add a name label at the right-hand end of every highlighted line.

# Leave room to the right of 2020 for the labels.
ax.set_xlim(2000, 2024.5)
ax.set_ylim(-4.1, 3)

# Vertical position of each label, in alphabetical order of the country codes.
LABEL_Y = [
    -0.45,  # ARG
    -0.15,  # BRA
    0.5,    # CHE
    -1.7,   # DNK
    -0.75,  # EUZ
    0.15,   # GBR
    -1.05,  # SWE
    2.1,    # USA
]

# The connector runs from the last data year (x_start) to just before the label
# (x_end - PAD); the label text itself starts at x_end.
x_start = 2020
x_end = 2021
PAD = 0.1

# Key colour and label height by country code rather than by loop position, so
# every country keeps its own colour and label slot even if another highlighted
# country is absent from the data.
label_style = dict(zip(sorted(HIGHLIGHTS), zip(COLOR_SCALE, LABEL_Y)))

for group in df_highlight["iso_a3"].unique():
    # The 2020 row(s) of this country; the first one supplies name and end value.
    data = df_highlight[(df_highlight["iso_a3"] == group) & (df_highlight["year"] == 2020)]
    color, y_end = label_style[group]

    # Country name shown as the label.
    text = data["name"].values[0]

    # The connector starts where the country's line ends.
    y_start = data["price_rel"].values[0]

    # Dashed connector through three points: line end, bend, label height.
    ax.plot(
        [x_start, (x_start + x_end - PAD) / 2, x_end - PAD],
        [y_start, y_end, y_end],
        color=color,
        alpha=0.5,
        ls="dashed",
    )

    # Text annotation, vertically centred on the connector's end.
    ax.text(
        x_end,
        y_end,
        text,
        color=color,
        fontsize=14,
        weight="bold",
        fontfamily="Montserrat",
        va="center",
    )

# Bare expression: re-displays the figure when run as a notebook cell.
fig

output_12_0
# Final polish: tick labels, tick marks, spines, title, subtitle and caption.

# Tick positions: whole numbers -4..3 on y, every fifth year 2000..2020 on x.
y_ticks = np.arange(-4, 4)
x_ticks = np.arange(2000, 2025, 5)

# Font settings shared by both sets of tick labels.
tick_label_style = {"fontname": "Montserrat", "weight": 500, "color": GREY40}

ax.set_yticks(list(y_ticks))
ax.set_yticklabels([f"{y}.00$" for y in y_ticks], fontsize=11, **tick_label_style)
ax.set_xticks(list(x_ticks))
ax.set_xticklabels(list(x_ticks), fontsize=13, **tick_label_style)

# Tick marks: light grey, longer on the x axis than on the y axis.
for axis_name, tick_length in (("x", 12), ("y", 8)):
    ax.tick_params(axis=axis_name, length=tick_length, color=GREY91)

# Spines: light grey on the left and bottom, hidden on the right and top.
for side, spine_color in (
    ("left", GREY91),
    ("bottom", GREY91),
    ("right", "none"),
    ("top", "none"),
):
    ax.spines[side].set_color(spine_color)

# Title, subtitle and caption, all left-anchored at x=0.08 in figure coordinates.
headline = "Compared to the financial crisis in 2008, how much more or less do you have to pay for a Big Mac today?"
subtitle = [
    "The index chart visualizes the price changes (in USD) of a Big Mac based on a 2008 as index year. The Big Mac Index is published by The Economist as an informal way to provide",
    "a test of the extent to which market exchange rates result in goods costing the same in different countries. It seeks to make exchange-rate theory a bit more digestible and takes,",
    "its name from the Big Mac a hamburger sold at McDonald's restaurants",
]
caption = "Visualization by Cédric Scherer • Data by The Economist • The index chart shows the 27 countries that provide Big mac prices for all years from 2000 to 2020. In case a country was reported twice per year, the mean value was visualized."

fig.text(0.08, 0.97, headline, color=GREY10, fontsize=15, fontname="Montserrat", weight="bold")
fig.text(0.08, 0.91, "\n".join(subtitle), ha="left", color=GREY30, fontname="Montserrat", fontsize=9)
fig.text(0.08, 0.05, caption, fontname="Montserrat", fontsize=6.5, color=GREY30, ha="left")

# Bare expression: re-displays the figure when run as a notebook cell.
fig

output_14_0
参考:Line chart with labels at end of each line[1]
共勉~
[1]
Line chart with labels at end of each line: https://python-graph-gallery.com/web-line-chart-with-labels-at-line-end/