
桑基图经常用于能源、金融行业,对材料、成本的流动进行可视化分析。现在很多互联网行业还使用桑基图做用户流动性分析,能很好地观察数据成分的变动大小及变动方向。
1、基于plotly
import json
# `import urllib` alone does not load the `request` submodule, so it is
# imported explicitly; this also binds the top-level name `urllib`.
import urllib.request

import plotly.graph_objects as go

# Download the sample figure definition (energy flows) as JSON.
url = 'https://raw.githubusercontent.com/plotly/plotly.js/master/test/image/mocks/sankey_energy.json'
# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen(url) as response:
    data = json.loads(response.read())

# The first trace of the downloaded figure holds the node and link arrays;
# `trace` aliases it, so the colour edits below still update `data` in place.
trace = data['data'][0]

# Replace every "magenta" node colour with rgba(255,0,255, 0.8), then colour
# each link like its source node but with the alpha text "0.8" swapped for
# `opacity`. Colours that do not contain the text "0.8" are left unchanged.
opacity = 0.4
trace['node']['color'] = [
    'rgba(255,0,255, 0.8)' if color == "magenta" else color
    for color in trace['node']['color']
]
trace['link']['color'] = [
    trace['node']['color'][src].replace("0.8", str(opacity))
    for src in trace['link']['source']
]

fig = go.Figure(data=[go.Sankey(
    valueformat=".0f",
    valuesuffix="TWh",
    # Node definitions.
    node=dict(
        pad=15,
        thickness=15,
        line=dict(color="black", width=0.5),
        label=trace['node']['label'],
        color=trace['node']['color'],
    ),
    # Links between nodes; `source` and `target` index into the node arrays.
    link=dict(
        source=trace['link']['source'],
        target=trace['link']['target'],
        value=trace['link']['value'],
        label=trace['link']['label'],
        color=trace['link']['color'],
    ),
)])
# NOTE(review): nothing here displays the figure explicitly; in a notebook the
# value of this last expression is presumably what gets rendered. Call
# fig.show() when running as a plain script.
fig.update_layout(
    title_text="Energy forecast for 2050<br>Source: Department of Energy & Climate Change, Tom Counsell via <a href='https://bost.ocks.org/mike/sankey/'>Mike Bostock</a>",
    font_size=10,
)

2、基于pyecharts
import json
# `import urllib` alone does not load the `request` submodule, so it is
# imported explicitly; this also binds the top-level name `urllib`.
import urllib.request

import pyecharts.options as opts
from pyecharts.charts import Sankey

# Download the sample data set; the code below reads its "nodes" and "links"
# entries.
url = 'https://echarts.apache.org/examples/data/asset/data/energy.json'
# The context manager closes the HTTP response once the body has been read.
with urllib.request.urlopen(url) as response:
    data = json.loads(response.read())

c = (
    Sankey()
    .add(
        series_name="",
        nodes=data["nodes"],
        links=data["links"],
        itemstyle_opts=opts.ItemStyleOpts(border_width=1, border_color="#aaa"),
        # NOTE(review): color="source" presumably makes each link take the
        # colour of its source node - confirm against the pyecharts/ECharts
        # documentation. The keyword is spelled `linestyle_opt` (singular) here.
        linestyle_opt=opts.LineStyleOpts(color="source", curve=0.5, opacity=0.5),
        tooltip_opts=opts.TooltipOpts(trigger_on="mousemove"),
    )
    .set_global_opts(title_opts=opts.TitleOpts(title=""))
)
# Render the chart inline; this call is meant for a notebook environment.
c.render_notebook()

3、基于pysankey
import pandas as pd
from pySankey.sankey import sankey
# Build the diagram from (source, target) pairs only. A pair may appear many
# times; the more often it appears, the larger its weight (the thicker its band).
url = "https://raw.githubusercontent.com/anazalea/pySankey/master/pysankey/fruits.txt"
df = pd.read_csv(url, sep=" ", names=["true", "predicted"])

# Fixed colour for every label that appears in the data.
colors = dict(
    apple="#f71b1b",
    blueberry="#1b7ef7",
    banana="#f3f71b",
    lime="#12e23f",
    orange="#f78c1b",
)

sankey(
    left=df["true"],
    right=df["predicted"],
    colorDict=colors,
    aspect=20,
    fontsize=12,
)

4、基于pysankey(带权重)
import pandas as pd
from pySankey.sankey import sankey
# Build the diagram from (source, target, value) rows. Each pair only needs to
# appear once; the value column supplies its weight.
url = "https://raw.githubusercontent.com/anazalea/pySankey/master/pysankey/customers-goods.csv"
df = pd.read_csv(url, sep=",")

# The same revenue column weights both the left and the right side of each flow.
weights = df["revenue"]
sankey(
    left=df["customer"],
    right=df["good"],
    leftWeight=weights,
    rightWeight=weights,
    aspect=20,
    fontsize=20,
)

5、总结
以上通过plotly、pyecharts和pysankey快速绘制桑基图。
共勉~