我有一个如下所示的 DataFrame,它是若干次统计模型实验得到的结果:
# Raw experiment results. Each outer key ("cat1", "cat2", "cat3") becomes one
# column; each inner key is an (experiment, class, metric) tuple, so the rows
# end up with a three-level index once this is passed to pd.DataFrame.
data = {
"cat1": {
(1, "class1", "metric1"): 0.9520103335380554,
(1, "class1", "metric2"): 0.9596380591392517,
(1, "class2", "metric1"): 0.9013115167617798,
(1, "class2", "metric2"): 0.9917504191398621,
(1, "class3", "metric1"): 0.9027230143547058,
(1, "class3", "metric2"): 0.8536863327026367,
(2, "class1", "metric1"): 0.8746241331100464,
(2, "class1", "metric2"): 0.8844705820083618,
(2, "class2", "metric1"): 0.7890198826789856,
(2, "class2", "metric2"): 0.6964980363845825,
(2, "class3", "metric1"): 0.9410034418106079,
(2, "class3", "metric2"): 0.9601017236709595,
(3, "class1", "metric1"): 0.9640659689903259,
(3, "class1", "metric2"): 0.9766426682472229,
(3, "class2", "metric1"): 0.893884003162384,
(3, "class2", "metric2"): 0.9959416389465332,
(3, "class3", "metric1"): 0.9533607363700867,
(3, "class3", "metric2"): 0.9378591179847717,
},
# NOTE: the "cat2" values below repeat the "cat1" values exactly.
"cat2": {
(1, "class1", "metric1"): 0.9520103335380554,
(1, "class1", "metric2"): 0.9596380591392517,
(1, "class2", "metric1"): 0.9013115167617798,
(1, "class2", "metric2"): 0.9917504191398621,
(1, "class3", "metric1"): 0.9027230143547058,
(1, "class3", "metric2"): 0.8536863327026367,
(2, "class1", "metric1"): 0.8746241331100464,
(2, "class1", "metric2"): 0.8844705820083618,
(2, "class2", "metric1"): 0.7890198826789856,
(2, "class2", "metric2"): 0.6964980363845825,
(2, "class3", "metric1"): 0.9410034418106079,
(2, "class3", "metric2"): 0.9601017236709595,
(3, "class1", "metric1"): 0.9640659689903259,
(3, "class1", "metric2"): 0.9766426682472229,
(3, "class2", "metric1"): 0.893884003162384,
(3, "class2", "metric2"): 0.9959416389465332,
(3, "class3", "metric1"): 0.9533607363700867,
(3, "class3", "metric2"): 0.9378591179847717,
},
"cat3": {
(1, "class1", "metric1"): 0.8746241331100464,
(1, "class1", "metric2"): 0.8844705820083618,
(1, "class2", "metric1"): 0.7890198826789856,
(1, "class2", "metric2"): 0.6964980363845825,
(1, "class3", "metric1"): 0.9410034418106079,
(1, "class3", "metric2"): 0.9601017236709595,
(2, "class1", "metric1"): 0.9309893846511841,
(2, "class1", "metric2"): 0.884644627571106,
(2, "class2", "metric1"): 0.861851155757904,
(2, "class2", "metric2"): 0.9180170893669128,
(2, "class3", "metric1"): 0.8841384649276733,
(2, "class3", "metric2"): 0.8577012419700623,
(3, "class1", "metric1"): 0.8895564675331116,
(3, "class1", "metric2"): 0.8351058959960938,
(3, "class2", "metric1"): 0.832390308380127,
(3, "class2", "metric2"): 0.8969333171844482,
(3, "class3", "metric1"): 0.7883192300796509,
(3, "class3", "metric2"): 0.8577012419700623,
},
}
# Build the frame and name the three row-index levels that come from the
# (experiment, class, metric) tuple keys.
df = (
    pd.DataFrame(data)
    .rename_axis(("experiment", "class", "metric"))
)
# Mean and standard deviation for every (class, metric) pair; the result has
# two-level columns whose second level is "mean" / "std".
(
    df.groupby(["class", "metric"])
    .agg(["mean", "std"])
)

在按 class 和 metric 分组、对各次实验做聚合之后,如何把列 MultiIndex 第二级的 mean 和 std 两列合并成一列:先四舍五入,再拼接成字符串,并在两者之间插入某个符号(例如 `0.93 +/- 0.05`)?

发布于 2021-08-31 06:16:10
您可以把聚合函数换成一个用 f-string 生成自定义输出的函数:
def f(x):
    """Return "<mean> +/- <std>" for one group, each value rounded to 2 decimals."""
    # round() keeps the values numeric, so trailing zeros are not padded
    # (a mean of 0.90 is rendered as "0.9").
    center = round(x.mean(), 2)
    spread = round(x.std(), 2)
    return f'{center} +/- {spread}'


# Apply the formatter to every column of every (class, metric) group.
df = df.groupby(["class", "metric"]).agg(f)
print(df)
cat1 cat2 cat3
class metric
class1 metric1 0.93 +/- 0.05 0.93 +/- 0.05 0.9 +/- 0.03
metric2 0.94 +/- 0.05 0.94 +/- 0.05 0.87 +/- 0.03
class2 metric1 0.86 +/- 0.06 0.86 +/- 0.06 0.83 +/- 0.04
metric2 0.89 +/- 0.17 0.89 +/- 0.17 0.84 +/- 0.12
class3 metric1 0.93 +/- 0.03 0.93 +/- 0.03 0.87 +/- 0.08
metric2 0.92 +/- 0.06 0.92 +/- 0.06 0.89 +/- 0.06
或者先用 DataFrame.xs 按列的第二级分别选出 mean 和 std,四舍五入后转换为字符串,最后用 +/- 把两者拼接起来:
# Aggregate first: columns become a two-level index (category, "mean"/"std").
df = df.groupby(["class", "metric"]).agg(["mean", "std"])

# Take each statistic as its own frame (xs drops the selected level), round it
# to 2 decimals and convert it to text so the two halves can be concatenated.
mean_text = df.xs('mean', axis=1, level=1).round(2).astype(str)
std_text = df.xs('std', axis=1, level=1).round(2).astype(str)

df = mean_text + '+/-' + std_text
print(df)
cat1 cat2 cat3
class metric
class1 metric1 0.93+/-0.05 0.93+/-0.05 0.9+/-0.03
metric2 0.94+/-0.05 0.94+/-0.05 0.87+/-0.03
class2 metric1 0.86+/-0.06 0.86+/-0.06 0.83+/-0.04
metric2 0.89+/-0.17 0.89+/-0.17 0.84+/-0.12
class3 metric1 0.93+/-0.03 0.93+/-0.03 0.87+/-0.08
metric2 0.92+/-0.06 0.92+/-0.06 0.89+/-0.06
发布于 2021-08-31 06:15:37
你可以使用stack+apply+unstack
(df.groupby(["class", "metric"])
.agg(["mean", "std"])
.stack(level=0)
.apply(lambda r: f'{r["mean"]:.2f}±{r["std"]:.2f}', axis=1)
.unstack(level=-1)
)
输出:
cat1 cat2 cat3
class metric
class1 metric1 0.93±0.05 0.93±0.05 0.90±0.03
metric2 0.94±0.05 0.94±0.05 0.87±0.03
class2 metric1 0.86±0.06 0.86±0.06 0.83±0.04
metric2 0.89±0.17 0.89±0.17 0.84±0.12
class3 metric1 0.93±0.03 0.93±0.03 0.87±0.08
metric2 0.92±0.06 0.92±0.06 0.89±0.06
https://stackoverflow.com/questions/68993659
复制相似问题