Python绘图系列之同时绘制并列和堆叠柱状图

这篇文章介绍了如何同时绘制并列和堆叠柱状图,并给出了三种实现方式。

背景

之前绘制柱状图时,要么是单独的堆叠柱状图,要么是单独的并列柱状图;但现在遇到了一种情况:在绘制并列柱状图的同时,希望突出显示某种分类,这时就需要在并列柱状图中加入堆叠柱状图。

实现方法一:Seaborn

构造数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# The snippet is meant to be runnable on its own, so bring in the two
# libraries it uses.
import numpy as np
import pandas as pd

# Three wide-format demo tables of uniform random numbers in [0, 1).
# All three share the same row labels (A-D) and column labels (I-M).
df1 = pd.DataFrame(np.random.rand(4, 5),
                   index=["A", "B", "C", "D"],
                   columns=["I", "J", "K", "L", "M"])
df2 = pd.DataFrame(np.random.rand(4, 5),
                   index=["A", "B", "C", "D"],
                   columns=["I", "J", "K", "L", "M"])
df3 = pd.DataFrame(np.random.rand(4, 5),
                   index=["A", "B", "C", "D"],
                   columns=["I", "J", "K", "L", "M"])

df1
I J K L M
A 0.584456 0.193642 0.855521 0.849345 0.142361
B 0.231361 0.172228 0.870167 0.922349 0.903166
C 0.959428 0.871147 0.700459 0.489180 0.232174
D 0.750299 0.686798 0.913503 0.410401 0.800438

df2
I J K L M
A 0.843176 0.619224 0.472286 0.438836 0.470271
B 0.476635 0.853611 0.355111 0.381609 0.549682
C 0.417060 0.856389 0.377922 0.717815 0.736557
D 0.607617 0.032434 0.541995 0.588105 0.710369

df3
I J K L M
A 0.603417 0.406737 0.764824 0.479933 0.234480
B 0.671419 0.770811 0.574703 0.166566 0.290416
C 0.782604 0.274776 0.715263 0.006884 0.908278
D 0.187266 0.601971 0.880003 0.233897 0.117642

数据转换

将上述的宽数据转换为长数据:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Tag every frame with its own name so the source table is still known
# after the frames are concatenated.
for frame_name, frame in (("df1", df1), ("df2", df2), ("df3", df3)):
    frame["Name"] = frame_name

# Reshape each wide frame to long form (Name / index / variable / value)
# and stack the three results into one table with a fresh 0..n-1 index.
long_frames = []
for frame in (df1, df2, df3):
    long_frames.append(pd.melt(frame.reset_index(), id_vars=["Name", "index"]))
dfall = pd.concat(long_frames, ignore_index=True)
dfall
Name index variable value
0 df1 A I 0.565242
1 df1 B I 0.990447
2 df1 C I 0.511492
3 df1 D I 0.242134
4 df1 A J 0.462155
...

# Add a running total per (Name, index) group; the cumulative heights are
# what the plotting step draws to get the stacked look.
# "value" is selected explicitly so the result is a single numeric Series:
# a whole-frame cumsum would also visit the string column "variable".
dfall["vcs"] = dfall.groupby(["Name", "index"])["value"].cumsum()
dfall
Name index variable value vcs
0 df1 A I 0.565242 0.565242
1 df1 B I 0.990447 0.990447
2 df1 C I 0.511492 0.511492
3 df1 D I 0.242134 0.242134
4 df1 A J 0.462155 1.027398 # 这里的1.027398就是df1 A I的值加上df1 A J的值
...

画图

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import seaborn as sns

# Colour assigned to each stacked segment (one entry per original column).
c = {"I": "blue", "J": "purple", "K": "red", "L": "green", "M": "pink"}

# One barplot call per segment, all drawn onto the same axes.
for i, (variable, segment_rows) in enumerate(dfall.groupby("variable")):
    ax = sns.barplot(data=segment_rows,
                     x="index",
                     y="vcs",
                     # hue produces the side-by-side (dodged) bars
                     hue="Name",
                     # color + zorder are the key to the stacked look: every
                     # segment is drawn at its cumulative height, and earlier
                     # segments get a higher zorder so they stay in front
                     color=c[variable],
                     zorder=-i,
                     edgecolor="k")

# Drop the automatically generated legend; a hand-built one is added below.
ax.legend_.remove()
# Build the legend entries from the colour dictionary.
manul_legend_list = [mpatches.Patch(color=v, label=k) for k, v in c.items()]
# Legend font size and anchor position.
plt.legend(handles=manul_legend_list, fontsize=10, bbox_to_anchor=(1, 1))
# Title.
plt.title('Stacked and Dodge bar plot')
# No grid.
plt.grid(False)

python_stacked_dodge_barplot


缺点

上述使用Seaborn实现的缺点是:hue的颜色会逐渐变淡,如果并列的柱子过多,最后颜色太淡可能不能很好地区分

实现方法二:Matplotlib

搬运的代码

这里直接搬运了参考链接中的代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import pandas as pd
import matplotlib.cm as cm
import numpy as np
import matplotlib.pyplot as plt

# H is the hatch pattern used to tell the side-by-side bars apart
# labels holds the legend names of the side-by-side bars (one per dataframe)
# dfall is a list of wide-format dataframes
def plot_clustered_stacked(dfall, labels=None, title="multiple stacked bar plot", H="/", **kwargs):
    """Draw several stacked bar charts side by side as one clustered chart.

    Every dataframe in ``dfall`` is first drawn as a stacked bar chart on one
    shared axes (so the charts overlap). The bars are then narrowed and
    shifted so the dataframes sit next to each other inside each index group,
    and each dataframe gets its own hatch density.

    Parameters
    ----------
    dfall : list of DataFrame
        Wide-format dataframes with identical columns and index.
    labels : list of str, optional
        Names of the dataframes, used for the second (per-dataframe) legend.
        When ``None`` only the column legend is drawn.
    title : str
        Title of the plot.
    H : str
        Hatch string; the k-th dataframe (0-based) is hatched with ``H * k``.
    **kwargs
        Forwarded to ``DataFrame.plot``.

    Returns
    -------
    The axes the chart was drawn on.
    """
    n_df = len(dfall)
    n_col = len(dfall[0].columns)
    n_ind = len(dfall[0].index)
    axe = plt.subplot(111)

    # Draw every dataframe as a stacked chart on the same axes; at this point
    # the charts hide one another.
    for df in dfall:
        axe = df.plot(kind="bar",
                      linewidth=0,
                      stacked=True,
                      ax=axe,
                      legend=False,
                      grid=False,
                      **kwargs)

    # h holds n_col bar containers per dataframe, in plotting order.
    h, l = axe.get_legend_handles_labels()
    slot_width = 1 / float(n_df + 1)
    for df_pos in range(n_df):
        for container in h[df_pos * n_col:(df_pos + 1) * n_col]:
            for rect in container.patches:  # one rectangle per index entry
                # move the overlapping bars apart
                rect.set_x(rect.get_x() + slot_width * df_pos)
                # denser hatch for every further dataframe
                rect.set_hatch(H * df_pos)
                # make the bar narrow enough for n_df bars per group
                rect.set_width(slot_width)

    # Centre each tick under its group of n_df bars. The positions are read
    # back from the first dataframe's (unshifted) bars, so the labels stay
    # centred for any number of dataframes and any bar width.
    first_bars = h[0].patches
    axe.set_xticks([first_bars[k].get_x() + slot_width * n_df / 2 for k in range(n_ind)])
    axe.set_xticklabels(dfall[0].index, rotation=0)
    axe.set_title(title)

    # Legend 1: the stacked segments (columns).
    l1 = axe.legend(h[:n_col], l[:n_col], loc=[1.01, 0.5])
    if labels is not None:
        # Legend 2: one zero-size proxy bar per dataframe carrying its hatch.
        n = [axe.bar(0, 0, color="gray", hatch=H * i) for i in range(n_df)]
        axe.legend(n, labels, loc=[1.01, 0.1])
        # Creating the second legend replaced the first one; add it back.
        axe.add_artist(l1)
    return axe

# Build three random 4x5 demo frames sharing the same row and column labels.
df1, df2, df3 = (
    pd.DataFrame(
        np.random.rand(4, 5),
        index=["A", "B", "C", "D"],
        columns=["I", "J", "K", "L", "M"],
    )
    for _ in range(3)
)

# Draw them as one clustered, stacked chart; the second argument supplies the
# per-dataframe legend labels.
plot_clustered_stacked([df1, df2, df3], ["df1", "df2", "df3"])

python_stacked_dodge_barplot_mat

自己修改

上面搬运的代码是通过在柱体上添加标记来区分不同的并列分类条件,但是我觉得这种方式不是很好,想用边框的颜色来加以区分,这里修改了一下上述代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import pandas as pd
import matplotlib.cm as cm
import numpy as np
import matplotlib.pyplot as plt

def plot_clustered_stacked(dfall, ecs,labels=None, title="multiple stacked bar plot", **kwargs):
    """Clustered + stacked bar chart where clusters differ by outline colour.

    dfall  -- list of dataframes with identical columns and index
    ecs    -- list of edge colours, one per dataframe
    labels -- optional list of dataframe names for the second legend
    title  -- title of the plot
    kwargs -- forwarded to ``DataFrame.plot``

    Returns the axes the chart was drawn on.
    """
    frame_count = len(dfall)
    segment_count = len(dfall[0].columns)
    group_count = len(dfall[0].index)
    # Line width per dataframe: 0 for the first one, 1 for all the others.
    edge_widths = [0] + [1] * (len(dfall) - 1)
    axe = plt.subplot(111)

    # Stack every dataframe onto the same axes (the charts overlap for now).
    for pos, frame in enumerate(dfall):
        axe = frame.plot(
            kind="bar",
            linewidth=edge_widths[pos],
            stacked=True,
            ax=axe,
            legend=False,
            grid=False,
            edgecolor=ecs[pos],
            **kwargs
        )

    handles, names = axe.get_legend_handles_labels()
    slot = 1 / float(frame_count + 1)
    # `start` is the index of a dataframe's first container inside `handles`
    # (there are segment_count containers per dataframe).
    for start in range(0, frame_count * segment_count, segment_count):
        shift = slot * start / float(segment_count)
        # each container is one stacked segment of that dataframe
        for container in handles[start:start + segment_count]:
            # each rectangle belongs to one x-axis category
            for rect in container.patches:
                rect.set_x(rect.get_x() + shift)
                rect.set_width(slot)

    # Put each tick in the middle of its group, measured from the first
    # dataframe's bars.
    lead_bars = handles[0].patches
    slim_width = lead_bars[0].get_width()
    tick_centres = []
    for k in range(group_count):
        tick_centres.append(lead_bars[k].get_x() + slim_width * frame_count / 2)
    axe.set_xticks(tick_centres)
    axe.set_xticklabels(dfall[0].index, rotation=0)
    axe.set_title(title)

    # Zero-size proxy bars that carry each dataframe's outline style; they
    # serve as handles for the second legend.
    proxies = [
        axe.bar(0, 0, color="gray", linewidth=edge_widths[k], edgecolor=ecs[k])
        for k in range(frame_count)
    ]

    segment_legend = axe.legend(handles[:segment_count], names[:segment_count], loc=[1.01, 0.5])
    if labels is not None:
        plt.legend(proxies, labels, loc=[1.01, 0.1])
    axe.add_artist(segment_legend)
    return axe

# Usage: the edge colours are matched to df1, df2, df3 in order.
plot_clustered_stacked(
    [df1, df2, df3],
    ecs=['white', 'black', 'red'],
    labels=["df1", "df2", "df3"],
)

python_stacked_dodge_barplot_mat_own


增加子图兼容性

上述自己修改的代码还存在的问题是不能嵌合到子图中,这里进行修改:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import pandas as pd
import matplotlib.cm as cm
import numpy as np
import matplotlib.pyplot as plt

# The legend parameter controls whether the legends are drawn
def plot_clustered_stacked(dfall, ecs, labels=None, legend=True, title="multiple stacked bar plot", **kwargs):
    """Clustered + stacked bar chart that can be drawn into a given subplot.

    Parameters
    ----------
    dfall : list of DataFrame
        Wide-format dataframes with identical columns and index.
    ecs : list
        Edge colours, one per dataframe, used to tell the clusters apart.
    labels : list of str, optional
        Names of the dataframes, used for the second legend.
    legend : bool
        When False no legend at all is drawn.
    title : str
        Title of the plot.
    **kwargs
        Forwarded to ``DataFrame.plot``. Pass ``ax=<axes>`` to draw into an
        existing subplot; without it the current axes are used.

    Returns
    -------
    The axes the chart was drawn on.
    """
    n_df = len(dfall)
    n_col = len(dfall[0].columns)
    n_ind = len(dfall[0].index)
    # Line width per dataframe: 0 for the first one, 1 for all the others.
    line = [0] + [1] * (n_df - 1)

    # Resolve the target axes once and hand the same axes to every plot call.
    # Leaving ``ax`` unset would let each dataframe open a figure of its own,
    # and only the last one would then be post-processed below.
    axe = kwargs.pop("ax", None)
    if axe is None:
        axe = plt.gca()

    for df_i, df in enumerate(dfall):
        axe = df.plot(kind="bar",
                      linewidth=line[df_i],
                      stacked=True,
                      ax=axe,
                      legend=False,
                      grid=False,
                      edgecolor=ecs[df_i],
                      **kwargs)

    # h holds n_col bar containers per dataframe, in plotting order.
    h, l = axe.get_legend_handles_labels()
    slot_width = 1 / float(n_df + 1)
    for df_pos in range(n_df):
        # every container is one stacked segment of that dataframe
        for container in h[df_pos * n_col:(df_pos + 1) * n_col]:
            # every rectangle belongs to one x-axis category
            for rect in container.patches:
                rect.set_x(rect.get_x() + slot_width * df_pos)
                rect.set_width(slot_width)

    # Centre each tick under its group, measured from the first dataframe's
    # (unshifted) bars.
    first_bars = h[0].patches
    bar_width = first_bars[0].get_width()
    axe.set_xticks([first_bars[k].get_x() + bar_width * n_df / 2 for k in range(n_ind)])
    axe.set_xticklabels(dfall[0].index, rotation=0)
    axe.set_title(title)

    if legend:
        # Legend 1: the stacked segments (columns).
        l1 = axe.legend(h[:n_col], l[:n_col], loc=[1.01, 0.5])
        if labels is not None:
            # Legend 2: zero-size proxy bars carrying each outline style.
            n = [axe.bar(0, 0, color="gray", linewidth=line[i], edgecolor=ecs[i])
                 for i in range(n_df)]
            # Use the target axes, not pyplot's current axes, so the legend
            # lands on the right subplot.
            axe.legend(n, labels, loc=[1.01, 0.1])
            # Creating the second legend replaced the first one; add it back.
            axe.add_artist(l1)
    return axe

# Usage: draw the same chart into both panels of a 1x2 grid; the legends are
# switched off for the left panel and left on for the right one.
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
for panel, show_legend in zip(axes, (False, True)):
    ax = plot_clustered_stacked(
        [df1, df2, df3],
        ecs=['white', 'black', 'red'],
        labels=["df1", "df2", "df3"],
        ax=panel,
        legend=show_legend,
    )
    ax.set_xlabel('')
# To rotate the tick labels:
# ax.set_xticklabels(ax.get_xticklabels(),rotation=15)
plt.tight_layout()

python_stacked_dodge_barplot_mat_own_sub


设置堆叠部分的颜色

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import pandas as pd
import matplotlib.cm as cm
import numpy as np
import matplotlib.pyplot as plt

# The legend parameter controls whether the legends are drawn
def plot_clustered_stacked(dfall, ecs, color_dict, labels=None, legend=True, title="multiple stacked bar plot", **kwargs):
    """Clustered + stacked bar chart with caller-chosen segment colours.

    Parameters
    ----------
    dfall : list of DataFrame
        Wide-format dataframes with identical columns and index.
    ecs : list
        Edge colours, one per dataframe, used to tell the clusters apart.
    color_dict : dict
        Maps a column name to the fill colour of that stacked segment. It is
        queried with ``.get``, so a column missing from the dict yields
        ``None`` as its colour.
    labels : list of str, optional
        Names of the dataframes, used for the second legend.
    legend : bool
        When False no legend at all is drawn.
    title : str
        Title of the plot.
    **kwargs
        Forwarded to ``DataFrame.plot``. Pass ``ax=<axes>`` to draw into an
        existing subplot; without it the current axes are used.

    Returns
    -------
    The axes the chart was drawn on.
    """
    n_df = len(dfall)
    n_col = len(dfall[0].columns)
    n_ind = len(dfall[0].index)
    # Line width per dataframe: 0 for the first one, 1 for all the others.
    line = [0] + [1] * (n_df - 1)

    # Resolve the target axes once and hand the same axes to every plot call.
    # Leaving ``ax`` unset would let each dataframe open a figure of its own,
    # and only the last one would then be post-processed below.
    axe = kwargs.pop("ax", None)
    if axe is None:
        axe = plt.gca()

    for df_i, df in enumerate(dfall):
        axe = df.plot(kind="bar",
                      linewidth=line[df_i],
                      stacked=True,
                      ax=axe,
                      legend=False,
                      grid=False,
                      color=[color_dict.get(x) for x in df.columns],
                      edgecolor=ecs[df_i],
                      **kwargs)

    # h holds n_col bar containers per dataframe, in plotting order.
    h, l = axe.get_legend_handles_labels()
    slot_width = 1 / float(n_df + 1)
    for df_pos in range(n_df):
        # every container is one stacked segment of that dataframe
        for container in h[df_pos * n_col:(df_pos + 1) * n_col]:
            # every rectangle belongs to one x-axis category
            for rect in container.patches:
                rect.set_x(rect.get_x() + slot_width * df_pos)
                rect.set_width(slot_width)

    # Centre each tick under its group, measured from the first dataframe's
    # (unshifted) bars.
    first_bars = h[0].patches
    bar_width = first_bars[0].get_width()
    axe.set_xticks([first_bars[k].get_x() + bar_width * n_df / 2 for k in range(n_ind)])
    axe.set_xticklabels(dfall[0].index, rotation=0)
    axe.set_title(title)

    if legend:
        # Legend 1: the stacked segments (columns).
        l1 = axe.legend(h[:n_col], l[:n_col], loc=[1.01, 0.5])
        if labels is not None:
            # Legend 2: zero-size proxy bars carrying each outline style.
            n = [axe.bar(0, 0, color="gray", linewidth=line[i], edgecolor=ecs[i])
                 for i in range(n_df)]
            # Use the target axes, not pyplot's current axes, so the legend
            # lands on the right subplot.
            axe.legend(n, labels, loc=[1.01, 0.1])
            # Creating the second legend replaced the first one; add it back.
            axe.add_artist(l1)
    return axe

Seaborn的color palettes


实现方法三:封装的函数

已经有人针对这个问题封装了一个很完善的函数,不过它也是基于Seaborn的,所以Seaborn的缺陷该函数同样存在,只是程度已经减轻了很多。

封装的函数下载地址,使用帮助文档.

1
2
3
4
5
6
7
8
9
10
11
12
# Download the helper module first:
# https://gitlab.com/ciraig/lca_standard_graphs/-/blob/master/lca_standard_graphs.py
import sys

# Make the directory that holds the downloaded module importable.
sys.path.append('/home/user/scripts/common_use/')
import lca_standard_graphs as lsg

# Build the dataframe used for plotting from the three frames and their names.
scenario_frames = [df1, df2, df3]
scenario_names = ["df1", "df2", "df3"]
comp = lsg.build_comparison_table(scenario_frames, scenario_names, fillna=0.0)
comp.index.names = ['Cat', 'Scenarios']

# Plot.
ax, fig = lsg.plot_grouped_stackedbars(
    comp,
    ix_categories='Cat',
    ix_entities_compared='Scenarios',
    norm=None,
)

python_stacked_dodge_barplot_sea_fun


参考链接



-----本文结束感谢您的阅读-----

本文标题:Python绘图系列之同时绘制并列和堆叠柱状图

文章作者:showteeth

发布时间:2020年08月06日 - 19:16

最后更新:2020年08月07日 - 13:47

原始链接:http://showteeth.tech/posts/290.html

许可协议: 署名-非商业性使用-禁止演绎 4.0 国际 转载请保留原文链接及作者。

0%