Module omniplot.proportion
Expand source code
from typing import Union, Optional, Dict, List
import matplotlib.collections as mc
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
from matplotlib import cm
from matplotlib.lines import Line2D
from scipy.cluster.hierarchy import leaves_list
from scipy.cluster import hierarchy
from collections import defaultdict
import matplotlib.colors
from natsort import natsort_keygen
from matplotlib.patches import Rectangle
import scipy.cluster.hierarchy as sch
import fastcluster as fcl
from matplotlib.patches import Patch
import sys
import matplotlib as mpl
from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import silhouette_score
from scipy.spatial.distance import pdist, squareform
from sklearn.decomposition import PCA, NMF, LatentDirichletAllocation
from scipy.stats import fisher_exact
from scipy.stats import zscore
from itertools import combinations
import os
#script_dir = os.path.dirname( __file__ )
#sys.path.append( script_dir )
from omniplot.utils import * #
import scipy.stats as stats
from joblib import Parallel, delayed
from omniplot.chipseq_utils import _calc_pearson
import itertools as it
from matplotlib.ticker import StrMethodFormatter
from omniplot.scatter import *
# Names of matplotlib colormaps kept as a module-level list.
# NOTE(review): not referenced by the functions visible in this part of the file — confirm usage elsewhere.
colormap_list: list=["nipy_spectral", "terrain","tab20b","tab20c","gist_rainbow","hsv","CMRmap","coolwarm","gnuplot","gist_stern","brg","rainbow","jet"]
# Hatch patterns; the bar/pie functions below pick hatch_list[i] for the i-th stacked series (20 entries).
hatch_list: list = ['//', '\\\\', '||', '--', '++', 'xx', 'oo', 'OO', '..', '**','/o', '\\|', '|*', '-\\', '+o', 'x*', 'o-', 'O|', 'O.', '*-']
# Marker symbols. NOTE(review): the name looks like a typo of "marker_list"; it is kept because
# other modules may import it under this name — confirm before renaming.
maker_list: list=['.', '_' , '+','|', 'x', 'v', '^', '<', '>', 's', 'p', '*', 'h', 'D', 'd', 'P', 'X','o', '1', '2', '3', '4','|', '_']
# Global matplotlib/seaborn settings applied at import time: Arial as the sans-serif font,
# and svg.fonttype "none" (presumably so text in SVG output stays as text — see matplotlib rcParams docs).
plt.rcParams['font.family']= 'sans-serif'
plt.rcParams['font.sans-serif'] = ['Arial']
plt.rcParams['svg.fonttype'] = 'none'
sns.set_theme(font="Arial")
# Names exported by "from omniplot.proportion import *".
# NOTE(review): "stackedlines" and "nested_piechart" are not defined in the part of the file shown here,
# and the private "_nested_piechart" is exported as well — confirm both are intended.
__all__=["stacked_barplot","nice_piechart","nice_piechart_num","stackedlines","nested_piechart","_nested_piechart","stacked_barplot_num"]
def _stacked_barplot(df: pd.DataFrame,
                     x: Union[str, list],
                     hue: Union[str, list],
                     scale: str="fraction",
                     order: list=[],
                     hue_order: list=[],
                     test_pairs: List[List[str]]=[],
                     show_values: bool=True,
                     show: bool=False,
                     figsize: List[int]=[4,6],
                     xunit: str="",
                     yunit: str="",
                     title: str="",
                     hatch: bool=False)-> Dict:
    """
    Draw one stacked barplot of category counts, optionally with Fisher's exact tests.

    Parameters
    ----------
    df : pandas DataFrame
        The input table. It is not modified.
    x: str
        The column whose values are placed on the x axis.
    hue: str
        The column whose values are counted within each x value.
    scale: str, optional
        Scaling method. Available options are: fraction, percentage, absolute
    order: list, optional
        The order of x axis labels. If empty, the sorted unique values are used.
    hue_order: list, optional
        The order of hue labels. If empty, the sorted unique values are used.
    test_pairs : list, optional
        Pairs of x values to compare with the Fisher's exact test, e.g.
        [["Adelie","Chinstrap"], ["Gentoo","Chinstrap"]]. Significant pairs (p < 0.05)
        are connected by a line annotated with mlp = -log10(p value).
    show_values: bool, optional
        Whether to print the fraction/count/percentage inside each bar segment.
    show : bool, optional
        Whether or not to show the figure.
    figsize : List[int], optional
        The figure size, e.g., [4, 6].
    xunit, yunit: str, optional
        Accepted for compatibility; currently unused.
    title: str, optional
        The figure title.
    hatch: bool, optional
        Adding hatches to the bars.

    Returns
    -------
    dict
        {"pval": pvals, "axes": ax}. pvals maps each hue value to a list of
        [index of the first x value, index of the second x value, p value].

    Raises
    ------
    ValueError
        If scale is not one of "fraction", "percentage" or "absolute".
    """
    scale_settings={"absolute": ("Counts", ""),
                    "fraction": ("Fraction", ""),
                    "percentage": ("Percentage", "%")}
    if scale not in scale_settings:
        raise ValueError("scale must be one of 'fraction', 'percentage' or 'absolute', got {!r}".format(scale))
    ylabel, unit=scale_settings[scale]

    # Missing values become their own "NA" category. Replacement is done on local
    # Series so that the caller's DataFrame is left untouched.
    xcol=df[x]
    if xcol.isnull().values.any():
        xcol=xcol.replace(np.nan, "NA")
    huecol=df[hue]
    if huecol.isnull().values.any():
        huecol=huecol.replace(np.nan, "NA")

    keys=sorted(list(np.unique(xcol))) if len(order)==0 else list(order)
    hues=sorted(list(np.unique(huecol))) if len(hue_order)==0 else list(hue_order)

    # data[key] holds one count per hue value (later rescaled if requested).
    hue_masks=[huecol==h for h in hues]
    data: Dict={}
    for key in keys:
        xmask=xcol==key
        data[key]=[np.sum(xmask & m) for m in hue_masks]

    # Fisher's exact tests must use the raw counts, so they run before scaling.
    pvals={}
    if len(test_pairs)>0:
        for i, h in enumerate(hues):
            pvals[h]=[]
            for p1, p2 in test_pairs:
                idx1=keys.index(p1)
                idx2=keys.index(p2)
                yes_total=np.sum(data[keys[idx1]])
                no_total=np.sum(data[keys[idx2]])
                yes_and_hue=data[keys[idx1]][i]
                no_and_hue=data[keys[idx2]][i]
                table=[[yes_and_hue, no_and_hue],
                       [yes_total-yes_and_hue, no_total-no_and_hue]]
                _, pval=fisher_exact(table)
                pvals[h].append([idx1, idx2, pval])

    if scale!="absolute":
        factor=100 if scale=="percentage" else 1
        for key in keys:
            counts=np.array(data[key])
            total=np.sum(counts)
            # A category without observations is drawn as an empty bar instead of NaN.
            data[key]=counts/total*factor if total>0 else np.zeros(len(hues))

    cmap=plt.get_cmap("tab20b")
    fig, ax=plt.subplots(figsize=figsize)
    fig.subplots_adjust(left=0.2, right=0.6, bottom=0.17)

    # Bars are placed at integer positions so that bars, value labels and test
    # lines share the same coordinates whatever the type of the keys.
    positions=np.arange(len(keys))
    bottom=np.zeros([len(keys)])
    textbox=dict(boxstyle="round,pad=0.3", fc="white", ec="y", lw=1, alpha=0.8)
    value_format="{}{}" if scale=="absolute" else "{:.2f}{}"
    pos={}
    for i, h in enumerate(hues):
        heights=np.array([data[key][i] for key in keys])
        bar_kw=dict(width=0.5, bottom=bottom, color=cmap(i/len(hues)), label=h)
        if hatch==True:
            # Cycle through the patterns when there are more hues than patterns.
            bar_kw["hatch"]=hatch_list[i%len(hatch_list)]
        ax.bar(positions, heights, **bar_kw)
        if show_values==True:
            for j in range(len(keys)):
                ax.text(j, bottom[j]+heights[j]/2, value_format.format(heights[j], unit), bbox=textbox)
        pos[h]={key: [he, bo] for key, he, bo in zip(keys, heights, bottom)}
        bottom=bottom+heights
    ax.set_xticks(positions)
    ax.set_xticklabels([str(k) for k in keys], rotation=90)
    ax.legend(loc=[1.01,0])
    ax.set_xlabel(x)
    ax.set_ylabel(ylabel)

    if len(pvals)>0:
        print("mlp stands for -log10(p value)")
        for h in hues:
            _pos=pos[h]
            for idx1, idx2, pval in pvals[h]:
                if pval < 0.05:
                    he1, bot1=_pos[keys[idx1]]
                    he2, bot2=_pos[keys[idx2]]
                    # Connect the centres of the two compared bar segments.
                    line, =ax.plot([idx1,idx2],[he1/2+bot1,he2/2+bot2],color="gray")
                    _line_annotate( "mlp="+str(np.round(-np.log10(pval), decimals=1)), line, (idx1+idx2)/2, color="magenta")

    # The title has to be set before the figure is displayed.
    if title !="":
        fig.suptitle(title)
    if show:
        plt.show()
    return {"pval":pvals,"axes":ax}
def _float2hist(val, bin_num=10):
nans=np.isnan(val)
ranges=[]
allcounts=[]
if np.sum(nans)>0:
val=val[~nans]
nancount=np.sum(nans)
ranges.append("NA")
allcounts.append(nancount)
n=(np.amax(val)-np.amin(val))/bin_num
binsize=round(n,-round(np.log10(n)))
_n=np.abs(np.amin(val))
minval=round(_n,-round(np.log10(n)))
if np.amin(val)<0:
minval-=binsize
_n=np.abs(np.amax(val))
maxval=round(_n,-round(np.log10(n)))
bins=np.linspace(minval, maxval, bin_num+1)
counts, _=np.histogram(val, bins=bins)
for i in range(counts.shape[0]):
if np.abs(bins[i])<0.001 or np.abs(bins[i])>=10**3:
num1="{:.2E}".format(bins[i])
num2="{:.2E}".format(bins[i+1])
else:
num1=str(bins[i])
num2=str(bins[i+1])
ranges.append(num1+"-"+num2)
allcounts.append(counts[i])
return ranges, allcounts
def _float2cat(val, bin_num=10):
nans=np.isnan(val)
if np.sum(nans)>0:
_val=val[~nans]
else:
_val=val
n=(np.amax(_val)-np.amin(_val))/bin_num
binsize=round(n,-round(np.log10(n)))
_n=np.abs(np.amin(_val))
minval=round(_n,-round(np.log10(n)))
if np.amin(_val)<minval:
minval-=binsize
if np.amin(_val) >0 and minval <0:
minval=0
_n=np.abs(np.amax(_val))
maxval=round(_n,-round(np.log10(n)))
if maxval<np.amax(_val):
maxval+=binsize
bins=[ minval+i*binsize for i in range(bin_num) if minval+i*binsize<=maxval]
bindict={}
for b in bins[:-1]:
if binsize<0.001 or binsize>=10**3:
num1="{:.2E}".format(b)
num2="{:.2E}".format(b+binsize)
else:
num1=str(b)
num2=str(b+binsize)
bindict[b]=num1+"-"+num2
val_string=[]
for v in val:
if np.isnan(v)==True:
val_string.append("NA")
else:
label=""
for i in range(len(bins)-1):
if bins[i] <=v< bins[i]+binsize:
label=bindict[bins[i]]
break
if label=="":
if v==maxval:
label=bindict[bins[i]]
val_string.append(label)
return val_string
def stacked_barplot(df: pd.DataFrame,
                    x: Union[str, list],
                    hue: Union[str, list],
                    scale: str="fraction",
                    order: Optional[list]=None,
                    hue_order: Optional[list]=None,
                    test_pairs: List[List[str]]=[],
                    palette: Union[str,dict]="tab20c",
                    show_values: bool=True,
                    show_values_intact: bool=False,
                    show: bool=False,
                    figsize: List[int]=[],
                    xunit: str="",
                    yunit: str="",
                    title: str="",
                    hatch: bool=False,
                    rotation: int=90,
                    ax: Optional[plt.Axes]=None,
                    show_legend:bool=True,
                    bin_num: Union[dict, int]=10,
                    ylim: Optional[int]=None)-> Dict:
    """
    Drawing a stacked barplot by counting observations with or without the fisher's exact test.

    One panel is drawn for every combination of an x column and a different hue column.

    Parameters
    ----------
    df : pandas DataFrame or dict
        The input table. It is not modified.
    x: str or list
        The category to place in x axis. Multiple categories can be passed by a list.
        It mainly works with categorical values, including strings, booleans and integers,
        but also can take float values by automatically creating a histogram.
    hue: str or list
        Counting samples by the hue category. Multiple categories can be passed by a list.
    scale: str, optional
        Scaling method. Available options are: fraction, percentage, absolute
    order: list, optional
        The order of x axis labels (a list of lists when x is a list).
    hue_order: list, optional
        The order of hue labels (a list of lists when hue is a list).
    test_pairs : list, optional
        Pairs of categorical values related to x. It will calculate -log10 (p value) (mlp) of the fisher exact test.
        Examples: [["Adelie","Chinstrap" ],
                   ["Gentoo","Chinstrap" ],
                   ["Adelie","Gentoo" ]]
    palette : str or dict, optional (default: "tab20c")
        A matplotlib colormap name or dictionary in which keys are values of the hue category and values are colors.
    show_values: bool, optional
        Whether to exhibit the values of fractions/counts/percentages.
    show_values_intact: bool, optional
        Whether to exhibit non-scaled counts instead of the scaled values.
    show : bool, optional
        Whether or not to show the figure.
    figsize : List[int], optional
        The figure size, e.g., [4, 6].
    xunit, yunit: str, optional
        Accepted for compatibility; currently unused.
    title: str optional, (default:"")
        The title of the figure.
    hatch: bool, optional (default: False)
        Adding hatches to the bars
    rotation: int, optional (default:90)
        The orientation of the x axis labels.
    ax: plt.Axes or a sequence of plt.Axes, optional (default: None)
        The ax object(s) to be plotted. One Axes per panel is required.
    show_legend: bool, optional (default: True)
        Whether to show legends.
    bin_num: dict, int, optional (default: 10)
        A histogram bin number when columns with float values are selected.
        You can specify the bin number of each column by using dictionary (e.g., bin_num={"A":10,"B":5}).
    ylim: int, optional
        The upper limit of the y axis (only used when scale is "absolute").

    Returns
    -------
    {"pval":pvals,"axes":ax,"data":data} : dict
        pvals[x column][hue column][hue value] is a list of [index1, index2, p value];
        ax is the last panel drawn; data[x column][hue column][x value] holds the plotted values.

    Raises
    ------
    ValueError
        If scale is invalid or fewer Axes than panels are supplied.
    Exception
        If the two members of a test pair do not belong to the same x column.
    """
    scale_settings={"absolute": ("Counts", ""),
                    "fraction": ("Fraction", ""),
                    "percentage": ("Percentage", "%")}
    if scale not in scale_settings:
        raise ValueError("scale must be one of 'fraction', 'percentage' or 'absolute', got {!r}".format(scale))
    ylabel, unit=scale_settings[scale]

    if isinstance(df, dict):
        df=pd.DataFrame(df)
    if isinstance(x, str):
        x=[x]
        if order is not None:
            order=[order]
    if isinstance(hue, str):
        hue=[hue]
        if hue_order is not None:
            hue_order=[hue_order]

    # Work on a copy of the needed columns only, so the caller's DataFrame is never modified.
    columns=list(dict.fromkeys(list(x)+list(hue)))
    df=df[columns].copy()
    binned_order={}
    for col in columns:
        if pd.api.types.is_float_dtype(df[col]):
            nbins=bin_num.get(col, 10) if isinstance(bin_num, dict) else bin_num
            raw=df[col].to_numpy(dtype=float, na_value=np.nan)
            labels=np.asarray(_float2cat(raw, bin_num=nbins), dtype=object)
            # Order the bin labels by the values they contain ("NA" last); sorting the
            # label strings would put e.g. "10.0-12.0" before "2.0-4.0".
            binned_order[col]=pd.Series(raw).groupby(labels).min().sort_values(na_position="last").index.tolist()
            df[col]=labels
        elif df[col].isnull().values.any():
            df[col]=df[col].replace(np.nan, "NA")

    def _levels(col, explicit):
        # Category levels of a column: user supplied, histogram order, or sorted unique values.
        if explicit is not None:
            return list(explicit)
        if col in binned_order:
            return binned_order[col]
        return sorted(np.unique(df[col]).tolist())

    xkeys={_x: _levels(_x, None if order is None else order[i]) for i, _x in enumerate(x)}
    huekeys={_hue: _levels(_hue, None if hue_order is None else hue_order[i]) for i, _hue in enumerate(hue)}
    meankey_len=sum(len(xkeys[_x]) for _x in x)//len(x)

    # counts[x column][hue column][x value] -> list of raw counts, one per hue value.
    counts: dict={}
    for _x in x:
        counts[_x]={}
        for _hue in hue:
            if _x==_hue:
                continue
            hue_masks=[df[_hue]==h for h in huekeys[_hue]]
            counts[_x][_hue]={}
            for key in xkeys[_x]:
                xmask=df[_x]==key
                counts[_x][_hue][key]=[int(np.sum(xmask & m)) for m in hue_masks]

    # Fisher's exact tests are computed from the raw counts.
    pvals={}
    if len(test_pairs)>0:
        print("mlp stands for -log10(p value)")
        for p1, p2 in test_pairs:
            if not any(p1 in keys and p2 in keys for keys in xkeys.values()):
                raise Exception("{} and {} can not be compared.".format(p1, p2))
        for _x in x:
            keys=xkeys[_x]
            pairs=[(keys.index(p1), keys.index(p2)) for p1, p2 in test_pairs if p1 in keys and p2 in keys]
            for _hue in counts[_x]:
                for i, h in enumerate(huekeys[_hue]):
                    for idx1, idx2 in pairs:
                        c1=counts[_x][_hue][keys[idx1]]
                        c2=counts[_x][_hue][keys[idx2]]
                        table=[[c1[i], c2[i]],
                               [sum(c1)-c1[i], sum(c2)-c2[i]]]
                        _, pval=fisher_exact(table)
                        pvals.setdefault(_x, {}).setdefault(_hue, {}).setdefault(h, []).append([idx1, idx2, pval])

    # Scale the counts per x value; every x column uses its own keys.
    data: dict={}
    for _x in counts:
        data[_x]={}
        for _hue in counts[_x]:
            data[_x][_hue]={}
            for key, c in counts[_x][_hue].items():
                if scale=="absolute":
                    data[_x][_hue][key]=c
                    continue
                c=np.array(c)
                total=np.sum(c)
                # A category without observations becomes an empty bar instead of NaN.
                frac=c/total if total>0 else np.zeros(len(c))
                data[_x][_hue][key]=frac*100 if scale=="percentage" else frac

    cmap=None if isinstance(palette, dict) else plt.get_cmap(palette)

    panels=[(_x, _hue) for _x in x for _hue in hue if _x!=_hue]
    ncols=max(len(panels), 1)
    if len(figsize)==0:
        figsize=[(4+0.1*meankey_len)*ncols, 6]
    if ax is not None:
        # Accept either a single Axes or any sequence/array of Axes.
        axes=[ax] if isinstance(ax, plt.Axes) else list(np.ravel(ax))
        if len(axes)<len(panels):
            raise ValueError("{} Axes are required but {} were given.".format(len(panels), len(axes)))
        fig=None
    else:
        fig, axes=plt.subplots(figsize=figsize, ncols=ncols)
        axes=list(np.ravel(axes))

    textbox=dict(boxstyle="round,pad=0.3", fc="white", ec="y", lw=1, alpha=0.8)
    for ax, (_x, _hue) in zip(axes, panels):
        keys=xkeys[_x]
        hues=huekeys[_hue]
        # Integer positions keep bars, value labels and test lines aligned for any key type.
        positions=np.arange(len(keys))
        bottom=np.zeros([len(keys)])
        centers={}
        for i, h in enumerate(hues):
            heights=np.array([data[_x][_hue][key][i] for key in keys])
            color=palette[h] if isinstance(palette, dict) else cmap(i/len(hues))
            bar_kw=dict(width=0.5, bottom=bottom, color=color, label=h)
            if hatch==True:
                # Cycle through the patterns when there are more hues than patterns.
                bar_kw["hatch"]=hatch_list[i%len(hatch_list)]
            ax.bar(positions, heights, **bar_kw)
            if show_values==True:
                for j, key in enumerate(keys):
                    if show_values_intact==True:
                        text="{}".format(counts[_x][_hue][key][i])
                    elif scale=="absolute":
                        text="{}{}".format(heights[j], unit)
                    else:
                        text="{:.2f}{}".format(heights[j], unit)
                    ax.text(j, bottom[j]+heights[j]/2, text, bbox=textbox)
            centers[h]=bottom+heights/2
            bottom=bottom+heights
        ax.set_xticks(positions)
        ax.set_xticklabels([str(k) for k in keys], rotation=rotation)
        if len(keys)==1:
            ax.margins(x=1)
        if show_legend==True:
            ax.legend(title=_hue, loc=[1.01,0])
        ax.set_xlabel(_x)
        ax.set_ylabel(ylabel)

        # Annotate this panel with its own test results.
        for h, tests in pvals.get(_x, {}).get(_hue, {}).items():
            for idx1, idx2, pval in tests:
                line, =ax.plot([idx1, idx2], [centers[h][idx1], centers[h][idx2]], color="gray")
                if pval < 0.05:
                    pval_str=str(np.round(-np.log10(pval), decimals=1))
                else:
                    pval_str="ns"
                _line_annotate( "mlp="+pval_str, line, (idx1+idx2)/2, color="magenta")
        if scale=="absolute" and ylim is not None:
            ax.set_ylim(0, ylim)

    if len(x)==1 and len(hue)==1:
        plt.subplots_adjust(top=0.93,left=0.1,right=0.7, bottom=0.17)
    else:
        plt.tight_layout(w_pad=2)
    if title !="" and fig is not None:
        fig.suptitle(title)
    if show:
        plt.show()
    return {"pval":pvals,"axes":ax,"data":data}
def stacked_barplot_num(df: pd.DataFrame,
                        x: str="",
                        scale: str="fraction",
                        test_pairs: List[List[str]]=[],
                        palette: Union[str,dict]="tab20c",
                        show_values: bool=True,
                        show_values_intact: bool=False,
                        show: bool=False,
                        figsize: List[int]=[],
                        xlabel: str="",
                        ylabel: str="",
                        unit: Union[str, List, Dict]="",
                        title: str="",
                        hatch: bool=False,
                        rotation: Optional[int]=None,
                        ax: Optional[plt.Axes]=None,
                        show_legend:bool=True,
                        ylim: Optional[int]=None,
                        horizontal: bool=False,
                        margins: dict={},
                        gridspec_kw: dict={},
                        legend_row_num: int=2,
                        bar_width: float=0.75
                        )-> Dict:
    """
    Drawing a stacked barplot by taking values as an input.

    Each row of the table becomes one bar and each (numerical) column one stacked segment.

    Parameters
    ----------
    df : pandas DataFrame or dict
        A wide-form data format.
        e.g.,
                           Bournemouth   Brighton
        Possession              36          64
        Shots                   14          16
        Shots on Target          3           6
        Corners                  5           5
        Fouls                   11           5
    x: str
        The column holding the bar labels. If not set, it will use the index of the input dataframe.
    scale: str, optional
        Scaling method. Available options are: fraction, percentage, absolute
    test_pairs : list, optional
        Accepted for compatibility; statistical tests are currently not implemented
        for this function and the returned "pval" is always empty.
    palette : str or dict, optional (default: "tab20c")
        A matplotlib colormap name or dictionary in which keys are column names and values are colors.
    show_values: bool, optional
        Whether to exhibit the values of fractions/counts/percentages.
    show_values_intact: bool, optional
        Whether to exhibit non-scaled values.
    show : bool, optional
        Whether or not to show the figure.
    figsize : List[int], optional
        The figure size, e.g., [4, 6].
    xlabel, ylabel: str, optional
        Axis labels. The label of the value axis defaults to the name of the scale
        unless show_values_intact is True.
    unit: str, list or dict, optional
        The unit appended to the printed values: one string for all bars, a list with
        one unit per bar, or a dictionary mapping bar labels to units.
    title: str optional, (default:"")
        The title of the figure.
    hatch: bool, optional (default: False)
        Adding hatches to the bars
    rotation: int, optional
        The orientation of the bar labels (default: 90 for vertical and 0 for horizontal bars).
    ax: plt.Axes, optional (default: None)
        The ax object to be plotted.
    show_legend: bool, optional (default: True)
        Whether to show legends.
    ylim: int, optional
        The upper limit of the value axis (only used when scale is "absolute").
    horizontal: bool, optional (default: False)
        Whether to draw horizontal bars.
    margins: dict, optional
        Keyword arguments for Axes.margins.
    gridspec_kw: dict, optional
        Keyword arguments for the grid layout of a newly created figure.
    legend_row_num: int, optional (default: 2)
        The number of legend rows for horizontal bars.
    bar_width: float, optional (default: 0.75)
        The thickness of the bars.

    Returns
    -------
    {"pval":pvals,"axes":ax}: dict
    """
    if isinstance(df, dict):
        df=pd.DataFrame(df)
    pvals={}

    if len(gridspec_kw)==0:
        if horizontal==True:
            gridspec_kw=dict(top=0.75,left=0.25,right=0.9, bottom=0.17)
        else:
            gridspec_kw=dict(top=0.93,left=0.1,right=0.7, bottom=0.17)
    if len(margins)==0:
        if horizontal==True and scale!="absolute":
            margins=dict(x=0)
        else:
            margins=dict(x=0.1)
    if len(figsize)==0:
        if horizontal==True:
            figsize=[6, len(df.index)/2]
        else:
            figsize=[len(df.columns)+1, 6]
    if ax is not None:
        fig=None
    else:
        fig, ax=plt.subplots(figsize=figsize,gridspec_kw=gridspec_kw)
    # Apply the margins to the target Axes, which is not necessarily the current one.
    ax.margins(**margins)

    if isinstance(unit, str) and unit=="" and scale=="percentage":
        unit="%"

    if x=="":
        keys=list(df.index)
        _df=df
    else:
        keys=list(df[x])
        _df=df.drop(columns=[x])
    if scale in ("fraction", "percentage"):
        # Rows summing to zero are drawn as empty bars instead of NaN.
        totals=_df.sum(axis=1).replace(0, np.nan)
        _df=_df.div(totals, axis='rows').fillna(0)
        if scale=="percentage":
            _df=100*_df
    cmap=None if isinstance(palette, dict) else plt.get_cmap(palette, len(_df.columns))

    # Integer positions keep bars and value labels aligned even when the row
    # labels are duplicated or are not strings.
    positions=np.arange(len(keys))
    offset=np.zeros([len(keys)])
    textbox=dict(boxstyle="round,pad=0.3", fc="white", ec="y", lw=1, alpha=0.8)
    for i, col in enumerate(_df.columns):
        sizes=_df[col].values
        originalval=df[col].values
        style=dict(color=palette[col] if isinstance(palette, dict) else cmap(i), label=col)
        if hatch==True:
            # Cycle through the patterns when there are more columns than patterns.
            style["hatch"]=hatch_list[i%len(hatch_list)]
        if horizontal==True:
            ax.barh(positions, width=sizes, height=bar_width, left=offset, **style)
        else:
            ax.bar(positions, sizes, width=bar_width, bottom=offset, **style)
        if show_values==True:
            for j, k in enumerate(keys):
                if isinstance(unit, dict):
                    u=unit.get(k, "")
                elif isinstance(unit, list):
                    u=unit[j]
                else:
                    u=unit
                if show_values_intact==True:
                    text="{}{}".format(originalval[j], u)
                else:
                    text="{:.2f}{}".format(sizes[j], u)
                center=offset[j]+sizes[j]/2
                if horizontal==True:
                    ax.text(center, j, text, va="center", ha="center", bbox=textbox)
                else:
                    ax.text(j, center, text, ha="center", bbox=textbox)
        offset=offset+sizes

    if show_legend==True:
        if horizontal==True:
            ax.legend(loc=[0,1.01], ncol=len(_df.columns)//legend_row_num+int(len(_df.columns)%legend_row_num!=0))
        else:
            ax.legend(loc=[1.01,0])

    # Default label of the value axis (left empty when raw values are displayed).
    value_label=""
    if show_values_intact==False:
        value_label={"absolute": "Counts", "fraction": "Fraction", "percentage": "Percentage"}.get(scale, "")
    ticklabels=[str(k) for k in keys]
    if horizontal==True:
        if rotation is None:
            rotation=0
        ax.set_yticks(positions)
        ax.set_yticklabels(ticklabels, rotation=rotation)
        if len(keys)==1:
            # Pad the category axis (y for horizontal bars) so a single bar is not stretched.
            ax.margins(y=1)
        ax.set_ylabel(ylabel)
        if xlabel=="":
            xlabel=value_label
        ax.set_xlabel(xlabel)
    else:
        if rotation is None:
            rotation=90
        ax.set_xticks(positions)
        ax.set_xticklabels(ticklabels, rotation=rotation)
        if len(keys)==1:
            ax.margins(x=1)
        ax.set_xlabel(xlabel)
        if ylabel=="":
            ylabel=value_label
        ax.set_ylabel(ylabel)

    if scale=="absolute" and ylim is not None:
        # The limit applies to the value axis, which is x for horizontal bars.
        if horizontal==True:
            ax.set_xlim(0, ylim)
        else:
            ax.set_ylim(0, ylim)
    if horizontal==True:
        # Put the first row at the top.
        ax.invert_yaxis()
    if title !="" and fig is not None:
        fig.suptitle(title)
    if show:
        plt.show()
    return {"pval":pvals,"axes":ax}
def nice_piechart(df: pd.DataFrame,
                  category: Union[str, List[str]],
                  palette: str="tab20c",
                  order: str="largest",
                  ncols: int=2,
                  ignore: float=0.05,
                  show_values: bool=True,
                  title: str="",
                  hatch: bool=False,
                  figsize: list=[],
                  show_legend:bool=False,
                  bbox_to_anchor: list=[1.1, 1],
                  right: float=0.7,bottom=0.1) ->Dict:
    """
    Drawing a nice piechart by counting the occurrence of values from pandas dataframe.
    If you want to draw a pie chart from numerical values, try nice_piechart_num.

    Parameters
    ----------
    df : pandas DataFrame
    category: str or list
        The column names for counting values. One pie chart is drawn per column.
    palette : str or dict, optional (default: "tab20c")
        A matplotlib colormap name or dictionary in which keys are values of the category and values are colors.
    order: str, optional (default: "largest") ["largest", "smallest"]
        How to sort values in the chart
    ncols: int, optional (default: 2)
        The column number of the figure.
    ignore : float, optional (default: 0.05)
        Remove annotations of which the fraction is smaller than this value
        (these slices are drawn in black).
    show_values: bool, optional
        Whether to add the percentage to the labels.
    title: str optional, (default:"")
        The title of the figure.
    hatch: bool, optional (default: False)
        Adding hatches to the slices (labels are then shown in a legend).
    figsize : List[int], optional
        The figure size, e.g., [4, 6].
    show_legend: bool, optional (default: False)
        Whether to show labels in a legend instead of inside the pie.
    bbox_to_anchor: list, optional (default: [1.1, 1])
        The anchor position of the legend.
    right, bottom: float, optional
        Accepted for compatibility; currently unused.

    Returns
    -------
    {"axes":ax}: dict
        ax is the last pie chart drawn.

    Raises
    ------
    ValueError
        If order is invalid or category is empty.
    """
    if order not in ("largest", "smallest"):
        raise ValueError("order must be 'largest' or 'smallest', got {!r}".format(order))
    if isinstance(category, str):
        category=[category]
    if len(category)==0:
        raise ValueError("category must contain at least one column name")
    nrows=len(category)//ncols+int(len(category)%ncols!=0)
    # Labels go to a legend when requested or when hatches would make in-pie labels unreadable.
    use_legend=show_legend==True or hatch==True
    if len(figsize)==0:
        figsize=[ncols*(4 if use_legend else 3), nrows*3]
    # squeeze=False guarantees an array of Axes even for a single pie chart.
    fig, axes=plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, squeeze=False)
    axes=axes.flatten()
    for cat, ax in zip(category, axes):
        counted=df[cat].value_counts(dropna=False, ascending=(order=="smallest"))
        c=counted.to_numpy()
        fractions=c/np.sum(c)
        if isinstance(palette, dict):
            colors=[palette[v] for v in counted.index]
        else:
            _cmap=plt.get_cmap(palette, c.shape[0])
            colors=[_cmap(i) for i in range(c.shape[0])]
        # Labels are built as Python strings; writing them back into a fixed-width
        # NumPy string array would silently truncate the appended percentage.
        labels=[]
        for j, (value, frac) in enumerate(zip(counted.index, fractions)):
            if frac<ignore:
                colors[j]=[0,0,0,1]
                labels.append("")
                continue
            label="NA" if pd.isnull(value) else str(value)
            if show_values==True:
                label=label+"\n("+str(np.round(100*frac,1))+"%)"
            labels.append(label)
        pie_kw=dict(labels=labels,
                    counterclock=False,
                    startangle=90,
                    colors=colors,
                    radius=1.25)
        if hatch==True:
            pie_kw["hatch"]=hatch_list[:c.shape[0]]
        if use_legend:
            # labeldistance=None keeps the labels out of the pie; they only feed the legend.
            ax.pie(c, labeldistance=None, **pie_kw)
            ax.legend(bbox_to_anchor=bbox_to_anchor)
        else:
            ax.pie(c, labeldistance=0.6, **pie_kw)
        ax.set_title(cat,backgroundcolor='lavender',pad=10)
    # Remove the panels of the grid that were not used.
    for unused in axes[len(category):]:
        fig.delaxes(unused)
    plt.subplots_adjust(top=0.9,wspace=0.5)
    if title !="":
        fig.suptitle(title)
    return {"axes":ax}
def nice_piechart_num(df: pd.DataFrame,
                      variables: List[str]=[],
                      category: str="" ,
                      palette: str="tab20c",
                      ncols: int=2,
                      ignore: float=0.05,
                      show_values: bool=True,
                      title: str="",
                      figsize=[]) ->Dict:
    """
    Drawing a nice piechart by taking numerical values.

    Parameters
    ----------
    df : pandas DataFrame
        It must be a wide-form data. Each pie chart will be drawn from each row, and by default,
        index values are used for pie chart titles.
    variables: list, optional
        The column names to calculate fractions. If not given, all columns will be used for
        calculation (in this case, all columns must contain only numerical values).
    category: str, optional
        The column holding the pie chart titles. If not given, the index is used.
    palette : str, optional (default: "tab20c")
        A matplotlib colormap name.
    ncols: int, optional (default: 2)
        The column number of the figure.
    ignore : float, optional (default: 0.05)
        Remove annotations of which the fraction is smaller than this value.
    show_values: bool, optional
        Whether to print the percentage on each slice.
    title: str optional, (default:"")
        The title of the figure.
    figsize : List[int], optional
        The figure size, e.g., [4, 6].

    Returns
    -------
    dict {"axes":ax}
        ax is the last pie chart drawn.
    """
    if category!="":
        df=df.set_index(category)
    titles=list(df.index)
    if len(variables)!=0:
        df=df[variables]
    # Columns with the largest totals come first; positional indexing also copes
    # with duplicated column names.
    srt=np.argsort(df.sum(axis=0).to_numpy())[::-1]
    df=df.iloc[:, srt]
    variables=list(df.columns)
    nrows=len(titles)//ncols+int(len(titles)%ncols!=0)
    if len(figsize)==0:
        figsize=[ncols*2,nrows*2]
    # squeeze=False guarantees an array of Axes even for a single pie chart.
    fig, axes=plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, squeeze=False)
    axes=axes.flatten()
    _cmap=plt.get_cmap(palette, len(variables))
    colors=[_cmap(i) for i in range(len(variables))]
    pie_kw=dict(counterclock=False,
                startangle=90,
                colors=colors,
                radius=1.25)
    if show_values==True:
        # Percentages of slices smaller than the ignore fraction are left out.
        pie_kw["autopct"]=lambda pct: "{:.1f}%".format(pct) if pct>=100*ignore else ""
        pie_kw["pctdistance"]=0.6
    for i, (cat, ax) in enumerate(zip(titles, axes)):
        # Rows are taken by position so duplicated index labels still give one row each.
        c=df.iloc[i].to_numpy(dtype=float)
        ax.pie(c, **pie_kw)
        ax.set_title(cat,backgroundcolor='lavender',pad=8)
    # Remove the panels of the grid that were not used.
    for unused in axes[len(titles):]:
        fig.delaxes(unused)
    plt.tight_layout(h_pad=1)
    plt.subplots_adjust(top=0.95, right=0.81)
    legend_elements = [Line2D([0], [0], marker='o', color='lavender', label=huelabel,markerfacecolor=color, markersize=10)
                       for color, huelabel in zip(colors, variables)]
    fig.legend(handles=legend_elements,bbox_to_anchor=(1, 1))
    if title!="":
        # A figure-level title; plt.title would only label the last subplot.
        fig.suptitle(title)
    return {"axes":ax}
def _nested_piechart(df: pd.DataFrame,
                    category: List[str],
                    variable: str="",
                    palette: str="tab20b",
                    figsize=[],
                    ignore: float=0.05,
                    show_values: bool=False,unit: str="",
                    show_percentage: bool=False,
                    colormode: str="independent",
                    ax: Optional[plt.Axes]=None,
                    title: str="",
                    hatch: bool=False,
                    show_legend:bool=False,
                    bbox_to_anchor: list=[1.1, 1],
                    right: float=0.7,bottom=0.1,
                    skip_na: bool=False,
                    order: str="smallest",) -> plt.Axes:
    """
    Draw a nested pie chart by counting label combinations in a dataframe.

    Each entry of ``category`` becomes one concentric ring (the first entry is
    the innermost ring). Wedges are drawn clockwise starting from 12 o'clock,
    in the order produced by ``itertools.product`` over the unique labels of
    the categories; no sorting is applied.

    Parameters
    ----------
    df : pandas DataFrame
    category : str or list
        The column names for counting values. The order of names corresponds
        to the hierarchical order of the pie chart. A single column name may
        be given as a string.
    variable : str, optional (default: "")
        If given, the wedge sizes are the sums of this column instead of the
        number of rows.
    palette : str, optional (default: "tab20b")
        A matplotlib colormap name.
    figsize : List[int], optional
        The figure size, e.g., [4, 6]. Only used when ``ax`` is None.
    ignore : float, optional (default: 0.05)
        Annotations of wedges whose fraction of the full circle is smaller
        than this value are not drawn.
    show_values : bool, optional (default: False)
        Whether to add the counts (or sums) to the annotations.
    unit : str, optional (default: "")
        A unit string appended to the values.
    show_percentage : bool, optional (default: False)
        Whether to add percentages to the annotations.
    colormode : str, optional (default: "independent") ["independent", "hierarchical", "top"]
        "independent" gives each label its own color. "top" colors every wedge
        by its top-level label. "hierarchical" is like "top" but lightens the
        color in the outer rings.
    ax : matplotlib Axes, optional
        Axes to draw on. A new figure is created when None.
    title : str, optional (default: "")
        The title of the axes.
    hatch : bool, optional (default: False)
        Adding hatches to the wedges.
    show_legend : bool, optional (default: False)
        Whether to show legends (drawn only with colormode="independent").
    bbox_to_anchor : list, optional (default: [1.1, 1])
        The [x, y] anchor of the first legend; subsequent legends are placed
        0.3 lower each.
    right, bottom : float, optional
        Passed to ``subplots_adjust`` when legends are drawn.
    skip_na : bool, optional (default: False)
        Whether to leave wedges labeled "NA" empty.
    order : str, optional
        Accepted for signature compatibility; this function does not reorder
        the wedges.

    Returns
    -------
    ax : matplotlib Axes

    Raises
    ------
    ValueError
        If ``colormode`` is not one of the supported modes.
    """
    if isinstance(category, str):
        category=[category]
    if colormode not in ("independent", "hierarchical", "top"):
        raise ValueError("Unknown colormode: {}".format(colormode))

    # Unique labels per hierarchy level, and the union over all levels.
    alllabels={}
    alllabel_sets=set()
    for level, cat in enumerate(category):
        alllabels[level]=list(df[cat].unique())
        alllabel_sets.update(alllabels[level])
    try:
        alllabel_sets=sorted(alllabel_sets)
    except TypeError:
        # labels of mixed types cannot be compared directly
        alllabel_sets=sorted(alllabel_sets, key=str)

    if colormode=="independent":
        _cmap=plt.get_cmap(palette, len(alllabel_sets))
        color_lut={u: _cmap(i) for i, u in enumerate(alllabel_sets)}
    else:
        _cmap=plt.get_cmap(palette, len(alllabels[0]))
        color_lut={u: _cmap(i) for i, u in enumerate(alllabels[0])}
    # Hatches are needed in every color mode; cycle when labels outnumber patterns.
    marker_lut={u: hatch_list[i%len(hatch_list)] for i, u in enumerate(alllabel_sets)}

    # Count (or sum) every label combination for each hierarchy level.
    data={}
    for level in range(len(category)):
        data[level]={"labels":[],"counts":[]}
        for p in it.product(*[alllabels[j] for j in range(level+1)]):
            tmp=df
            for k, _p in enumerate(p):
                tmp=tmp.loc[tmp[category[k]]==_p]
            if variable !="":
                data[level]["counts"].append(tmp[variable].sum())
            else:
                data[level]["counts"].append(len(tmp))
            data[level]["labels"].append(p)

    if ax is None:
        if len(figsize)==0:
            if (show_legend or hatch) and colormode=="independent":
                figsize=[8,6]
            else:
                figsize=[6,6]
        _, ax = plt.subplots(figsize=figsize)

    # The label name is written on the wedge unless a legend already carries it.
    name_in_text=(not show_legend) or colormode!="independent"
    showlabel=not (show_legend and hatch and colormode=="independent")
    annotate=showlabel or show_values or show_percentage

    height=1
    _bottom=0
    for d in data.values():
        x=np.array(d["counts"])
        total=np.sum(x)
        if total!=0:
            percent=np.round(100*x/total,2)
            xpi=x/total*2*np.pi
            ring_angle=np.sum(xpi)
            s=np.pi/2
            for j, (_x,_label) in enumerate(zip(xpi,d["labels"])):
                if skip_na and _label[-1]=="NA":
                    s-=_x
                    continue
                if colormode=="independent":
                    _color=color_lut[_label[-1]]
                    edgecolor="white"
                elif colormode=="top":
                    _color=color_lut[_label[0]]
                    edgecolor="white"
                else:
                    _color=np.array(color_lut[_label[0]])
                    # max(..., 1) avoids dividing by zero with a single category
                    grad=(1-np.amin(_color))/max(len(category)-1, 1)
                    _color=_color+grad*(len(_label)-1)/len(category)
                    _color=np.where(_color>1,1.0,_color)
                    edgecolor="darkgray"
                wedge_kw={"edgecolor":edgecolor, "linewidth":1}
                if hatch:
                    wedge_kw["hatch"]=marker_lut[_label[-1]]
                _baumkuchen(ax,s, -_x, _bottom, _bottom+height, int(100*_x/ring_angle), _color, **wedge_kw)

                # str() so that integer/boolean labels can be annotated too
                txt=str(_label[-1]) if name_in_text else ""
                if show_values:
                    if x[j]>10000 or x[j] < 0.0001:
                        txt=txt+"\n{:.3E} {}".format(x[j], unit)
                    else:
                        txt=txt+"\n{:.3f} {}".format(x[j], unit)
                if show_percentage:
                    txt=txt+"\n({}%)".format(percent[j])
                txt=txt.strip("\n")
                if _x/(2*np.pi)>=ignore and annotate:
                    radius=_bottom+height/2
                    ax.text(np.cos(s-_x/2)*radius, np.sin(s-_x/2)*radius,txt, ha="center",
                            bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="y", lw=1, alpha=0.8))
                s-=_x
        _bottom+=height
        height=height*0.6

    if (show_legend or hatch) and colormode=="independent":
        for level, cat in enumerate(category):
            handles=[]
            for label in alllabels[level]:
                if skip_na and label=="NA":
                    continue
                handles.append(Patch(facecolor=color_lut[label], label=label,
                                     hatch=marker_lut[label] if hatch else None))
            # one legend per category, stacked downwards from the anchor
            legend=ax.legend(handles=handles,
                             bbox_to_anchor=[bbox_to_anchor[0], bbox_to_anchor[1]-level*0.3],
                             title=cat, loc="center left")
            ax.add_artist(legend)
        ax.figure.subplots_adjust(right=right, bottom=bottom)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_facecolor("white")
    if title!="":
        ax.set_title(title)
    return ax
def nested_piechart(df: pd.DataFrame,
                    category: List[str],
                    variable: str="",
                    palette: str="tab20b",
                    figsize: list=[],
                    ignore: float=0.05,
                    show_values: bool=False,unit: str="",
                    show_percentage: bool=False,
                    colormode: str="independent",
                    ax: Optional[plt.Axes]=None,
                    title: str="",
                    hatch: bool=False,
                    show_legend:bool=False,
                    bbox_to_anchor: list=[1.1, 1],
                    right: float=0.7,
                    bottom: float=0.1,
                    skip_na: bool=False,
                    order: str="intact",) -> plt.Axes:
    """
    Draw a nested pie chart by counting label combinations in a dataframe.

    Each entry of ``category`` becomes one concentric ring (the first entry is
    the innermost ring). In the outer rings the wedges are grouped under their
    parent wedge, and inside each group they follow ``order``.

    Parameters
    ----------
    df : pandas DataFrame
    category : str or list
        The column names for counting values. The order of names corresponds
        to the hierarchical order of the pie chart. A single column name may
        be given as a string.
    variable : str, optional (default: "")
        If given, the wedge sizes are the sums of this column instead of the
        number of rows.
    order : str, optional (default: "intact") ["largest", "smallest", "intact"]
        How to sort values in the chart. "intact" keeps the order in which the
        labels first appear in the dataframe.
    ignore : float, optional (default: 0.05)
        Annotations of wedges whose fraction of the full circle is smaller
        than this value are not drawn.
    palette : str, optional (default: "tab20b")
        A matplotlib colormap name.
    show_values : bool, optional (default: False)
        Whether to exhibit values/counts.
    unit : str, optional (default: "")
        A unit string appended to the values.
    show_percentage : bool, optional (default: False)
        Whether to exhibit percentages (relative to the whole chart).
    figsize : List[int], optional
        The figure size, e.g., [4, 6]. Only used when ``ax`` is None.
    ax : matplotlib Axes, optional
        Axes to draw on. A new figure is created when None.
    title : str, optional (default: "")
        The title of the axes.
    hatch : bool, optional (default: False)
        Adding hatches to the wedges.
    show_legend : bool, optional (default: False)
        Whether to show legends (drawn only with colormode="independent").
    colormode : str, optional (default: "independent") ["independent", "hierarchical", "top"]
        "independent" gives each label a different color (from a single color
        palette). "top" gives different colors only to the top-level labels;
        the other wedges take the color of the top category they belong to.
        "hierarchical" is similar to "top", but gives fainter colors to labels
        in the lower hierarchical categories.
    bbox_to_anchor : list, optional (default: [1.1, 1])
        The [x, y] anchor of the first legend; subsequent legends are placed
        0.3 lower each.
    right : float, optional (default: 0.7)
        In the case that legends are truncated by the frame of the figure,
        decrease this value.
    bottom : float, optional (default: 0.1)
        Bottom margin passed to ``subplots_adjust`` when legends are drawn.
    skip_na : bool, optional (default: False)
        Whether to skip drawing samples labeled "NA". If True, there will be
        empty spaces for samples with "NA".

    Returns
    -------
    ax : matplotlib Axes

    Raises
    ------
    ValueError
        If ``colormode`` or ``order`` is not one of the supported values.
    """
    if isinstance(category, str):
        category=[category]
    if colormode not in ("independent", "hierarchical", "top"):
        raise ValueError("Unknown colormode: {}".format(colormode))
    if order not in ("largest", "smallest", "intact"):
        raise ValueError("Unknown order type.")

    # Unique labels per hierarchy level, and the union over all levels.
    alllabels={}
    alllabel_sets=set()
    for level, cat in enumerate(category):
        alllabels[level]=list(df[cat].unique())
        alllabel_sets.update(alllabels[level])
    try:
        alllabel_sets=sorted(alllabel_sets)
    except TypeError:
        # labels of mixed types cannot be compared directly
        alllabel_sets=sorted(alllabel_sets, key=str)

    if colormode=="independent":
        _cmap=plt.get_cmap(palette, len(alllabel_sets))
        color_lut={u: _cmap(i) for i, u in enumerate(alllabel_sets)}
    else:
        _cmap=plt.get_cmap(palette, len(alllabels[0]))
        color_lut={u: _cmap(i) for i, u in enumerate(alllabels[0])}
    # Hatches are needed in every color mode; cycle when labels outnumber patterns.
    marker_lut={u: hatch_list[i%len(hatch_list)] for i, u in enumerate(alllabel_sets)}

    # Build the rings. Each ring is a list of groups, one per wedge of the
    # previous ring (in drawing order); a group holds the label tuples and the
    # counts of the children of that parent wedge, e.g. for (species, sex):
    #   ring 0: [{"labels": [("Adelie",), ("Gentoo",), ...], "counts": [152, 124, ...]}]
    #   ring 1: [{"labels": [("Adelie","Female"), ("Adelie","Male"), ...], "counts": [...]}, ...]
    # Label tuples are kept as they are (no string joining), so children are
    # attached to exactly their own parent and labels may be of any type.
    rings=[]
    parent_order=[()]
    for level in range(len(category)):
        labels=[]
        counts=[]
        for p in it.product(*[alllabels[j] for j in range(level+1)]):
            tmp=df
            for k, _p in enumerate(p):
                tmp=tmp.loc[tmp[category[k]]==_p]
            if variable !="":
                counts.append(tmp[variable].sum())
            else:
                counts.append(len(tmp))
            labels.append(p)
        counts=np.array(counts)
        if order=="largest":
            srt=np.argsort(counts)[::-1]
        elif order=="smallest":
            srt=np.argsort(counts)
        else:
            srt=np.arange(len(counts))
        groups={parent: {"labels":[],"counts":[]} for parent in parent_order}
        for j in srt:
            lab=labels[j]
            groups[lab[:-1]]["labels"].append(lab)
            groups[lab[:-1]]["counts"].append(counts[j])
        rings.append([groups[parent] for parent in parent_order])
        # the wedges of this ring, in drawing order, are the parents of the next
        parent_order=[lab for parent in parent_order for lab in groups[parent]["labels"]]

    if ax is None:
        if len(figsize)==0:
            if (show_legend or hatch) and colormode=="independent":
                figsize=[8,6]
            else:
                figsize=[6,6]
        _, ax = plt.subplots(figsize=figsize)

    # The label name is written on the wedge unless a legend already carries it.
    name_in_text=(not show_legend) or colormode!="independent"
    showlabel=not (show_legend and hatch and colormode=="independent")
    annotate=showlabel or show_values or show_percentage

    # Drawing the pie chart, ring by ring from the center.
    height=1
    _bottom=0
    total=None
    for h, ring in enumerate(rings):
        s=np.pi/2
        for group in ring:
            x=np.array(group["counts"])
            if np.sum(x)==0:
                continue
            if h==0:
                # every angle and percentage is relative to the top-level total
                total=np.sum(x)
            percent=np.round(100*x/total,2)
            xpi=x/total*2*np.pi
            group_angle=np.sum(xpi)
            for j, (_x,_label) in enumerate(zip(xpi,group["labels"])):
                if skip_na and _label[-1]=="NA":
                    s-=_x
                    continue
                if colormode=="independent":
                    _color=color_lut[_label[-1]]
                    edgecolor="white"
                elif colormode=="top":
                    _color=color_lut[_label[0]]
                    edgecolor="white"
                else:
                    _color=np.array(color_lut[_label[0]])
                    # max(..., 1) avoids dividing by zero with a single category
                    grad=(1-np.amin(_color))/max(len(category)-1, 1)
                    _color=_color+grad*(len(_label)-1)/len(category)
                    _color=np.where(_color>1,1.0,_color)
                    edgecolor="darkgray"
                wedge_kw={"edgecolor":edgecolor, "linewidth":1}
                if hatch:
                    wedge_kw["hatch"]=marker_lut[_label[-1]]
                _baumkuchen(ax,s, -_x, _bottom, _bottom+height, int(100*_x/group_angle), _color, **wedge_kw)

                # str() so that integer/boolean labels can be annotated too
                txt=str(_label[-1]) if name_in_text else ""
                if show_values:
                    if x[j]>10000 or x[j] < 0.0001:
                        txt=txt+"\n{:.3E} {}".format(x[j], unit)
                    else:
                        txt=txt+"\n{:.3f} {}".format(x[j], unit)
                if show_percentage:
                    txt=txt+"\n({}%)".format(percent[j])
                txt=txt.strip("\n")
                if _x/(2*np.pi)>=ignore and annotate:
                    radius=_bottom+height/2
                    ax.text(np.cos(s-_x/2)*radius, np.sin(s-_x/2)*radius,txt, ha="center",
                            bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="y", lw=1, alpha=0.8))
                s-=_x
        _bottom+=height
        height=height*0.6

    # Drawing legends. With hatch=True the legends are drawn automatically,
    # because texts on hatched wedges would be too messy.
    if (show_legend or hatch) and colormode=="independent":
        for level, cat in enumerate(category):
            handles=[]
            for label in alllabels[level]:
                if skip_na and label=="NA":
                    continue
                handles.append(Patch(facecolor=color_lut[label], label=label,
                                     hatch=marker_lut[label] if hatch else None))
            # one legend per category, stacked downwards from the anchor
            legend=ax.legend(handles=handles,
                             bbox_to_anchor=[bbox_to_anchor[0], bbox_to_anchor[1]-level*0.3],
                             title=cat, loc="center left")
            ax.add_artist(legend)
        ax.figure.subplots_adjust(right=right, bottom=bottom)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_facecolor("white")
    if title!="":
        ax.set_title(title)
    return ax
def stackedlines(df: pd.DataFrame,
                x: str,
                y: list,
                sort: bool=True,
                inverse: bool=False,
                show_values: bool=False,
                remove_all_zero: bool=False,
                palette: str="tab20c",
                figsize=[7,4],
                ax: Optional[plt.Axes]=None,
                alpha: float=0.75,
                bbox_to_anchor: list=[1.7, 1],
                right: float=0.7,
                bottom: float=0.120,
                show_legend: bool=True,
                xlabel: str="",
                ylabel: str="",
                yunit: str="",
                xunit: str="",
                xformat: str="",
                yformat: str="",
                logscalex: bool=False,
                logscaley: bool=False,
                title: str="",
                hatch: bool=False):
    """
    Drawing a stacked line (area) plot from a wide form dataframe.

    For every x position the values of the ``y`` columns are stacked on top of
    each other; non-negative values are stacked upwards from zero and negative
    values downwards from zero. Missing values are treated as 0.

    Parameters
    ----------
    df : pandas DataFrame
        A wide form dataframe. It accepts negative values.
        e.g.)
            year  biofuel_consumption  coal_consumption  gas_consumption
        90  1990               16.733          5337.998         5170.609
        91  1991               19.389          5287.613         5283.972
        92  1992               22.045          5324.031         5463.509
    x : str
        the name of a column to be the x axis of the plot.
    y : list
        the names of values to display as stacked lines
    sort : bool, optional (default: True)
        Whether to sort lines based on their values at each x position
        (the smallest value is placed closest to zero).
    inverse : bool, optional (default: False)
        If True, the y axis is inverted and, when ``sort`` is True, the
        largest value is placed closest to zero.
    show_values : bool, optional (default: False)
        Whether to show percentages at the end of lines.
    remove_all_zero : bool, optional (default: False)
        Whether to drop x positions at which the values sum to zero.
    palette : str, optional (default: "tab20c")
        A colormap name
    figsize : list, optional (default: [7, 4])
        The figure size. Only used when ``ax`` is None.
    ax : Optional[plt.Axes] optional, (default: None)
        pyplot ax to add this plot
    alpha : float, optional (default: 0.75)
        alpha of the stacked lines (areas).
    bbox_to_anchor : list, optional (default: [1.7, 1])
        The legend location
    right, bottom : float, optional (default: 0.7, 0.120)
        Passed to ``subplots_adjust`` when the legend is drawn.
    show_legend : bool, optional (default: True)
        Whether to show the legend.
    xlabel : str, optional
        x axis label (the name of the x column is used when empty).
    ylabel : str, optional
        y axis label
    yunit : str, optional (default: "")
        The y axis unit. it will appear at the top of the y axis.
    xunit : str, optional (default: "")
        The x axis unit. it will appear at the right end of the x axis.
    xformat, yformat : str, optional (default: "")
        Format strings for the tick labels (passed to StrMethodFormatter).
    logscalex, logscaley : bool, optional (default: False)
        Whether to use a log scale for the x/y axis.
    title : str, optional (default: "")
        The figure title.
    hatch : bool, optional (default: False)
        Whether to add hatches to the plots.

    Returns
    -------
    dict
        {"axes": ax}
    """
    df=df.fillna(0)
    X=np.array(df[x])
    # rows: series in y, columns: x positions
    Y=np.array([np.array(df[col]) for col in y])
    if remove_all_zero:
        _filter=Y.sum(axis=0)!=0
        Y=Y[:,_filter]
        X=X[_filter]

    # For each series, the [lower, upper] boundary of its band at every x.
    Ydict={col:[] for col in y}
    for i in range(len(X)):
        if sort:
            srtidx=np.argsort(Y[:,i])
            if inverse:
                srtidx=srtidx[::-1]
        else:
            srtidx=range(len(y))
        _bottom=0
        _bottom_neg=0
        for _idx in srtidx:
            _col=y[_idx]
            yval=Y[_idx,i]
            if yval>=0:
                Ydict[_col].append([_bottom, yval+_bottom])
                _bottom+=yval
            else:
                Ydict[_col].append([_bottom_neg, yval+_bottom_neg])
                _bottom_neg+=yval

    if ax is None:
        _, ax=plt.subplots(figsize=figsize)
    cmap=plt.get_cmap(palette, len(y))
    colorlut={col: cmap(i) for i, col in enumerate(y)}
    last_vals=[]
    last_pos=[]
    for i, (col, vals) in enumerate(Ydict.items()):
        vals=np.array(vals)
        band_kw={"label":col, "alpha":alpha, "color":colorlut[col]}
        if hatch:
            # cycle through the patterns when series outnumber them
            band_kw["hatch"]=hatch_list[i%len(hatch_list)]
        ax.fill_between(X, vals[:,0], vals[:,1], **band_kw)
        last_vals.append(vals[-1,1]-vals[-1,0])
        last_pos.append(vals[-1,1]/2+vals[-1,0]/2)

    if show_values:
        last_total=np.sum(last_vals)
        if last_total!=0:
            # percentages of the last x position, written at the band centers
            last_vals=100*np.array(last_vals)/last_total
            for val, pos in zip(last_vals, last_pos):
                ax.text(X[-1], pos, str(np.round(val, 1))+"%")
    if show_legend:
        # use the target axes/figure rather than pyplot's current ones
        ax.legend(bbox_to_anchor=bbox_to_anchor)
        ax.figure.subplots_adjust(right=right, bottom=bottom)
    if xlabel !="":
        ax.set_xlabel(xlabel)
    else:
        ax.set_xlabel(x)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    if yunit!="":
        ax.text(0, 1, "({})".format(yunit), transform=ax.transAxes, ha="right")
    if xunit!="":
        ax.text(1, 0, "({})".format(xunit), transform=ax.transAxes, ha="left",va="top")
    if xformat!="":
        ax.xaxis.set_major_formatter(StrMethodFormatter(xformat))
    if yformat !="":
        ax.yaxis.set_major_formatter(StrMethodFormatter(yformat))
    if logscalex:
        ax.set_xscale("log")
    if logscaley:
        ax.set_yscale("log")
    if inverse:
        ax.invert_yaxis()
    return {"axes":ax}
Functions
def _nested_piechart(df: pandas.core.frame.DataFrame, category: List[str], variable: str = '', palette: str = 'tab20b', figsize=[], ignore: float = 0.05, show_values: bool = False, unit: str = '', show_percentage: bool = False, colormode: str = 'independent', ax: Optional[matplotlib.axes._axes.Axes] = None, title: str = '', hatch: bool = False, show_legend: bool = False, bbox_to_anchor: list = [1.1, 1], right: float = 0.7, bottom=0.1, skip_na: bool = False, order: str = 'smallest') ‑> Dict[~KT, ~VT]
-
Drawing a nested pie chart by counting the occurrences of string (or integer/boolean) values in a pandas dataframe.
Parameters
df
:pandas DataFrame
category
:str
or list
- The column names for counting values. The order of names will correspond to the hierarchical order of the pie chart.
order
:str
, optional (default: "smallest") ["largest", "smallest"]
- How to sort values in the chart
ignore
:float
, optional(default: 0.05)
- Remove annotations of which the fraction is smaller than this value.
palette
:str
or dict
, optional (default: "tab20b")
- A matplotlib colormap name or dictionary in which keys are values of the hue category and values are RGB array. e.g.) palette={""}
show_values
:bool
, optional - Whether to exhibit the values of fractions/counts/percentages.
show
:bool
, optional- Whether or not to show the figure.
figsize
:List[int]
, optional- The figure size, e.g., [4, 6].
title
:str optional, (default:"")
- The title of the figure.
hatch
:bool
, optional(default: False)
- Adding hatches to the bars
show_legend
:bool
, optional(default: False)
- Whether to show legends.
Returns
ax: matplotlib Axes (the function returns the Axes itself, not a dict)
Raises
Notes
References
See Also
Examples
Expand source code
def _nested_piechart(df: pd.DataFrame,
                    category: List[str],
                    variable: str="",
                    palette: str="tab20b",
                    figsize=[],
                    ignore: float=0.05,
                    show_values: bool=False,unit: str="",
                    show_percentage: bool=False,
                    colormode: str="independent",
                    ax: Optional[plt.Axes]=None,
                    title: str="",
                    hatch: bool=False,
                    show_legend:bool=False,
                    bbox_to_anchor: list=[1.1, 1],
                    right: float=0.7,bottom=0.1,
                    skip_na: bool=False,
                    order: str="smallest",) -> plt.Axes:
    """
    Draw a nested pie chart by counting label combinations in a dataframe.

    Each entry of ``category`` becomes one concentric ring (the first entry is
    the innermost ring). Wedges are drawn clockwise starting from 12 o'clock,
    in the order produced by ``itertools.product`` over the unique labels of
    the categories; no sorting is applied.

    Parameters
    ----------
    df : pandas DataFrame
    category : str or list
        The column names for counting values. The order of names corresponds
        to the hierarchical order of the pie chart. A single column name may
        be given as a string.
    variable : str, optional (default: "")
        If given, the wedge sizes are the sums of this column instead of the
        number of rows.
    palette : str, optional (default: "tab20b")
        A matplotlib colormap name.
    figsize : List[int], optional
        The figure size, e.g., [4, 6]. Only used when ``ax`` is None.
    ignore : float, optional (default: 0.05)
        Annotations of wedges whose fraction of the full circle is smaller
        than this value are not drawn.
    show_values : bool, optional (default: False)
        Whether to add the counts (or sums) to the annotations.
    unit : str, optional (default: "")
        A unit string appended to the values.
    show_percentage : bool, optional (default: False)
        Whether to add percentages to the annotations.
    colormode : str, optional (default: "independent") ["independent", "hierarchical", "top"]
        "independent" gives each label its own color. "top" colors every wedge
        by its top-level label. "hierarchical" is like "top" but lightens the
        color in the outer rings.
    ax : matplotlib Axes, optional
        Axes to draw on. A new figure is created when None.
    title : str, optional (default: "")
        The title of the axes.
    hatch : bool, optional (default: False)
        Adding hatches to the wedges.
    show_legend : bool, optional (default: False)
        Whether to show legends (drawn only with colormode="independent").
    bbox_to_anchor : list, optional (default: [1.1, 1])
        The [x, y] anchor of the first legend; subsequent legends are placed
        0.3 lower each.
    right, bottom : float, optional
        Passed to ``subplots_adjust`` when legends are drawn.
    skip_na : bool, optional (default: False)
        Whether to leave wedges labeled "NA" empty.
    order : str, optional
        Accepted for signature compatibility; this function does not reorder
        the wedges.

    Returns
    -------
    ax : matplotlib Axes

    Raises
    ------
    ValueError
        If ``colormode`` is not one of the supported modes.
    """
    if isinstance(category, str):
        category=[category]
    if colormode not in ("independent", "hierarchical", "top"):
        raise ValueError("Unknown colormode: {}".format(colormode))

    # Unique labels per hierarchy level, and the union over all levels.
    alllabels={}
    alllabel_sets=set()
    for level, cat in enumerate(category):
        alllabels[level]=list(df[cat].unique())
        alllabel_sets.update(alllabels[level])
    try:
        alllabel_sets=sorted(alllabel_sets)
    except TypeError:
        # labels of mixed types cannot be compared directly
        alllabel_sets=sorted(alllabel_sets, key=str)

    if colormode=="independent":
        _cmap=plt.get_cmap(palette, len(alllabel_sets))
        color_lut={u: _cmap(i) for i, u in enumerate(alllabel_sets)}
    else:
        _cmap=plt.get_cmap(palette, len(alllabels[0]))
        color_lut={u: _cmap(i) for i, u in enumerate(alllabels[0])}
    # Hatches are needed in every color mode; cycle when labels outnumber patterns.
    marker_lut={u: hatch_list[i%len(hatch_list)] for i, u in enumerate(alllabel_sets)}

    # Count (or sum) every label combination for each hierarchy level.
    data={}
    for level in range(len(category)):
        data[level]={"labels":[],"counts":[]}
        for p in it.product(*[alllabels[j] for j in range(level+1)]):
            tmp=df
            for k, _p in enumerate(p):
                tmp=tmp.loc[tmp[category[k]]==_p]
            if variable !="":
                data[level]["counts"].append(tmp[variable].sum())
            else:
                data[level]["counts"].append(len(tmp))
            data[level]["labels"].append(p)

    if ax is None:
        if len(figsize)==0:
            if (show_legend or hatch) and colormode=="independent":
                figsize=[8,6]
            else:
                figsize=[6,6]
        _, ax = plt.subplots(figsize=figsize)

    # The label name is written on the wedge unless a legend already carries it.
    name_in_text=(not show_legend) or colormode!="independent"
    showlabel=not (show_legend and hatch and colormode=="independent")
    annotate=showlabel or show_values or show_percentage

    height=1
    _bottom=0
    for d in data.values():
        x=np.array(d["counts"])
        total=np.sum(x)
        if total!=0:
            percent=np.round(100*x/total,2)
            xpi=x/total*2*np.pi
            ring_angle=np.sum(xpi)
            s=np.pi/2
            for j, (_x,_label) in enumerate(zip(xpi,d["labels"])):
                if skip_na and _label[-1]=="NA":
                    s-=_x
                    continue
                if colormode=="independent":
                    _color=color_lut[_label[-1]]
                    edgecolor="white"
                elif colormode=="top":
                    _color=color_lut[_label[0]]
                    edgecolor="white"
                else:
                    _color=np.array(color_lut[_label[0]])
                    # max(..., 1) avoids dividing by zero with a single category
                    grad=(1-np.amin(_color))/max(len(category)-1, 1)
                    _color=_color+grad*(len(_label)-1)/len(category)
                    _color=np.where(_color>1,1.0,_color)
                    edgecolor="darkgray"
                wedge_kw={"edgecolor":edgecolor, "linewidth":1}
                if hatch:
                    wedge_kw["hatch"]=marker_lut[_label[-1]]
                _baumkuchen(ax,s, -_x, _bottom, _bottom+height, int(100*_x/ring_angle), _color, **wedge_kw)

                # str() so that integer/boolean labels can be annotated too
                txt=str(_label[-1]) if name_in_text else ""
                if show_values:
                    if x[j]>10000 or x[j] < 0.0001:
                        txt=txt+"\n{:.3E} {}".format(x[j], unit)
                    else:
                        txt=txt+"\n{:.3f} {}".format(x[j], unit)
                if show_percentage:
                    txt=txt+"\n({}%)".format(percent[j])
                txt=txt.strip("\n")
                if _x/(2*np.pi)>=ignore and annotate:
                    radius=_bottom+height/2
                    ax.text(np.cos(s-_x/2)*radius, np.sin(s-_x/2)*radius,txt, ha="center",
                            bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="y", lw=1, alpha=0.8))
                s-=_x
        _bottom+=height
        height=height*0.6

    if (show_legend or hatch) and colormode=="independent":
        for level, cat in enumerate(category):
            handles=[]
            for label in alllabels[level]:
                if skip_na and label=="NA":
                    continue
                handles.append(Patch(facecolor=color_lut[label], label=label,
                                     hatch=marker_lut[label] if hatch else None))
            # one legend per category, stacked downwards from the anchor
            legend=ax.legend(handles=handles,
                             bbox_to_anchor=[bbox_to_anchor[0], bbox_to_anchor[1]-level*0.3],
                             title=cat, loc="center left")
            ax.add_artist(legend)
        ax.figure.subplots_adjust(right=right, bottom=bottom)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_facecolor("white")
    if title!="":
        ax.set_title(title)
    return ax
def nested_piechart(df: pandas.core.frame.DataFrame, category: List[str], variable: str = '', palette: str = 'tab20b', figsize: list = [], ignore: float = 0.05, show_values: bool = False, unit: str = '', show_percentage: bool = False, colormode: str = 'independent', ax: Optional[matplotlib.axes._axes.Axes] = None, title: str = '', hatch: bool = False, show_legend: bool = False, bbox_to_anchor: list = [1.1, 1], right: float = 0.7, bottom: float = 0.1, skip_na: bool = False, order: str = 'intact') ‑> Dict[~KT, ~VT]
-
Drawing a nested pie chart by counting the occurrences of string (or integer/boolean) values in a pandas dataframe.
Parameters
df
:pandas DataFrame
category
:str
or list
- The column names for counting values. The order of names will correspond to the hierarchical order of the pie chart.
order
:str
, optional (default: "intact") ["largest", "smallest", "intact"]
- How to sort values in the chart
ignore
:float
, optional(default: 0.05)
- Remove annotations of which the fraction is smaller than this value.
palette
:str
or dict
, optional(default: "tab20b")
- A matplotlib colormap name or dictionary in which keys are values of the hue category and values are RGB array. e.g.) palette={""}
show_values
:bool
, optional - Whether to exhibit values/counts.
show_percentage
:bool
, optional - Whether to exhibit percentages.
show
:bool
, optional- Whether or not to show the figure.
figsize
:List[int]
, optional- The figure size, e.g., [4, 6].
title
:str optional, (default:"")
- The title of the figure.
hatch
:bool
, optional(default: False)
- Adding hatches to the bars
show_legend
:bool
, optional(default: False)
- Whether to show legends.
colormode
:str
, optional(default: "independent") ["independent", "hierarchical", "top"]
- Choosing the ways to color labels. "independent" will give each label a different color (from a single color palette). "top" will give different colors to only labels in the top, and other labels follow the same color with the top category they belong to. "hierarchical" is similar to "top", but give fainter colors to labels in the lower hierarchical categories.
bbox_to_anchor
:list
, optional(default:[1.1, 1])
- the location of legends to appear
right
:float
, optional(default: 0.7)
- In the case that legends are truncated by the frame of the figure, decrease this value.
bottom
:float
, optional(default: 0.1)
skip_na
:bool
, optional(default: False)
- Whether to skip drawing samples labeled "NA". If True, there will be empty spaces for samples with "NA".
Returns
ax: matplotlib Axes (the function returns the Axes itself, not a dict)
Raises
Notes
References
See Also
Examples
Expand source code
def nested_piechart(df: pd.DataFrame,
                    category: Union[str, List[str]],
                    variable: str="",
                    palette: str="tab20b",
                    figsize: list=[],
                    ignore: float=0.05,
                    show_values: bool=False,
                    unit: str="",
                    show_percentage: bool=False,
                    colormode: str="independent",
                    ax: Optional[plt.Axes]=None,
                    title: str="",
                    hatch: bool=False,
                    show_legend: bool=False,
                    bbox_to_anchor: list=[1.1, 1],
                    right: float=0.7,
                    bottom: float=0.1,
                    skip_na: bool=False,
                    order: str="intact",) -> plt.Axes:
    """
    Draw a nested pie chart by counting the occurrence of categorical values in a pandas dataframe.

    Parameters
    ----------
    df : pandas DataFrame
    category: str or list
        The column names for counting values. The order of names corresponds to the
        hierarchical order of the rings (the first name is the innermost ring).
    variable: str, optional (default: "")
        If given, the values of this column are summed for each label combination
        instead of counting rows.
    palette : str, optional (default: "tab20b")
        A matplotlib colormap name.
    figsize : List[int], optional
        The figure size, e.g., [4, 6]. Only used when a new figure is created (ax is None).
    ignore : float, optional (default: 0.05)
        Annotations of slices whose fraction of the full circle is smaller than this
        value are not drawn.
    show_values: bool, optional (default: False)
        Whether to exhibit values/counts.
    unit: str, optional (default: "")
        A text appended to the values when show_values is True.
    show_percentage: bool, optional (default: False)
        Whether to exhibit percentages.
    colormode: str, optional (default: "independent") ["independent", "hierarchical", "top"]
        "independent" gives each label a different color (from a single color palette).
        "top" gives different colors only to the labels of the first category; other labels
        follow the color of the top label they belong to. "hierarchical" is similar to "top",
        but gives fainter colors to labels in the lower hierarchical categories.
    ax: plt.Axes, optional (default: None)
        The axes to draw on. A new figure is created if not given.
    title: str, optional (default: "")
        The title of the axes.
    hatch: bool, optional (default: False)
        Adding hatches to the slices.
    show_legend: bool, optional (default: False)
        Whether to show legends (legends are drawn only when colormode is "independent").
    bbox_to_anchor: list, optional (default: [1.1, 1])
        The [x, y] location of the first legend. Legends of the following categories are
        placed 0.3 lower each.
    right: float, optional (default: 0.7)
        In the case that legends are truncated by the frame of the figure, decrease this value.
    bottom: float, optional (default: 0.1)
        The bottom margin passed to subplots_adjust together with ``right``.
    skip_na: bool, optional (default: False)
        Whether to skip drawing samples labeled "NA". If True, there will be empty spaces
        for samples with "NA".
    order: str, optional (default: "intact") ["largest", "smallest", "intact"]
        How to sort values in the chart.

    Returns
    -------
    ax: plt.Axes
        The axes the chart was drawn on.

    Raises
    ------
    ValueError
        If category is empty, or colormode/order is not one of the supported options.
    """
    if isinstance(category, str):
        category = [category]
    if len(category) == 0:
        raise ValueError("At least one category is required.")
    if colormode not in ("independent", "hierarchical", "top"):
        raise ValueError("Unknown colormode: {}".format(colormode))
    if order not in ("largest", "smallest", "intact"):
        raise ValueError("Unknown order type.")

    # Labels of each hierarchical level, in order of first appearance.
    alllabels = {i: list(df[cat].unique()) for i, cat in enumerate(category)}
    unique_labels = {u for labels in alllabels.values() for u in labels}
    try:
        alllabel_sets = sorted(unique_labels)
    except TypeError:
        # Labels of mixed types cannot be compared directly.
        alllabel_sets = sorted(unique_labels, key=str)

    color_keys = alllabel_sets if colormode == "independent" else alllabels[0]
    _cmap = plt.get_cmap(palette, len(color_keys))
    color_lut = {u: _cmap(i) for i, u in enumerate(color_keys)}
    # Hatches are needed in every colormode; cycle when labels outnumber the patterns.
    marker_lut = {u: hatch_list[i % len(hatch_list)] for i, u in enumerate(alllabel_sets)}

    # Counting data in each level of categories, e.g.,
    # {0: {'labels': [('Adelie',), ('Chinstrap',), ('Gentoo',)], 'counts': [152, 68, 124]},
    #  1: {'labels': [('Adelie', 'Male'), ('Adelie', 'Female'), ...], 'counts': [73, 73, ...]}}
    data = {}
    for i in range(len(category)):
        labels = []
        counts = []
        for combo in it.product(*[alllabels[j] for j in range(i + 1)]):
            subset = df
            for col, value in zip(category, combo):
                subset = subset.loc[subset[col] == value]
            if variable != "":
                counts.append(subset[variable].sum())
            else:
                counts.append(len(subset))
            labels.append(combo)
        data[i] = {"labels": labels, "counts": counts}

    if ax is None:
        if len(figsize) == 0:
            if (show_legend or hatch) and colormode == "independent":
                figsize = [8, 6]
            else:
                figsize = [6, 6]
        _, ax = plt.subplots(figsize=figsize)

    # Reorganizing data: levels[k] is a list of groups, one per parent label combination
    # (ordered like the slices of the parent ring). Each group holds the child label tuples
    # and their counts. Parents are matched by exact tuple comparison so that a label that
    # is a prefix of another label (e.g. "A" and "AB") cannot collect the wrong children.
    levels = []
    parents = [()]
    for i in range(len(category)):
        labels = data[i]["labels"]
        counts = data[i]["counts"]
        if order == "largest":
            ranking = list(np.argsort(counts)[::-1])
        elif order == "smallest":
            ranking = list(np.argsort(counts))
        else:
            ranking = list(range(len(counts)))
        ranked = [(labels[j], counts[j]) for j in ranking]
        groups = []
        for parent in parents:
            children = [(l, c) for l, c in ranked if l[:-1] == parent]
            groups.append({"labels": [l for l, _ in children],
                           "counts": [c for _, c in children]})
        levels.append(groups)
        parents = [l for group in groups for l in group["labels"]]

    # Drawing the rings from the innermost (top category) outward.
    height = 1
    _bottom = 0
    total = 0
    for level, groups in enumerate(levels):
        s = np.pi / 2  # every ring starts at 12 o'clock and proceeds clockwise
        for group in groups:
            x = np.array(group["counts"])
            if np.sum(x) == 0:
                continue
            if level == 0:
                total = np.sum(x)
            percent = np.round(100 * x / total, 2)
            xpi = x / total * 2 * np.pi
            group_angle = np.sum(xpi)
            for i, (_x, label) in enumerate(zip(xpi, group["labels"])):
                name = label[-1]
                if skip_na and name == "NA":
                    s -= _x
                    continue
                if colormode == "independent":
                    _color = color_lut[name]
                    edgecolor = "white"
                elif colormode == "top":
                    _color = color_lut[label[0]]
                    edgecolor = "white"
                else:
                    _color = np.array(color_lut[label[0]])
                    # max(..., 1) avoids a division by zero with a single category.
                    grad = (1 - np.amin(_color)) / max(len(category) - 1, 1)
                    _color = _color + grad * (len(label) - 1) / len(category)
                    _color = np.where(_color > 1, 1.0, _color)
                    edgecolor = "darkgray"

                wedge_kw = {"edgecolor": edgecolor, "linewidth": 1}
                if hatch:
                    wedge_kw["hatch"] = marker_lut[name]
                _baumkuchen(ax, s, -_x, _bottom, _bottom + height,
                            int(100 * _x / group_angle), _color, **wedge_kw)

                txt = ""
                if not show_legend or colormode != "independent":
                    txt = str(name)
                if show_values:
                    if x[i] > 10000 or x[i] < 0.0001:
                        txt = txt + "\n{:.3E} {}".format(x[i], unit)
                    else:
                        txt = txt + "\n{:.3f} {}".format(x[i], unit)
                if show_percentage:
                    txt = txt + "\n({}%)".format(percent[i])
                txt = txt.strip("\n")

                # The label text is suppressed only when a hatched legend replaces it.
                showlabel = not (show_legend and hatch and colormode == "independent")
                wanted = showlabel or show_values or show_percentage
                if txt and wanted and _x / (2 * np.pi) >= ignore:
                    radius = _bottom + height / 2
                    ax.text(np.cos(s - _x / 2) * radius,
                            np.sin(s - _x / 2) * radius,
                            txt,
                            ha="center",
                            bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="y", lw=1, alpha=0.8))
                s -= _x
        _bottom += height
        height = height * 0.6

    # Drawing legends. If hatch is True, legends are drawn automatically because adding
    # texts to a hatched pie chart is too messy.
    if (show_legend or hatch) and colormode == "independent":
        for i, cat in enumerate(category):
            handles = []
            for label in alllabels[i]:
                if skip_na and label == "NA":
                    continue
                patch_kw = {"facecolor": color_lut[label], "label": str(label)}
                if hatch:
                    patch_kw["hatch"] = marker_lut[label]
                handles.append(Patch(**patch_kw))
            legend = ax.legend(handles=handles,
                               bbox_to_anchor=[bbox_to_anchor[0], bbox_to_anchor[1] - i * 0.3],
                               title=cat,
                               loc="center left")
            # ax.legend replaces the previous legend; add_artist keeps all of them.
            ax.add_artist(legend)
        # NOTE(review): the published source is whitespace-collapsed, so whether this margin
        # adjustment originally ran only when legends are drawn could not be confirmed.
        ax.figure.subplots_adjust(right=right, bottom=bottom)

    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_facecolor("white")
    if title != "":
        ax.set_title(title)
    return ax
def nice_piechart(df: pandas.core.frame.DataFrame, category: Union[str, List[str]], palette: str = 'tab20c', order: str = 'largest', ncols: int = 2, ignore: float = 0.05, show_values: bool = True, title: str = '', hatch: bool = False, figsize: list = [], show_legend: bool = False, bbox_to_anchor: list = [1.1, 1], right: float = 0.7, bottom=0.1) ‑> Dict[~KT, ~VT]
-
Drawing a nice pie chart by counting the occurrence of values from a pandas dataframe. If you want to draw a pie chart from numerical values, try nice_piechart_num.
Parameters
df
:pandas DataFrame
category
:str
or list
- The column names for counting values.
order
:str
, optional(default: "largest") ["largest", "smallest"]
- How to sort values in the chart
ncols
:int
, optional(default: 2)
- The column number of the figure.
ignore
:float
, optional(default: 0.05)
- Remove annotations of which the fraction is smaller than this value.
palette
:str
or dict
, optional(default: "tab20c")
- A matplotlib colormap name or dictionary in which keys are values of the hue category and values are RGB array. e.g.) palette={""}
show_values
:bool
, optional- Whether to exhibit the values of fractions/counts/percentages.
show
:bool
, optional- Whether or not to show the figure.
figsize
:List[int]
, optional- The figure size, e.g., [4, 6].
title
:str optional, (default:"")
- The title of the figure.
hatch
:bool
, optional(default: False)
- Adding hatches to the bars
show_legend
:bool
, optional(default: False)
- Whether to show legends.
bin_num
:dict, int
, optional(default: 10)
- A histogram bin number when columns with float values are selected. You can specify the bin number of each column by using a dictionary (e.g., bin_num={"A":10,"B":5}).
Returns
{"axes":ax}: dict
Raises
Notes
References
See Also
Examples
Expand source code
def nice_piechart(df: pd.DataFrame,
                  category: Union[str, List[str]],
                  palette: str="tab20c",
                  order: str="largest",
                  ncols: int=2,
                  ignore: float=0.05,
                  show_values: bool=True,
                  title: str="",
                  hatch: bool=False,
                  figsize: list=[],
                  show_legend: bool=False,
                  bbox_to_anchor: list=[1.1, 1],
                  right: float=0.7,
                  bottom=0.1) -> Dict:
    """
    Draw pie charts by counting the occurrence of values in columns of a pandas dataframe.
    If you want to draw a pie chart from numerical values, try nice_piechart_num.

    Parameters
    ----------
    df : pandas DataFrame
    category: str or list
        The column names for counting values. One pie chart is drawn per column.
    palette : str, optional (default: "tab20c")
        A matplotlib colormap name.
    order: str, optional (default: "largest") ["largest", "smallest"]
        How to sort values in the chart.
    ncols: int, optional (default: 2)
        The column number of the figure.
    ignore : float, optional (default: 0.05)
        Slices whose fraction is smaller than this value lose their annotation and are
        painted black.
    show_values: bool, optional (default: True)
        Whether to append the percentage to each label.
    title: str, optional (default: "")
        The title of the figure.
    hatch: bool, optional (default: False)
        Adding hatches to the slices. A legend is drawn instead of in-chart labels.
    figsize : List[int], optional
        The figure size, e.g., [4, 6].
    show_legend: bool, optional (default: False)
        Whether to show legends instead of in-chart labels.
    bbox_to_anchor, right, bottom:
        Accepted for API compatibility; this function does not use them.

    Returns
    -------
    {"axes": ax}: dict
        ``ax`` is the axes of the last pie chart drawn.

    Raises
    ------
    ValueError
        If order is neither "largest" nor "smallest".
    """
    if order not in ("largest", "smallest"):
        raise ValueError("Unknown order type.")
    if isinstance(category, str):
        category = [category]

    nrows = len(category) // ncols + int(len(category) % ncols != 0)
    if len(figsize) == 0:
        # Legends need extra horizontal room.
        panel_width = 4 if (show_legend or hatch) else 3
        figsize = [ncols * panel_width, nrows * 3]
    # squeeze=False always yields a 2D array, so a 1x1 grid can be flattened too.
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, squeeze=False)
    axes = axes.flatten()

    for cat, ax in zip(category, axes):
        values, counts = np.unique(df[cat], return_counts=True)
        ranking = np.argsort(counts)
        if order == "largest":
            ranking = ranking[::-1]
        counts = counts[ranking]
        fractions = counts / np.sum(counts)
        # Plain Python strings: numpy string arrays would truncate the appended text and
        # numeric arrays cannot hold text at all.
        labels = [str(v) for v in values[ranking]]

        _cmap = plt.get_cmap(palette, counts.shape[0])
        colors = [_cmap(i) for i in range(counts.shape[0])]
        for j, fraction in enumerate(fractions):
            if fraction < ignore:
                colors[j] = [0, 0, 0, 1]
                labels[j] = ""
                continue
            if show_values:
                # Round after scaling to percent; rounding the fraction first would
                # reduce the resolution to 10% steps.
                labels[j] = labels[j] + "\n(" + "{:.1f}".format(100 * fraction) + "%)"

        if hatch:
            ax.pie(counts,
                   labels=labels,
                   counterclock=False,
                   startangle=90,
                   colors=colors,
                   radius=1.25,
                   hatch=[hatch_list[j % len(hatch_list)] for j in range(counts.shape[0])],
                   labeldistance=None)
            ax.legend(bbox_to_anchor=[1, 1])
        elif show_legend:
            ax.pie(counts,
                   labels=labels,
                   counterclock=False,
                   startangle=90,
                   colors=colors,
                   radius=1.25,
                   labeldistance=None)
            ax.legend(bbox_to_anchor=[1, 1])
        else:
            ax.pie(counts,
                   labels=labels,
                   counterclock=False,
                   startangle=90,
                   colors=colors,
                   labeldistance=0.6,
                   radius=1.25)
        ax.set_title(cat, backgroundcolor='lavender', pad=10)

    # Remove the panels of the grid that have no pie chart.
    for i in range(ncols * nrows - len(category)):
        fig.delaxes(axes[-(i + 1)])

    fig.subplots_adjust(top=0.9, wspace=0.5)
    if title != "":
        fig.suptitle(title)
    return {"axes": ax}
def nice_piechart_num(df: pandas.core.frame.DataFrame, variables: List[str] = [], category: str = '', palette: str = 'tab20c', ncols: int = 2, ignore: float = 0.05, show_values: bool = True, title: str = '', figsize=[]) ‑> Dict[~KT, ~VT]
-
Drawing a nice pie chart by taking numerical values.
Parameters
df
:pandas DataFrame
- It must be a wide-form data. Each pie chart will be drawn from each row, and by default, index values are used for pie chart titles.
variables
:list
, optional- The column names to calculate fractions. If not given, all columns will be used for calculation (in this case, all columns must contain only numerical values).
order
:str
, optional(default: "largest") ["largest", "smallest"]
- How to sort values in the chart
ncols
:int
, optional(default: 2)
- The column number of the figure.
ignore
:float
, optional(default: 0.05)
- Remove annotations of which the fraction is smaller than this value.
palette
:str
or dict
, optional(default: "tab20c")
- A matplotlib colormap name or dictionary in which keys are values of the hue category and values are RGB array. e.g.) palette={""}
show_values
:bool
, optional- Whether to exhibit the values of fractions/counts/percentages.
show
:bool
, optional- Whether or not to show the figure.
figsize
:List[int]
, optional- The figure size, e.g., [4, 6].
title
:str optional, (default:"")
- The title of the figure.
hatch
:bool
, optional(default: False)
- Adding hatches to the bars
show_legend
:bool
, optional(default: True)
- Whether to show legends.
Returns
dict {"axes":ax}
Raises
Notes
References
See Also
Examples
Expand source code
def nice_piechart_num(df: pd.DataFrame,
                      variables: List[str]=[],
                      category: str="",
                      palette: str="tab20c",
                      ncols: int=2,
                      ignore: float=0.05,
                      show_values: bool=True,
                      title: str="",
                      figsize=[]) -> Dict:
    """
    Draw pie charts from numerical values, one chart per row of a wide-form dataframe.

    Parameters
    ----------
    df : pandas DataFrame
        It must be a wide-form data. Each pie chart is drawn from each row, and by default,
        index values are used for pie chart titles.
    variables: list, optional
        The column names to calculate fractions. If not given, all columns will be used
        (in this case, all columns must contain only numerical values).
    category: str, optional (default: "")
        A column whose values are used as pie chart titles instead of the index.
    palette : str, optional (default: "tab20c")
        A matplotlib colormap name.
    ncols: int, optional (default: 2)
        The column number of the figure.
    ignore, show_values:
        Accepted for API compatibility; this function does not use them.
    title: str, optional (default: "")
        The title of the figure.
    figsize : List[int], optional
        The figure size, e.g., [4, 6].

    Returns
    -------
    {"axes": ax}: dict
        ``ax`` is the axes of the last pie chart drawn.
    """
    if category != "":
        df = df.set_index(category)
    category = list(df.index)
    if len(variables) != 0:
        df = df[variables]

    # Order the columns by their grand total (largest first) so that colors and legend
    # entries follow the overall size of each variable.
    ranking = np.argsort(df.sum(axis=0).to_numpy())[::-1]
    df = df[df.columns[ranking]]
    variables = list(df.columns)

    nrows = len(category) // ncols + int(len(category) % ncols != 0)
    if len(figsize) == 0:
        figsize = [ncols * 2, nrows * 2]
    # squeeze=False always yields a 2D array, so a 1x1 grid can be flattened too.
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, squeeze=False)
    axes = axes.flatten()

    _cmap = plt.get_cmap(palette, len(variables))
    colors = [_cmap(i) for i in range(len(variables))]

    # Rows are taken by position because index labels may be duplicated.
    for row, (cat, ax) in enumerate(zip(category, axes)):
        ax.pie(df.iloc[row],
               counterclock=False,
               startangle=90,
               colors=colors,
               labeldistance=0.6,
               radius=1.25)
        ax.set_title(cat, backgroundcolor='lavender', pad=8)

    # Remove the panels of the grid that have no pie chart.
    for i in range(ncols * nrows - len(category)):
        fig.delaxes(axes[-(i + 1)])

    fig.tight_layout(h_pad=1)
    fig.subplots_adjust(top=0.95, right=0.81)
    legend_elements = [Line2D([0], [0],
                              marker='o',
                              color='lavender',
                              label=huelabel,
                              markerfacecolor=color,
                              markersize=10)
                       for color, huelabel in zip(colors, variables)]
    fig.legend(handles=legend_elements, bbox_to_anchor=(1, 1))
    if title != "":
        # A figure-level title; plt.title would label only the last subplot.
        fig.suptitle(title)
    return {"axes": ax}
def stacked_barplot(df: pandas.core.frame.DataFrame, x: Union[str, list], hue: Union[str, list], scale: str = 'fraction', order: Optional[list] = None, hue_order: Optional[list] = None, test_pairs: List[List[str]] = [], palette: Union[str, dict] = 'tab20c', show_values: bool = True, show_values_intact: bool = False, show: bool = False, figsize: List[int] = [], xunit: str = '', yunit: str = '', title: str = '', hatch: bool = False, rotation: int = 90, ax: Optional[matplotlib.axes._axes.Axes] = None, show_legend: bool = True, bin_num: Union[dict, int] = 10, ylim: Optional[int] = None) ‑> Dict[~KT, ~VT]
-
Drawing a stacked barplot by counting observations with or without the fisher's exact test.
Parameters
df
:pandas DataFrame
x
:str
or list
- The category to place in x axis. Multiple categories can be passed by a list. It mainly works with categorical values, including strings, booleans and integers, but also can take float values by automatically creating a histogram.
hue
:str
or list
- Counting samples by the hue category. Multiple categories can be passed by a list.
order
:list
, optional- The order of x axis labels
hue_order
:list
, optional- The order of hue labels
scale
:str
, optional- Scaling method. Available options are: fraction, percentage, absolute
test_pairs
:list
, optional- pairs of categorical values related to x. It will calculate -log10 (p value) (mlp) of the fisher exact test. Examples: [["Adelie","Chinstrap" ], ["Gentoo","Chinstrap" ], ["Adelie","Gentoo" ]]
palette
:str
or dict
, optional(default: "tab20c")
- A matplotlib colormap name or dictionary in which keys are values of the hue category and values are RGB array. e.g.) palette={""}
show_values
:bool
, optional- Whether to exhibit the values of fractions/counts/percentages.
show_values_intact
:bool
, optional- Whether to exhibit non-scaled counts.
show
:bool
, optional- Whether or not to show the figure.
figsize
:List[int]
, optional- The figure size, e.g., [4, 6].
title
:str optional, (default:"")
- The title of the figure.
hatch
:bool
, optional(default: False)
- Adding hatches to the bars
rotation
:int
, optional(default:90)
- The orientation of the x axis labels.
ax
:plt.Axes
, optional(default: None)
- The ax object to be plotted.
show_legend
:bool
, optional(default: True)
- Whether to show legends.
bin_num
:dict, int
, optional(default: 10)
- A histogram bin number when columns with float values are selected. You can specify the bin number of each column by using a dictionary (e.g., bin_num={"A":10,"B":5}).
Returns
{"pval":pvals,"axes":ax,"data":data} : dict
Raises
Notes
References
See Also
Examples
Expand source code
def stacked_barplot(df: pd.DataFrame,
                    x: Union[str, list],
                    hue: Union[str, list],
                    scale: str="fraction",
                    order: Optional[list]=None,
                    hue_order: Optional[list]=None,
                    test_pairs: List[List[str]]=[],
                    palette: Union[str, dict]="tab20c",
                    show_values: bool=True,
                    show_values_intact: bool=False,
                    show: bool=False,
                    figsize: List[int]=[],
                    xunit: str="",
                    yunit: str="",
                    title: str="",
                    hatch: bool=False,
                    rotation: int=90,
                    ax: Optional[plt.Axes]=None,
                    show_legend: bool=True,
                    bin_num: Union[dict, int]=10,
                    ylim: Optional[int]=None) -> Dict:
    """
    Draw a stacked barplot by counting observations, with or without the fisher's exact test.

    Parameters
    ----------
    df : pandas DataFrame (a dict is converted to a DataFrame)
        The input is not modified; conversions are applied to a copy.
    x: str or list
        The category to place in x axis. Multiple categories can be passed by a list.
        It mainly works with categorical values, including strings, booleans and integers,
        but also can take float values by automatically creating a histogram.
    hue: str or list
        Counting samples by the hue category. Multiple categories can be passed by a list.
    scale: str, optional (default: "fraction")
        Scaling method. Available options are: fraction, percentage, absolute
    order: list, optional
        The order of x axis labels (a list of lists when x is a list).
    hue_order: list, optional
        The order of hue labels (a list of lists when hue is a list).
    test_pairs : list, optional
        Pairs of categorical values related to x. It will calculate -log10 (p value) (mlp)
        of the fisher exact test.
        Examples: [["Adelie","Chinstrap" ], ["Gentoo","Chinstrap" ], ["Adelie","Gentoo" ]]
    palette : str or dict, optional (default: "tab20c")
        A matplotlib colormap name or dictionary in which keys are values of the hue
        category and values are colors.
    show_values: bool, optional
        Whether to exhibit the values of fractions/counts/percentages.
    show_values_intact, xunit, yunit:
        Accepted for API compatibility; this function does not use them.
    show : bool, optional
        Whether or not to show the figure.
    figsize : List[int], optional
        The figure size, e.g., [4, 6].
    title: str, optional (default: "")
        The title of the figure (only applied when the figure is created here).
    hatch: bool, optional (default: False)
        Adding hatches to the bars
    rotation: int, optional (default: 90)
        The orientation of the x axis labels.
    ax: plt.Axes or a sequence of plt.Axes, optional (default: None)
        The axes to be plotted. One axes per (x, hue) combination is required.
    show_legend: bool, optional (default: True)
        Whether to show legends.
    bin_num: dict, int, optional (default: 10)
        A histogram bin number when columns with float values are selected. You can specify
        the bin number of each column by using a dictionary (e.g., bin_num={"A":10,"B":5}).
    ylim: int, optional
        The upper limit of the y axis; only applied when scale is "absolute".

    Returns
    -------
    {"pval":pvals,"axes":ax,"data":data} : dict
        "axes" is the last axes drawn; "data" holds the (scaled) bar heights as
        data[x][hue][x_label] -> values in hue order; "pval" holds
        [index1, index2, p value] for each tested pair as pvals[x][hue][hue_label].

    Raises
    ------
    ValueError
        If scale is unknown or fewer axes than (x, hue) combinations are given.
    Exception
        If a test pair mixes labels from different x columns.
    """
    if scale not in ("fraction", "percentage", "absolute"):
        raise ValueError("Unknown scale: {}".format(scale))

    if isinstance(df, dict):
        df = pd.DataFrame(df)
    else:
        # Float binning and NaN replacement below must not leak into the caller's data.
        df = df.copy()

    if isinstance(x, str):
        x = [x]
        if order is not None:
            order = [order]
    if isinstance(hue, str):
        hue = [hue]
        if hue_order is not None:
            hue_order = [hue_order]

    # Turn float columns into histogram bins and NaNs into an explicit "NA" label.
    for col in list(x) + [h for h in hue if h not in x]:
        if df[col].dtype == float:
            nbins = bin_num[col] if isinstance(bin_num, dict) else bin_num
            df[col] = _float2cat(df[col].values, bin_num=nbins)
        elif df[col].isnull().values.any():
            df[col] = df[col].replace(np.nan, "NA")

    data: dict = {}
    xkeys: dict = {}
    keysx: dict = {}  # x label -> the x column it belongs to
    meankey_len = 0
    for i, _x in enumerate(x):
        if order is None:
            keys = sorted(list(np.unique(df[_x])))
        else:
            keys = order[i]
        meankey_len += len(keys)
        xkeys[_x] = keys
        for k in keys:
            keysx[k] = _x
    meankey_len = meankey_len // len(x)

    huekeys = {}
    for i, _hue in enumerate(hue):
        if hue_order is None:
            hues = sorted(list(np.unique(df[_hue])))
        else:
            hues = hue_order[i]
        huekeys[_hue] = hues

    # Raw counts: data[x][hue][x_label] is a list of counts in hue order.
    for _x, keys in xkeys.items():
        data[_x] = {}
        for key in keys:
            for _hue, hues in huekeys.items():
                if _x == _hue:
                    continue
                if _hue not in data[_x]:
                    data[_x][_hue] = {}
                data[_x][_hue][key] = [np.sum((df[_x] == key) & (df[_hue] == h)) for h in hues]

    # Fisher's exact test on the raw counts (must run before scaling).
    pvals = {}
    if len(test_pairs) > 0:
        print("mlp stands for -log10(p value)")
        for _hue in hue:
            for i, h in enumerate(huekeys[_hue]):
                for p1, p2 in test_pairs:
                    _x = keysx[p1]
                    __x = keysx[p2]
                    if _x != __x:
                        raise Exception("{} and {} can not be compared.".format(p1, p2))
                    if _x == _hue:
                        continue
                    tested = pvals.setdefault(_x, {}).setdefault(_hue, {}).setdefault(h, [])
                    idx1 = xkeys[_x].index(p1)
                    idx2 = xkeys[_x].index(p2)
                    yes_total = np.sum(data[_x][_hue][p1])
                    no_total = np.sum(data[_x][_hue][p2])
                    yes_and_hue = data[_x][_hue][p1][i]
                    no_and_hue = data[_x][_hue][p2][i]
                    table = [[yes_and_hue, no_and_hue],
                             [yes_total - yes_and_hue, no_total - no_and_hue]]
                    _, pval = fisher_exact(table)
                    tested.append([idx1, idx2, pval])

    if scale in ("fraction", "percentage"):
        factor = 100 if scale == "percentage" else 1
        for _x in x:
            for _hue in hue:
                if _x == _hue:
                    continue
                # Use the labels of this x column (not those left over from another loop).
                for key in xkeys[_x]:
                    counts = np.array(data[_x][_hue][key], dtype=float)
                    total = np.sum(counts)
                    if total > 0:
                        data[_x][_hue][key] = counts / total * factor
                    else:
                        # An empty group has no proportions; draw zero-height bars.
                        data[_x][_hue][key] = np.zeros(counts.shape)

    cmap = plt.get_cmap(palette) if isinstance(palette, str) else None

    ncols = len(x) * len(hue) - len(set(x) & set(hue))
    if ncols < 1:
        ncols = 1
    if len(figsize) == 0:
        figsize = [(4 + 0.1 * meankey_len) * ncols, 6]

    cur_ax = ax
    if ax is not None:
        fig = None
        if isinstance(ax, (list, tuple, np.ndarray)):
            axes = list(np.asarray(ax, dtype=object).flatten())
        else:
            axes = [ax]
        if len(axes) < ncols:
            raise ValueError("{} axes are required but {} were given.".format(ncols, len(axes)))
    else:
        fig, axes = plt.subplots(figsize=figsize, ncols=ncols)
        if ncols == 1:
            axes = [axes]
        else:
            axes = axes.flatten()

    unit = "%" if scale == "percentage" else ""
    ylabel = {"absolute": "Counts", "fraction": "Fraction", "percentage": "Percentage"}[scale]
    textbox = dict(boxstyle="round,pad=0.3", fc="white", ec="y", lw=1, alpha=0.8)

    axindex = 0
    for _x in x:
        keys = xkeys[_x]
        # Bars are placed at 0..n-1 so that value labels and test lines line up with the
        # bars regardless of the label type (strings, integers, booleans).
        positions = np.arange(len(keys))
        for _hue in hue:
            if _x == _hue:
                continue
            hues = huekeys[_hue]
            cur_ax = axes[axindex]
            axindex += 1
            bar_pos = {}
            bar_bottom = np.zeros([len(keys)])
            for i, h in enumerate(hues):
                heights = np.array([data[_x][_hue][key][i] for key in keys])
                color = palette[h] if cmap is None else cmap(i / len(hues))
                bar_kw = {"width": 0.5, "bottom": bar_bottom, "color": color, "label": h}
                if hatch:
                    bar_kw["hatch"] = hatch_list[i % len(hatch_list)]
                cur_ax.bar(positions, heights, **bar_kw)
                if show_values:
                    fmt = "{}{}" if scale == "absolute" else "{:.2f}{}"
                    for j in range(len(keys)):
                        cur_ax.text(j,
                                    bar_bottom[j] + heights[j] / 2,
                                    fmt.format(heights[j], unit),
                                    bbox=textbox)
                bar_pos[h] = {key: [he, bo] for key, he, bo in zip(keys, heights, bar_bottom)}
                # Rebind (not in-place) so the bars already drawn keep their own bottoms.
                bar_bottom = bar_bottom + heights

            cur_ax.set_xticks(positions, labels=[str(k) for k in keys], rotation=rotation)
            if len(keys) == 1:
                cur_ax.margins(x=1)
            if show_legend:
                cur_ax.legend(title=_hue, loc=[1.01, 0])
            cur_ax.set_xlabel(_x)
            cur_ax.set_ylabel(ylabel)

            # Connect the tested segments on the axes of this (x, hue) combination.
            for h in hues:
                for idx1, idx2, pval in pvals.get(_x, {}).get(_hue, {}).get(h, []):
                    he1, bot1 = bar_pos[h][keys[idx1]]
                    he2, bot2 = bar_pos[h][keys[idx2]]
                    line, = cur_ax.plot([idx1, idx2],
                                        [he1 / 2 + bot1, he2 / 2 + bot2],
                                        color="gray")
                    if pval < 0.05:
                        pval_str = str(np.round(-np.log10(pval), decimals=1))
                    else:
                        pval_str = "ns"
                    _line_annotate("mlp=" + pval_str, line, (idx1 + idx2) / 2, color="magenta")

            if scale == "absolute" and ylim is not None:
                cur_ax.set_ylim(0, ylim)

    figure = fig if fig is not None else axes[0].figure
    if len(x) == 1 and len(hue) == 1:
        figure.subplots_adjust(top=0.93, left=0.1, right=0.7, bottom=0.17)
    else:
        figure.tight_layout(w_pad=2)
    if title != "" and fig is not None:
        fig.suptitle(title)
    if show:
        plt.show()
    return {"pval": pvals, "axes": cur_ax, "data": data}
def stacked_barplot_num(df: pandas.core.frame.DataFrame, x: str = '', scale: str = 'fraction', test_pairs: List[List[str]] = [], palette: Union[str, dict] = 'tab20c', show_values: bool = True, show_values_intact: bool = False, show: bool = False, figsize: List[int] = [], xlabel: str = '', ylabel: str = '', unit: Union[str, List[~T], Dict[~KT, ~VT]] = '', title: str = '', hatch: bool = False, rotation: Optional[int] = None, ax: Optional[matplotlib.axes._axes.Axes] = None, show_legend: bool = True, ylim: Optional[int] = None, horizontal: bool = False, margins: dict = {}, gridspec_kw: dict = {}, legend_row_num: int = 2, bar_width: float = 0.75) ‑> Dict[~KT, ~VT]
-
Drawing a stacked barplot by taking values as an input with or without the fisher's exact test.
Parameters
df
:pandas DataFrame
- A wide-form data format. e.g., Bournemouth Brighton Possession 36 64 Shots 14 16 Shots on Target 3 6 Corners 5 5 Fouls 11 5
x
:str
- The category to place in x axis. If not set, it will use the index of the input dataframe.
scale
:str
, optional- Scaling method. Available options are: fraction, percentage, absolute
test_pairs
:list
, optional- pairs of categorical values related to x. It will calculate -log10 (p value) (mlp) of the fisher exact test. Examples: [["Adelie","Chinstrap" ], ["Gentoo","Chinstrap" ], ["Adelie","Gentoo" ]]
palette
:str
or dict
, optional(default: "tab20c")
- A matplotlib colormap name or dictionary in which keys are values of the hue category and values are RGB array. e.g.) palette={""}
show_values
:bool
, optional- Whether to exhibit the values of fractions/counts/percentages.
show_values_intact
:bool
, optional- Whether to exhibit non-scaled values.
show
:bool
, optional- Whether or not to show the figure.
figsize
:List[int]
, optional- The figure size, e.g., [4, 6].
title
:str optional, (default:"")
- The title of the figure.
hatch
:bool
, optional(default: False)
- Adding hatches to the bars
rotation
:int
, optional(default:90)
- The orientation of the x axis labels.
ax
:plt.Axes
, optional(default: None)
- The ax object to be plotted.
show_legend
:bool
, optional(default: True)
- Whether to show legends.
bin_num
:dict, int
, optional(default: 10)
- A histogram bin number when columns with float values are selected. You can specify the bin number of each column by using a dictionary (e.g., bin_num={"A":10,"B":5}).
Returns
{"pval":pvals,"axes":ax,"data":data}: dict
Raises
Notes
References
See Also
Examples
Expand source code
def stacked_barplot_num(df: pd.DataFrame,
                        x: str="",
                        scale: str="fraction",
                        test_pairs: List[List[str]]=[],
                        palette: Union[str,dict]="tab20c",
                        show_values: bool=True,
                        show_values_intact: bool=False,
                        show: bool=False,
                        figsize: List[int]=[],
                        xlabel: str="",
                        ylabel: str="",
                        unit: Union[str, List, Dict]="",
                        title: str="",
                        hatch: bool=False,
                        rotation: Optional[int]=None,
                        ax: Optional[plt.Axes]=None,
                        show_legend:bool=True,
                        ylim: Optional[int]=None,
                        horizontal: bool=False,
                        margins: dict={},
                        gridspec_kw: dict={},
                        legend_row_num: int=2,
                        bar_width: float=0.75
                        )-> Dict:
    """
    Drawing a stacked barplot by taking values as an input.

    Each row of the table becomes one bar and each value column becomes one
    stacked segment of that bar.

    Parameters
    ----------
    df : pandas DataFrame or dict
        A wide-form data format. A dict is converted with ``pd.DataFrame(df)``. e.g.,
                             Bournemouth  Brighton
            Possession                36        64
            Shots                     14        16
            Shots on Target            3         6
            Corners                    5         5
            Fouls                     11         5
    x: str, optional (default: "")
        The column holding the bar labels. If not set, the index of the input dataframe is used.
        All remaining columns are treated as values.
    scale: str, optional (default: "fraction")
        Scaling method. "fraction" divides each row by its row sum, "percentage" additionally
        multiplies by 100. Any other value (e.g., "absolute") plots the values as they are.
    test_pairs : list, optional
        Currently unused. The fisher's exact test is not implemented for this function, so the
        returned "pval" entry is always an empty dict (see stacked_barplot for the tested version).
    palette : str or dict, optional (default: "tab20c")
        A matplotlib colormap name, or a dictionary whose keys are the value column names and
        whose values are colors.
    show_values: bool, optional (default: True)
        Whether to exhibit the values of fractions/counts/percentages on the bars.
    show_values_intact: bool, optional (default: False)
        Whether to exhibit the non-scaled values of df instead of the scaled ones.
    show : bool, optional (default: False)
        Whether or not to call plt.show().
    figsize : List[int], optional
        The figure size, e.g., [4, 6]. Only used when ax is not given. By default it is
        [number of columns of df + 1, 6], or [6, number of rows / 2] when horizontal.
    xlabel, ylabel: str, optional (default: "")
        Axis labels. If the label of the value axis is left empty and show_values_intact is False,
        it is set to "Counts", "Fraction" or "Percentage" according to scale.
    unit: str, list or dict, optional (default: "")
        Text appended to each exhibited value. A list is indexed by the row position and a dict
        by the bar label (labels missing from the dict get no unit). If left empty and scale is
        "percentage", "%" is used.
    title: str, optional (default: "")
        The title of the figure. It is only applied when this function creates the figure
        (i.e., ax is not given).
    hatch: bool, optional (default: False)
        Adding hatches to the bars. Hatch patterns are reused cyclically when there are more
        value columns than patterns.
    rotation: int, optional (default: None)
        The orientation of the bar labels. None means 90 for vertical and 0 for horizontal plots.
    ax: plt.Axes, optional (default: None)
        The ax object to be plotted. A new figure is created if not given.
    show_legend: bool, optional (default: True)
        Whether to show legends.
    ylim: int, optional (default: None)
        Upper limit of the value axis (the lower limit is 0). Only applied when scale is "absolute".
    horizontal: bool, optional (default: False)
        Whether to draw horizontal bars. The category axis is inverted so that the first row
        is drawn at the top.
    margins: dict, optional
        Keyword arguments passed to ax.margins. By default dict(x=0) for horizontal plots with a
        non-absolute scale and dict(x=0.1) otherwise.
    gridspec_kw: dict, optional
        Passed to plt.subplots when ax is not given.
    legend_row_num: int, optional (default: 2)
        For horizontal plots, the legend uses ceil(number of value columns / legend_row_num) columns.
    bar_width: float, optional (default: 0.75)
        The bar thickness.

    Returns
    -------
    {"pval":pvals,"axes":ax}: dict

    Raises
    ------
    Notes
    -----
    The list/dict defaults in the signature are never mutated; they are only rebound locally.
    References
    ----------
    See Also
    --------
    Examples
    --------
    """
    # typing.Dict never equals the runtime type of a dict, so test against the builtin.
    if isinstance(df, dict):
        df=pd.DataFrame(df)

    # NOTE: no statistical test is performed here, so pvals is returned empty.
    pvals={}

    if len(gridspec_kw)==0:
        if horizontal:
            gridspec_kw=dict(top=0.75,left=0.25,right=0.9, bottom=0.17)
        else:
            gridspec_kw=dict(top=0.93,left=0.1,right=0.7, bottom=0.17)
    if len(margins)==0:
        if horizontal and scale!="absolute":
            margins=dict(x=0)
        else:
            margins=dict(x=0.1)
    if len(figsize)==0:
        if horizontal:
            figsize=[6, len(df.index)/2]
        else:
            figsize=[len(df.columns)+1, 6]

    if ax is not None:
        fig=None
    else:
        fig, ax=plt.subplots(figsize=figsize,gridspec_kw=gridspec_kw)
    # Apply the margins to the target axes, not to whatever pyplot considers current.
    ax.margins(**margins)

    if isinstance(unit, str) and unit=="" and scale=="percentage":
        unit="%"

    if x=="":
        keys=list(df.index)
        _df=df
    else:
        keys=list(df[x])
        _df=df.drop(columns=[x])

    if scale=="fraction":
        _df=_df.div(_df.sum(axis=1),axis = 'rows',)
    elif scale=="percentage":
        _df=100*_df.div(_df.sum(axis=1),axis = 'rows',)

    n_series=len(_df.columns)
    use_dict_palette=isinstance(palette, dict)
    if not use_dict_palette:
        cmap=plt.get_cmap(palette, n_series)

    # Bars are placed at 0..n-1 so that the value labels (which are also placed by row
    # position) line up with them even when the bar labels are numbers.
    positions=np.arange(len(keys))
    tick_labels=[str(k) for k in keys]
    text_box=dict(boxstyle="round,pad=0.3", fc="white", ec="y", lw=1, alpha=0.8)

    bottom=np.zeros([len(keys)])
    for i, col in enumerate(_df.columns):
        heights=_df[col].values
        originalval=df[col].values

        style=dict(color=palette[col] if use_dict_palette else cmap(i), label=col)
        if hatch:
            style["hatch"]=hatch_list[i % len(hatch_list)]

        if horizontal:
            ax.barh(positions, width=heights, height=bar_width, left=bottom, **style)
        else:
            ax.bar(positions, heights, width=bar_width, bottom=bottom, **style)

        if show_values:
            for j, k in enumerate(keys):
                if isinstance(unit, dict):
                    u=unit.get(k, "")
                elif isinstance(unit, list):
                    u=unit[j]
                else:
                    u=unit

                if show_values_intact:
                    value_text="{}{}".format(originalval[j],u)
                else:
                    value_text="{:.2f}{}".format(heights[j],u)

                # Centre of the segment along the value axis.
                center=bottom[j]+heights[j]/2
                if horizontal:
                    ax.text(center, j, value_text, va="center", ha="center", bbox=text_box)
                else:
                    ax.text(j, center, value_text, ha="center", bbox=text_box)
        bottom+=heights

    if show_legend:
        if horizontal:
            ax.legend(loc=[0,1.01],
                      ncols=n_series//legend_row_num+int(n_series%legend_row_num!=0))
        else:
            ax.legend(loc=[1.01,0])

    # Default label of the value axis, derived from the scaling method.
    scale_labels={"absolute": "Counts", "fraction": "Fraction", "percentage": "Percentage"}

    if horizontal:
        if rotation is None:
            rotation=0
        ax.set_yticks(positions, labels=tick_labels, rotation=rotation)
        if len(keys)==1:
            # Pad the category axis so that a lone bar does not fill the whole axes.
            ax.margins(y=1)
        ax.set_ylabel(ylabel)
        if xlabel=="" and not show_values_intact:
            xlabel=scale_labels.get(scale, "")
        ax.set_xlabel(xlabel)
    else:
        if rotation is None:
            rotation=90
        ax.set_xticks(positions, labels=tick_labels, rotation=rotation)
        if len(keys)==1:
            # Pad the category axis so that a lone bar does not fill the whole axes.
            ax.margins(x=1)
        ax.set_xlabel(xlabel)
        if ylabel=="" and not show_values_intact:
            ylabel=scale_labels.get(scale, "")
        ax.set_ylabel(ylabel)

    if scale=="absolute" and ylim is not None:
        # ylim limits the value axis, which is the x axis for horizontal bars.
        if horizontal:
            ax.set_xlim(0, ylim)
        else:
            ax.set_ylim(0, ylim)
    if horizontal:
        # Draw the first row at the top.
        ax.invert_yaxis()
    if title!="" and fig is not None:
        fig.suptitle(title)
    if show:
        plt.show()
    return {"pval":pvals,"axes":ax}
def stackedlines(df: pandas.core.frame.DataFrame, x: str, y: list, sort: bool = True, inverse: bool = False, show_values: bool = False, remove_all_zero: bool = False, palette: str = 'tab20c', figsize=[7, 4], ax: Optional[matplotlib.axes._axes.Axes] = None, alpha: float = 0.75, bbox_to_anchor: list = [1.7, 1], right: float = 0.7, bottom: float = 0.12, show_legend: bool = True, xlabel: str = '', ylabel: str = '', yunit: str = '', xunit: str = '', xformat: str = '', yformat: str = '', logscalex: bool = False, logscaley: bool = False, title: str = '', hatch: bool = False)
-
Drawing a stacked line (area) plot in which each selected column is drawn as a filled band.
Parameters
df
:pandas DataFrame
- A wide form dataframe. It accepts negative values. e.g.) year biofuel_consumption coal_consumption gas_consumption hydro_consumption nuclear_consumption oil_consumption 90 1990 16.733 5337.998 5170.609 864.271 1723.004 9306.913 91 1991 19.389 5287.613 5283.972 849.620 1829.645 9108.509 92 1992 22.045 5324.031 5463.509 743.463 1848.197 9297.387 93 1993 25.759 5522.452 5599.419 825.742 1822.853 9376.045 94 1994 28.846 5543.144 5731.081 766.870 1912.903 9619.746 95 1995 30.942 5593.053 5979.829 920.274 2011.356 9597.527
x
:str
- the name of a column to be the x axis of the plot.
y
:list
- the names of values to display as stacked lines
sort
:bool
, optional(default: True)
- Whether to sort lines based on their values
show_values
:bool
, optional(default: False)
- Whether to show percentages at the end of lines.
remove_all_zero
:bool
, optional(default: False)
palette
:str
- A colormap name
xlabel
:str
, optional- x axis label
ylabel
:str
, optional- y axis label
ax
:Optional[plt.Axes] optional, (default: None)
- pyplot ax to add this plot to
alpha
:float
, optional(default: 0.75)
- alpha of the stacked lines (areas).
bbox_to_anchor
:list
, optional(default: [1.7, 1])
- The legend location
right
:float=0.7,
bottom
:float=0.120,
show_legend
:bool=True,
yunit
:str=""
, optional(default: "")
- The y axis unit. it will appear at the top of the y axis.
xunit
:str=""
, optional(default: "")
- The x axis unit. It will appear at the right end of the x axis.
title
:str=""
, optional(default: "")
- The figure title.
hatch
:bool
, optional(default: False)
- Whether to add hatches to the plots.
Returns
dict
Raises
Notes
References
See Also
Examples
Expand source code
def stackedlines(df: pd.DataFrame,
                 x: str,
                 y: list,
                 sort: bool=True,
                 inverse: bool=False,
                 show_values: bool=False,
                 remove_all_zero: bool=False,
                 palette: str="tab20c",
                 figsize=[7,4],
                 ax: Optional[plt.Axes]=None,
                 alpha: float=0.75,
                 bbox_to_anchor: list=[1.7, 1],
                 right: float=0.7,
                 bottom: float=0.120,
                 show_legend: bool=True,
                 xlabel: str="",
                 ylabel: str="",
                 yunit: str="",
                 xunit: str="",
                 xformat: str="",
                 yformat: str="",
                 logscalex: bool=False,
                 logscaley: bool=False,
                 title: str="",
                 hatch: bool=False):
    """
    Drawing a stacked line (area) plot in which each selected column is drawn as a filled band.

    At every x position, non-negative values are stacked upwards from zero and negative
    values are stacked downwards from zero.

    Parameters
    ----------
    df : pandas DataFrame
        A wide form dataframe. It accepts negative values. Missing values are treated as 0.
        e.g.)
            year  biofuel_consumption  coal_consumption  gas_consumption  hydro_consumption  nuclear_consumption  oil_consumption
        90  1990               16.733          5337.998         5170.609            864.271             1723.004         9306.913
        91  1991               19.389          5287.613         5283.972            849.620             1829.645         9108.509
        92  1992               22.045          5324.031         5463.509            743.463             1848.197         9297.387
        93  1993               25.759          5522.452         5599.419            825.742             1822.853         9376.045
        94  1994               28.846          5543.144         5731.081            766.870             1912.903         9619.746
        95  1995               30.942          5593.053         5979.829            920.274             2011.356         9597.527
    x : str
        the name of a column to be the x axis of the plot.
    y: list
        the names of columns to display as stacked lines
    sort: bool, optional (default: True)
        Whether to sort lines based on their values at each x position (ascending order).
        If False, the order given by y is kept.
    inverse: bool, optional (default: False)
        Whether to invert the y axis. When sort is also True, the values are stacked in
        descending order.
    show_values: bool, optional (default: False)
        Whether to show percentages at the end of lines.
    remove_all_zero: bool, optional (default: False)
        Whether to drop the x positions at which every value is zero.
    palette : str
        A colormap name
    figsize: list, optional (default: [7, 4])
        The figure size. Only used when ax is not given.
    ax: Optional[plt.Axes] optional, (default: None)
        pyplot ax to add this plot to
    alpha: float, optional (default: 0.75)
        alpha of the stacked lines (areas).
    bbox_to_anchor: list, optional (default: [1.7, 1])
        The legend location
    right: float, optional (default: 0.7)
        Passed to subplots_adjust of the figure that contains ax.
    bottom: float, optional (default: 0.12)
        Passed to subplots_adjust of the figure that contains ax.
    show_legend: bool, optional (default: True)
        Whether to show the legend.
    xlabel: str, optional
        x axis label. If empty, x is used.
    ylabel: str, optional
        y axis label
    yunit: str, optional (default: "")
        The y axis unit. It will appear at the top of the y axis.
    xunit: str, optional (default: "")
        The x axis unit. It will appear at the right end of the x axis.
    xformat, yformat: str, optional (default: "")
        Format strings handed to StrMethodFormatter for the tick labels of each axis.
    logscalex, logscaley: bool, optional (default: False)
        Whether to use a log scale for each axis.
    title: str, optional (default: "")
        The axes title.
    hatch: bool, optional (default: False)
        Whether to add hatches to the plots. Hatch patterns are reused cyclically when
        there are more lines than patterns.
    Returns
    -------
    {"axes": ax}: dict

    Raises
    ------
    Notes
    -----
    References
    ----------
    See Also
    --------
    Examples
    --------
    """
    df=df.fillna(0)
    X=np.array(df[x])
    Y=np.array([np.array(df[col]) for col in y])

    if remove_all_zero:
        # Keep a position when any series is non-zero there. Testing the sum instead would
        # also drop positions where positive and negative values merely cancel out.
        _filter=(Y!=0).any(axis=0)
        Y=Y[:,_filter]
        X=X[_filter]

    # For every series, the [lower, upper] edge of its band at each x position.
    Ydict={col:[] for col in y}
    for i in range(len(X)):
        column=Y[:,i]
        if sort:
            order=np.argsort(column)
            if inverse:
                order=order[::-1]
        else:
            order=range(len(y))

        pos_edge=0  # running top of the non-negative stack
        neg_edge=0  # running bottom of the negative stack
        for idx in order:
            yval=column[idx]
            if yval>=0:
                Ydict[y[idx]].append([pos_edge, yval+pos_edge])
                pos_edge+=yval
            else:
                Ydict[y[idx]].append([neg_edge, yval+neg_edge])
                neg_edge+=yval

    if ax is None:
        _, ax=plt.subplots(figsize=figsize)

    cmap=plt.get_cmap(palette, len(y))
    colorlut={col: cmap(i) for i, col in enumerate(y)}

    last_vals=[]  # band thickness at the last x position
    last_pos=[]   # band centre at the last x position
    for i, (col, vals) in enumerate(Ydict.items()):
        vals=np.array(vals)
        style=dict(label=col, alpha=alpha, color=colorlut[col])
        if hatch:
            style["hatch"]=hatch_list[i % len(hatch_list)]
        ax.fill_between(X, vals[:,0], vals[:,1], **style)
        last_vals.append(vals[-1,1]-vals[-1,0])
        last_pos.append(vals[-1,1]/2+vals[-1,0]/2)

    if show_values:
        last_vals=100*np.array(last_vals)/np.sum(last_vals)
        for val, pos in zip(last_vals, last_pos):
            ax.text(X[-1], pos, str(np.round(val, 1))+"%")

    # Use the target axes and its figure rather than pyplot's current ones, which may
    # differ when the caller supplies ax.
    if show_legend:
        ax.legend(bbox_to_anchor=bbox_to_anchor)
    ax.figure.subplots_adjust(right=right, bottom=bottom)

    ax.set_xlabel(xlabel if xlabel!="" else x)
    ax.set_ylabel(ylabel)
    ax.set_title(title)

    if yunit!="":
        ax.text(0, 1, "({})".format(yunit), transform=ax.transAxes, ha="right")
    if xunit!="":
        ax.text(1, 0, "({})".format(xunit), transform=ax.transAxes, ha="left",va="top")
    if xformat!="":
        ax.xaxis.set_major_formatter(StrMethodFormatter(xformat))
    if yformat!="":
        ax.yaxis.set_major_formatter(StrMethodFormatter(yformat))
    if logscalex:
        ax.set_xscale("log")
    if logscaley:
        ax.set_yscale("log")
    if inverse:
        ax.invert_yaxis()
    return {"axes":ax}