Extending the solutions by Kyrubas and hwang, you can also define a function `scattered_boxplot` once (and add it as a method to `plt.Axes`), so that you can always use `scattered_boxplot` instead of `boxplot`:
# Example: two samples of very different size (150 integers and 3 floats),
# drawn through the method patched onto plt.Axes.
fig, ax = plt.subplots(figsize=(5, 6))
ax.scattered_boxplot(
    x=[
        np.array([1, 2, 3] * 50),
        np.array([1.1, 2.2, 3.3]),
    ],
)
The function `scattered_boxplot` can be defined as follows, using only `matplotlib` (and `numpy`):
import matplotlib.pyplot as plt
import numpy as np
from numbers import Number
def _split_datasets(x):
    """Split *x* into one 1-D array per box.

    A flat sequence of numbers is a single dataset, a 2-D array holds one
    dataset per column, and any other sequence is taken to hold one dataset
    per element.
    """
    if isinstance(x, np.ndarray):
        if x.ndim == 2:
            return list(x.T)
        if x.ndim == 1 and x.dtype != object:
            return [x]
    elif len(x) > 0 and all(isinstance(value, Number) for value in x):
        return [np.asarray(x)]
    return [np.asarray(values) for values in x]


def scattered_boxplot(ax, x, notch=None, sym=None, vert=None, whis=None,
                      positions=None, widths=None, patch_artist=None,
                      bootstrap=None, usermedians=None, conf_intervals=None,
                      meanline=None, showmeans=None, showcaps=None,
                      showbox=None,
                      showfliers="unif",
                      hide_points_within_whiskers=False,
                      boxprops=None, labels=None, flierprops=None,
                      medianprops=None, meanprops=None, capprops=None,
                      whiskerprops=None, manage_ticks=True, autorange=False,
                      zorder=None, *, data=None):
    """Draw a boxplot on *ax* and overlay the data as jittered points.

    All parameters except the two described below are forwarded unchanged
    to ``ax.boxplot``.

    Parameters
    ----------
    ax : axes-like
        Object providing ``boxplot`` and ``scatter``; both the boxes and the
        points are drawn on it.
    x : sequence
        Either a sequence of datasets (one box each), a flat sequence of
        numbers (a single box) or a 2-D array (one box per column).
    showfliers : {"unif", "normal", "classic", False}
        ``"unif"`` scatters the points with uniform jitter spanning the box
        width, ``"normal"`` with Gaussian jitter whose standard deviation is
        a tenth of the box width.  ``"classic"`` draws no scattered points
        and lets ``ax.boxplot`` draw its own fliers; ``False`` draws neither.
    hide_points_within_whiskers : bool
        If true, only the fliers reported by
        ``matplotlib.cbook.boxplot_stats`` are scattered instead of every
        data point.

    Returns
    -------
    The value returned by ``ax.boxplot``.

    Raises
    ------
    NotImplementedError
        If *showfliers* is not one of the supported values.
    ValueError
        If *positions* or *widths* do not have one entry per box.
    TypeError
        If *positions* does not contain numbers.
    """
    scatter_points = showfliers in ("unif", "normal")
    # Validate the mode up front so that an unsupported value does not leave
    # a half-finished plot behind.
    if not scatter_points and showfliers not in ("classic", False):
        raise NotImplementedError("showfliers='"+str(showfliers)+"' is not implemented. You can choose from 'unif', 'normal', 'classic' and False")

    boxplot_kwargs = dict(
        notch=notch, sym=sym, vert=vert, whis=whis, positions=positions,
        widths=widths, patch_artist=patch_artist, bootstrap=bootstrap,
        usermedians=usermedians, conf_intervals=conf_intervals,
        meanline=meanline, showmeans=showmeans, showcaps=showcaps,
        showbox=showbox,
        # The built-in fliers are only wanted in "classic" mode; in the
        # scatter modes the points drawn below replace them.
        showfliers=(showfliers == "classic"),
        boxprops=boxprops, flierprops=flierprops, medianprops=medianprops,
        meanprops=meanprops, capprops=capprops, whiskerprops=whiskerprops,
        manage_ticks=manage_ticks, autorange=autorange, zorder=zorder,
        data=data,
    )
    # NOTE(review): recent Matplotlib releases appear to deprecate the
    # ``labels`` keyword of boxplot; forwarding it only when the caller set
    # it avoids triggering that warning on every call -- confirm against the
    # Matplotlib version in use.
    if labels is not None:
        boxplot_kwargs["labels"] = labels
    artists = ax.boxplot(x, **boxplot_kwargs)

    if not scatter_points:
        return artists

    if hide_points_within_whiskers:
        import matplotlib.cbook as cbook
        from matplotlib import rcParams
        if whis is None:
            whis = rcParams['boxplot.whiskers']
        if bootstrap is None:
            bootstrap = rcParams['boxplot.bootstrap']
        bxpstats = cbook.boxplot_stats(x, whis=whis, bootstrap=bootstrap,
                                       labels=labels, autorange=autorange)
        points_per_box = [stats['fliers'] for stats in bxpstats]
    else:
        points_per_box = _split_datasets(x)

    n_boxes = len(points_per_box)
    if n_boxes == 0:
        # Nothing to scatter; also avoids np.ptp failing on empty positions.
        return artists

    datashape_message = ("List of boxplot statistics and `{0}` "
                         "values must have same the length")
    # check position
    if positions is None:
        positions = list(range(1, n_boxes + 1))
    elif len(positions) != n_boxes:
        raise ValueError(datashape_message.format("positions"))
    positions = np.array(positions)
    if len(positions) > 0 and not isinstance(positions[0], Number):
        raise TypeError("positions should be an iterable of numbers")

    # width
    if widths is None:
        widths = [np.clip(0.15 * np.ptp(positions), 0.15, 0.5)] * n_boxes
    elif np.isscalar(widths):
        widths = [widths] * n_boxes
    elif len(widths) != n_boxes:
        raise ValueError(datashape_message.format("widths"))

    # An explicit vert=False asks for horizontal boxes, so the jitter has to
    # be applied along the y axis instead of the x axis.
    horizontal = vert is not None and not vert

    for position, width, values in zip(positions, widths, points_per_box):
        n_points = np.size(values)
        if showfliers == "unif":
            jitter = np.random.uniform(-width * 0.5, width * 0.5,
                                       size=n_points)
        else:
            jitter = np.random.normal(loc=0.0, scale=width * 0.1,
                                      size=n_points)
        jittered = position + jitter
        coords = (values, jittered) if horizontal else (jittered, values)
        # Draw on the axes that was passed in, not on pyplot's current axes.
        ax.scatter(*coords, alpha=0.2, marker="o", facecolors='none',
                   edgecolors="k")
    return artists
and can be added as a method to plt.Axes by
# Expose the function as a method, so every Axes offers ax.scattered_boxplot(...).
plt.Axes.scattered_boxplot = scattered_boxplot
One still has access to all the options of `boxplot`. In addition, one can choose the distribution used for the horizontal jitter of the scattered points (e.g. `showfliers="unif"` for uniform or `showfliers="normal"` for Gaussian jitter), and one can choose whether all data points are scattered (`hide_points_within_whiskers=False`) or only the fliers outside the whiskers (`hide_points_within_whiskers=True`).
This solution already works quite well. An alternative would be to change the source code of matplotlib directly, mainly at this line: https://github.com/matplotlib/matplotlib/blob/9765379ce6e7343070e815afc0988874041b98e2/lib/matplotlib/axes/_axes.py#L4006
seaborn
for this purpose. python-graph-gallery.com/36-add-jitter-over-boxplot-seaborn – Belaud