Source code for magmaviz.boxplot

import altair as alt
import pandas as pd
import re


def boxplot(df, x, y, facet_col=""):
    """Plot a boxplot with the magma color scheme and an option to facet.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe containing the variables for plotting.
    x : str
        Column name of the numerical variable to view the distribution of.
    y : str
        Column name of the categorical variable to assign boxes to.
    facet_col : str, optional
        Column name to facet by; each category gets its own row of the
        faceted chart. The default, an empty string, disables faceting.

    Returns
    -------
    altair chart
        The boxplot chart when ``facet_col`` is empty, otherwise the
        object returned by calling ``.facet(row=facet_col)`` on it.

    Raises
    ------
    TypeError
        If ``df`` is not a pandas dataframe, or if ``x``, ``y`` or
        ``facet_col`` is not a string.
    AssertionError
        If ``x``, ``y`` or a non-empty ``facet_col`` is not a column of
        ``df``.

    Examples
    --------
    >>> from magmaviz.magmaviz import boxplot
    >>> from vega_datasets import data
    >>> boxplot(data.cars(), "Miles_per_Gallon", "Origin", facet_col="Cylinders")
    """
    # --- argument type validation -------------------------------------
    if not isinstance(df, pd.DataFrame):
        raise TypeError(
            "'df' should be of type 'pandas.core.frame.DataFrame', a pandas dataframe."
        )
    if not isinstance(x, str):
        raise TypeError("'x' should be of type 'str'.")
    if not isinstance(y, str):
        raise TypeError("'y' should be of type 'str'.")
    if not isinstance(facet_col, str):
        raise TypeError("'facet_col' should be of type 'string'.")

    # --- column existence validation ----------------------------------
    # AssertionError is raised explicitly instead of via `assert` so the
    # checks still run under `python -O` (which strips assert statements)
    # while callers that already catch AssertionError keep working.
    columns = list(df.columns)
    if x not in columns:
        raise AssertionError(
            "This column specified for 'x' does not exist in the dataframe."
        )
    if y not in columns:
        raise AssertionError(
            "This column specified for 'y' does not exist in the dataframe."
        )
    if facet_col and facet_col not in columns:
        raise AssertionError(
            "This column specified for 'facet_col' does not exist in the dataframe."
        )

    # Axis titles: column names with separator characters turned into spaces.
    x_title = re.sub(r"[_.,-]", " ", x)
    y_title = re.sub(r"[_.,-]", " ", y)

    # One box per category of `y`, colored by that same category using the
    # magma scheme; the legend is suppressed because the y axis already
    # labels each box.
    plot = (
        alt.Chart(df)
        .mark_boxplot()
        .encode(
            x=alt.X(x, title=x_title),
            y=alt.Y(y, title=y_title),
            color=alt.Color(y, scale=alt.Scale(scheme="magma"), legend=None),
        )
    )

    # Facet only when a facet column was supplied.
    if facet_col:
        return plot.facet(row=facet_col)
    return plot