Source code for magmaviz.histogram

import altair as alt
import pandas as pd

[docs]def histogram(df, x, y): """Plot a histogram with the magma color scheme Parameters ---------- df : dataframe Dataframe containing the variables for plotting x : string Column name of the variable to be plotted on the x-axis y : string An aggregation function to be plotted on the y-axis. The supported aggregation operations are: ['average', 'count', 'distinct', 'max', 'mean', 'median', 'min', 'missing', 'product', 'q1', 'q3', 'ci0', 'ci1', 'stderr', 'stdev', 'stdevp', 'sum', 'valid', 'values', 'variance', 'variancep'] Returns ------- altair.vegalite.v4.api.Chart A histogram displaying distribution based on the aggregation function Examples -------- >>> from magmaviz.histogram import histogram >>> histogram(mtcars, "cars", "count()") """ # check if the type of df is a pandas DataFrame if not isinstance(df, pd.core.frame.DataFrame): raise TypeError( "'df' should be of type 'pandas.core.frame.DataFrame', a pandas dataframe." ) # check if the type of x is a string if not isinstance(x, str): raise TypeError("'x' should be of type 'str'.") # check if x is a column in the dataframe if x not in list(df.columns): raise ValueError("The column specified for 'x' does not exist in the dataframe.") validate(df, y) chart = alt.Chart(df).mark_bar().encode( x=x, y=y, color=alt.Color(y, scale=alt.Scale(scheme="magma")) ) return chart
[docs]def validate(df, y): """A helper function to validate input y Parameters ---------- df : dataframe The input dataframe y : string The user specified aggregation function to be plotted on the y-axis. The supported aggregation operations are: ['average', 'count', 'distinct', 'max', 'mean', 'median', 'min', 'missing', 'product', 'q1', 'q3', 'ci0', 'ci1', 'stderr', 'stdev', 'stdevp', 'sum', 'valid', 'values', 'variance', 'variancep'] Returns ------- This function only raises errors Examples -------- >>> validate(data.iris(), 'mean(petalLength)') >>> validate(data.iris(), 'count()') """ supported_operations = ['average', 'count', 'distinct', 'max', 'mean', 'median', 'min', 'missing', 'product', 'q1', 'q3', 'ci0', 'ci1', 'stderr', 'stdev', 'stdevp', 'sum', 'valid', 'values', 'variance', 'variancep'] # check if the type of y is a string if not isinstance(y, str): raise TypeError("'y' should be of type 'str'.") splited_y = y.split("(") function_name = splited_y[0] if len(splited_y) != 2: raise ValueError("'y' is not in a correct format as an aggregation function.") if function_name not in supported_operations: raise ValueError( "The aggregation function specified for 'y' " + "is not one of " + str(supported_operations)) encoding_field = splited_y[1].split(')')[0] # For aggregation functions that need an encoding field, check if the input # has one and is a valid column if function_name in ['average', 'distinct', 'max', 'mean', 'median', 'min', 'missing', 'product', 'q1', 'q3', 'ci0', 'ci1', 'stderr', 'stdev', 'stdevp', 'sum', 'valid', 'values', 'variance', 'variancep']: if encoding_field == '': raise ValueError( "The encoding field 'y' is specified without a type; " + "the type cannot be inferred because it does not match any column in the data" ) if encoding_field not in list(df.columns): raise ValueError("The encoding field for 'y' does not exist in the dataframe.")