Source code for verona.visualization.metrics

from typing import Literal

import numpy as np
import pandas as pd
import plotly.express as px
from plotly.graph_objects import Figure
from plotly.subplots import make_subplots
import plotly.graph_objects as go


[docs] def bar_plot_metric(data: pd.DataFrame, x_label: str = 'Dataset', y_label: str = 'Accuracy', reduction: Literal['mean', 'max', 'min', 'median'] = None, y_min: float = 0.0, y_max: float = 100.0, font_size: int = 15, print_values: bool = False, num_decimals: int = 2) -> Figure: """ Generates a bar chart from input data. Args: data (pd.DataFrame): Pandas DataFrame where the columns name correspond to the categories to be represented on the X-axis and the values are either single numerical values or NumPy Arrays. If arrays are used, the `reduction` parameter will be applied. x_label (str, optional): Label for the X axis. Default is ``'Dataset'``. y_label (str, optional): Label for the Y axis. Defaults to ``'Accuracy'``. reduction (Literal['mean', 'max', 'min', 'median'], optional): The reduction function to be applied if the values in the `data` dictionary are NumPy Arrays. y_min (float, optional): The minimum value for the Y-axis. Default is ``0.0``. y_max (float, optional): The maximum value for the Y-axis. Defaults is ``100.0``. font_size (int, optional): Font size of the text in the plot. Default is ``15``. print_values (bool, optional): If True, metric values are printed over each bar. Default is ``False``. num_decimals (int, optional): Number of decimals to display if `print_values` is ``True``. Default is ``2``. Returns: Plotly Figure: ``Plotly Figure`` object representing the bar chart. """ x_values = data.columns.tolist() y_values_raw = data.T.values if y_values_raw.ndim == 2 and y_values_raw.shape[1] == 1: y_values = y_values_raw elif y_values_raw.ndim == 2 and y_values_raw.shape[1] > 1: y_values = __apply_reduction(y_values_raw, reduction) else: raise TypeError(f'Incorrect format for values in data DataFrame: {y_values_raw}. ' f'Only two dimension DataFrames with one or more values per column are allowed.') fig = px.bar(x=x_values, y=y_values, labels={x_label, y_label}) fig.update_yaxes(range=[y_min, y_max]) if print_values: for i, v in enumerate(y_values): fig.add_annotation( x=x_values[i], y=v + 1, text=f'{v:.{num_decimals}f}', showarrow=False, font=dict(size=font_size) ) fig.update_layout(font_size=font_size) fig.update_xaxes(title_text=x_label, tickangle=15, tickfont=dict(size=font_size)) fig.update_yaxes(title_text=y_label, tickfont=dict(size=font_size)) return fig
[docs] def line_plot_metric(data: pd.DataFrame, x_label: str = 'Dataset', y_label: str = 'Accuracy', reduction: Literal['mean', 'max', 'min', 'median'] = None, y_min: float = 0.0, y_max: float = 100.0, font_size: int = 15, print_values: bool = False, num_decimals: int = 2) -> Figure: """ Generates a line chart from input data. Args: data (pd.DataFrame): Pandas DataFrame where the columns name correspond to the categories to be represented on the X-axis and the values are either single numerical values or NumPy Arrays. If arrays are used, the `reduction` parameter will be applied. x_label (str, optional): Label for the X axis. Default is ``'Dataset'``. y_label (str, optional): Label for the Y axis. Default is ``'Accuracy'``. reduction (Literal['mean', 'max', 'min', 'median'], optional): The reduction function to be applied if the values in the `data` dictionary are NumPy Arrays. y_min (float, optional): The minimum value for the Y-axis. Default is ``0.0``. y_max (float, optional): The maximum value for the Y-axis. Default is ``100.0``. font_size (int, optional): Font size of the text in the plot. Default is ``15` . print_values (bool, optional): If ``True``, metric values are printed over each point. Default is ``False``. num_decimals (int, optional): Number of decimals to display if `print_values` is ``True``. Default is ``2``. Returns: Plotly Figure: ``Plotly Figure`` object representing the line chart. """ x_values = data.columns.tolist() y_values_raw = data.T.values if y_values_raw.ndim == 2 and y_values_raw.shape[1] == 1: y_values = y_values_raw elif y_values_raw.ndim == 2 and y_values_raw.shape[1] > 1: y_values = __apply_reduction(y_values_raw, reduction) else: raise TypeError(f'Incorrect format for values in data DataFrame: {y_values_raw}. ' f'Only two dimension DataFrames with one or more values per column are allowed.') fig = px.line(x=x_values, y=y_values, labels={x_label, y_label}, markers=True) fig.update_traces(line={'width': font_size/5}, marker={'size': font_size/2}) fig.update_yaxes(range=[y_min, y_max]) if print_values: for i, v in enumerate(y_values): fig.add_annotation( x=x_values[i], y=v + 2, text=f'{v:.{num_decimals}f}', showarrow=False, font=dict(size=font_size) ) fig.update_layout(font_size=font_size) fig.update_xaxes(title_text=x_label, tickangle=15, tickfont=dict(size=font_size)) fig.update_yaxes(title_text=y_label, tickfont=dict(size=font_size)) return fig
[docs] def box_plot_metric(data: pd.DataFrame, x_label: str = 'Dataset', y_label: str = 'Accuracy', y_min: float = 0.0, y_max: float = 100.0, font_size: int = 15) -> Figure: """ Generates a box plot showing the corresponding box for each category. Args: data (pd.DataFrame): Pandas DataFrame containing the values to be represented in the graph. The columns name correspond to the categories to be represented on the X-axis, while the values associated are used to build the corresponding box. x_label (str, optional): Label for the X axis. Default is ``'Dataset'``. y_label (str, optional): Label for the Y axis. Default is ``'Accuracy'``. y_min (float, optional): The minimum value for the Y-axis. Defaults is ``0.0``. y_max (float, optional): The maximum value for the Y-axis. Default is ``100.0``. font_size (int, optional): Font size of the text in the plot. Default is ``15``. Returns: Plotly Figure: ``Plotly Figure`` object representing the error plot. """ fig = px.box(data, title='Box Plot', labels={y_label, x_label}, range_y=[y_min, y_max]) fig.update_layout(font_size=font_size) fig.update_xaxes(title_text=x_label, tickangle=15, tickfont=dict(size=font_size)) fig.update_yaxes(title_text=y_label, tickfont=dict(size=font_size)) return fig
[docs] def error_plot_metric(data: pd.DataFrame, x_label: str = 'Dataset', y_label: str = 'Accuracy', y_min: float = 0.0, y_max: float = 100.0, font_size: int = 15, print_values: bool = False, num_decimals: int = 2) -> Figure: """ Generates an error plot from input data. This function is particularly useful for visualizing results from cross-validation experiments, as it shows the mean and standard deviation for each NumPy Array of values. Args: data (pd.DataFrame): Pandas DataFrame where the columns name correspond to the categories to be represented on the X-axis and the values are used to construct the corresponding error bars. x_label (str, optional): Label for the X axis. Default is ``'Dataset'``. y_label (str, optional): Label for the Y axis. Default is ``'Accuracy'``. y_min (float, optional): The minimum value for the Y-axis. Default is ``0.0``. y_max (float, optional): The maximum value for the Y-axis. Default is ``100.0``. font_size (int, optional): Font size of the text in the plot. Default is ``15``. print_values (bool, optional): Whether to print metric values over each point. Default is ``False``. num_decimals (int, optional): Number of decimal places to show if `print_values` is ``True``. Default is ``2``. Returns: Plotly Figure: ``Plotly Figure`` object representing the error plot. """ x_values = data.columns.tolist() y_values_raw = data.T.values if y_values_raw.ndim == 2 and y_values_raw.shape[1] > 1: y_values = y_values_raw else: raise TypeError(f'Incorrect format for values in data DataFrame: {y_values_raw}. ' f'Only two dimension DataFrames with two or more values per column are allowed.') y_means = __apply_reduction(y_values, 'mean') y_stds = __apply_reduction(y_values, 'std') fig = px.scatter(x=x_values, y=y_means, error_y=y_stds, labels={x_label, y_label}) fig.update_yaxes(range=[y_min, y_max]) fig.update_traces(error_y={'thickness': font_size / 10}, marker={'size': font_size / 2}) if print_values: for i, (mean_val, std_val) in enumerate(zip(y_means, y_stds)): fig.add_annotation( x=x_values[i], y=mean_val + std_val + 1, text=f'Mean: {mean_val:.{num_decimals}f}<br>Std: {std_val:.{num_decimals}f}', showarrow=False, font=dict(size=font_size) ) fig.update_layout(font_size=font_size) fig.update_xaxes(title_text=x_label, tickangle=15, tickfont=dict(size=font_size)) fig.update_yaxes(title_text=y_label, tickfont=dict(size=font_size)) return fig
[docs] def plot_metric_by_prefixes_len(data: pd.DataFrame, metric_label: str = 'Accuracy', font_size: int = 15, print_values: bool = False, num_decimals: int = 2) -> Figure: """ Generates a mixed plot, where the bar chart indicates the number of prefixes of each length and the line chart indicates the value of the chosen metric for each prefix length. Args: data (pd.DataFrame): Pandas DataFrame where the column names indicate the length of the prefixes and the associated values indicate 1- the value of the metric and 2- the number of prefixes of that length. metric_label (str, optional): Label for the right Y-axis. Default is ``'Accuracy'``. font_size (int, optional): Font size of the text in the plot. Default is ``15``. print_values (bool, optional): Whether to print metric values over each point. Defaults is ``False``. num_decimals (int, optional): Number of decimal places to show if 'print_values' is True. Default is ``2``. Returns: PLotly Figure: ``Plotly Figure`` object representing the bar chart and the line chart. """ x_values = data.columns.tolist() y_values_metric = data.loc[0].values y_values_counts = data.loc[1].values fig = make_subplots(specs=[[{"secondary_y": True}]]) # Add bar chart trace bar_trace = go.Bar(x=x_values, y=y_values_counts, name='Counts') fig.add_trace(bar_trace, secondary_y=False) # Add line chart trace line_trace = go.Scatter(x=x_values, y=y_values_metric, mode='lines+markers', name=metric_label) fig.add_trace(line_trace, secondary_y=True) # Update line chart properties (color and thickness) fig.update_traces(selector=dict(type='scatter'), line=dict(color='red', width=font_size/5), secondary_y=True) if print_values: for i, v in enumerate(y_values_metric): fig.add_annotation( x=x_values[i], y=v * max(y_values_counts) + max(y_values_counts) / 50, text=f'{v:.{num_decimals}f}', showarrow=False, font=dict(size=font_size, color='black') ) fig.update_layout(title=f'{metric_label} by prefix length', font_size=font_size) fig.update_xaxes(title_text='Prefix Length', tickangle=15, tickfont=dict(size=font_size)) fig.update_yaxes(range=[0, max(y_values_counts)*1.03], title_text='Counts', secondary_y=False, tickfont=dict(size=font_size)) fig.update_yaxes(range=[0, max(y_values_metric)*1.03], title_text='Metric', secondary_y=True, tickfont=dict(size=font_size)) return fig
def __apply_reduction(raw_values: np.array, reduction: Literal['mean', 'max', 'min', 'median', 'std']) -> np.array: if reduction == 'mean': return np.array(list(map(np.mean, raw_values))) if reduction == 'max': return np.array(list(map(np.max, raw_values))) if reduction == 'min': return np.array(list(map(np.min, raw_values))) if reduction == 'median': return np.array(list(map(np.median, raw_values))) if reduction == 'std': return np.array(list(map(np.std, raw_values)))