# Source code for verona.evaluation.stattests.plackettluce

import tempfile
from typing import List, Tuple

import numpy as np
import pandas as pd
from cmdstanpy import CmdStanModel, cmdstan_path, install_cmdstan

from verona.evaluation.stattests.stan_codes import STAN_CODE



# [docs]
class PlackettLuceResults:
    """
    Container for the output of a fitted Plackett-Luce ranking model.

    Instances simply hold the three quantities handed to the constructor so that
    callers can access them by name; no computation or validation is performed here.

    Attributes:
        expected_prob (pd.Series):
            Expected probability associated with each algorithm, i.e. the estimated
            likelihood that the algorithm is the best among the ones compared.

        expected_rank (pd.Series):
            Expected rank of each algorithm. Lower values indicate better performance.

        posterior (pd.DataFrame):
            Posterior samples from which the expected values were derived. They capture
            the uncertainty around the point estimates.
    """

    def __init__(self, expected_prob: pd.Series, expected_rank: pd.Series, posterior: pd.DataFrame):
        """
        Store the estimated metrics on the instance.

        Args:
            expected_prob (pd.Series): Expected probabilities for each algorithm.
            expected_rank (pd.Series): Expected ranks for each algorithm.
            posterior (pd.DataFrame): Posterior distributions for rank and probabilities.
        """
        # The arguments are kept by reference; nothing is copied or transformed.
        self.expected_prob, self.expected_rank, self.posterior = (
            expected_prob,
            expected_rank,
            posterior,
        )




# [docs]
class PlackettLuceRanking:
    """
    Bayesian Plackett-Luce ranking of several approaches evaluated over several datasets.

    The result matrix is converted into per-dataset ranks, a STAN program is sampled with
    MCMC through ``cmdstanpy``, and the posterior draws are summarised into expected
    probabilities and expected ranks for each approach.
    """

    def __init__(self, result_matrix: pd.DataFrame, approaches: List[str]):
        """
        Validate the inputs, label the result matrix and make sure CmdStan is available.

        Parameters:
            result_matrix (pd.DataFrame): Matrix of results in which each row represents a dataset and each column
                represents an algorithm.
            approaches (List[str]): List of the names of approaches in the result matrix

        Raises:
            AssertionError: If ``approaches`` is ``None`` or empty, or if its length does not match the
                number of columns of ``result_matrix``.
        """
        assert (approaches is not None) and (len(approaches) > 0), "The list of approaches is none or empty"
        assert result_matrix.shape[1] == len(approaches), "The number of columns in the result matrix does not " \
                                                           "match the approaches specified"

        # Work on a copy so that renaming the columns does not alter the caller's DataFrame.
        self.result_matrix = result_matrix.copy()
        self.result_matrix.columns = approaches
        self.approaches = approaches

        # A failure of cmdstan_path() is taken to mean that no usable CmdStan installation
        # exists, in which case one is installed. ``Exception`` (rather than a bare
        # ``except``) lets KeyboardInterrupt / SystemExit propagate.
        try:
            cmdstan_path()
        except Exception:
            install_cmdstan()

    # The return annotation is quoted so that it is resolved lazily and does not depend on
    # definition order within the module.
    def run(self, n_chains: int = 8, num_samples: int = 300000, mode: str = "max") -> "PlackettLuceResults":
        """
        Execute the Plackett-Luce ranking model to estimate the rank and probabilities of each algorithm
        based on their performance metrics.

        The method employs Markov Chain Monte Carlo (MCMC) sampling, leveraging the STAN backend,
        to estimate the posterior distribution of the rank and probabilities. The posterior is also
        stored on the instance as ``self.posterior``.

        Args:
            n_chains (int, optional): Number of chains used to perform the sampling. Default is ``8``.
            num_samples (int, optional): Number of samples to consider in the MCMC. Default is ``300000``.
            mode (Literal['max', 'min'], optional): If ``'max'`` the higher the value the better the algorithm.
                If ``'min'`` the lower the value the better the algorithm.
                Default is ``'max'``.

        Returns:
            PlackettLuceResults : ``PlackettLuceResults`` instance containing:
                - expected_prob: Expected probability of each algorithm having the best ranking
                - expected_rank: Expected rank of each algorithm
                - posterior: Posterior

        Raises:
            AssertionError: If ``mode`` is not ``'max'`` or ``'min'``, or if ``n_chains`` or
                ``num_samples`` is not positive.

        Examples:
            >>> result_matrix = pd.DataFrame([[0.75, 0.6, 0.8], [0.8, 0.7, 0.9], [0.9, 0.8, 0.7]])
            >>> plackett_ranking = PlackettLuceRanking(result_matrix, ["a1", "a2", "a3"])
            >>> results = plackett_ranking.run(n_chains=10, num_samples=300000, mode="max")
            >>> print("Expected prob: ", results.expected_prob)
            Expected prob:  a1    0.432793
                            a2    0.179620
                            a3    0.387587
            >>> print("Expected rank: ", results.expected_rank)
            Expected rank:  a1    1.580505
                            a2    2.667531
                            a3    1.751964
        """
        assert mode in ["max", "min"], "mode must be either 'max' or 'min'"
        assert n_chains > 0, "n_chains must be a positive integer"
        assert num_samples > 0, "num_samples must be a positive integer"

        rank_matrix = self._get_rank_matrix(result_matrix=self.result_matrix, mode=mode)
        stan_result = self._run_stan(rank_matrix=rank_matrix, n_chains=n_chains, num_samples=num_samples)
        expected_prob, expected_rank, posterior = self._get_results_from_stan(stan_results=stan_result)
        self.posterior = posterior
        return PlackettLuceResults(expected_prob, expected_rank, posterior)

    def _get_rank_matrix(self, result_matrix: pd.DataFrame, mode: str = "max") -> pd.DataFrame:
        """
        Compute the rank matrix of a matrix of results. If the mode is max, assume that the higher the result,
        the better.
        If the mode is min, do otherwise.

        Args:
            result_matrix (pd.DataFrame): Matrix of results.
            mode (Literal['max', 'min'], optional): ``'max'`` for assigning better ranks to high results.
                ``'min'`` for otherwise.
                Default is ``'max'``.

        Returns:
            rank_matrix : Rank matrix of the result matrix, with ranks computed independently for each
            row and cast to ``int``.
        """
        # Rank 1 goes to the smallest value in 'min' mode and to the largest value otherwise.
        lower_is_better = mode == "min"
        rank_matrix = result_matrix.rank(axis=1, ascending=lower_is_better)

        # NOTE(review): pandas assigns tied values their average rank by default, which may be
        # fractional; the integer cast truncates it (e.g. 1.5 -> 1). Confirm that the STAN
        # program tolerates repeated ranks within a row.
        return rank_matrix.astype(int)

    def _run_stan(self, rank_matrix, n_chains: int = 8, num_samples: int = 300000) -> pd.DataFrame:
        """
        Execute the STAN program for the Plackett-Luce ranking model.

        The STAN code is written to a temporary ``.stan`` file, compiled and sampled through
        ``cmdstanpy``. The temporary file is removed afterwards, even when compilation or
        sampling fails.

        Parameters:
            rank_matrix : Matrix of ranks.
            n_chains (int, optional): Number of chains to sample. Default is ``8``.
            num_samples (int, optional): Number of sampling iterations per chain; a quarter of this
                amount is additionally used as warm-up iterations. Default is ``300000``.

        Returns:
            results : Raw results from executing the STAN program, as returned by ``draws_pd()``.
        """
        # Local import: ``os`` is only needed here, for removing the temporary model file.
        import os

        stan_code = STAN_CODE.PLACKETT_LUCE_TEST_V3
        rank_matrix = np.array(rank_matrix)
        n, m = rank_matrix.shape[0], rank_matrix.shape[1]
        stan_data = {
            "n": n,
            "m": m,
            "ranks": rank_matrix,
            "alpha": [1] * m,    # one entry per column of the rank matrix, all equal to 1
            "weights": [1] * n,  # one entry per row of the rank matrix, all equal to 1
        }

        # delete=False: the file must outlive the ``with`` block so that it can be handed
        # to CmdStanModel by name.
        with tempfile.NamedTemporaryFile(suffix='.stan', delete=False) as temp:
            temp.write(stan_code.encode('utf-8'))
            temp_file_name = temp.name

        try:
            model = CmdStanModel(stan_file=temp_file_name)
            fit = model.sample(data=stan_data, chains=n_chains, iter_sampling=num_samples,
                               iter_warmup=int(num_samples/4), seed=42)
            return fit.draws_pd()
        finally:
            # Always clean up the temporary model file, including on failure.
            os.remove(temp_file_name)

    def _get_results_from_stan(self, stan_results) -> Tuple[pd.Series, pd.Series, pd.DataFrame]:
        """
        Extract, parse and beautify the STAN results.

        Parameters:
            stan_results : Raw stan results. Only the columns whose name contains ``"ratings"``
                are used; their number must match the number of approaches.

        Returns:
            expected_prob : Expected probability of each approach to be the best in terms of ranking
                (per-approach mean of the posterior draws).
            expected_rank : Expected rank of the approach (per-approach mean of the rank obtained
                in each draw, where the highest value of a draw gets rank 1).
            posterior : Posterior probability, used to calculate the plot.
        """
        rating_columns = [col for col in stan_results.columns if "ratings" in col]
        # Explicit copy so that renaming the columns can never touch the raw draws.
        posterior = stan_results[rating_columns].copy()
        # Set the approaches names so the figure generated has meaningful names
        posterior.columns = self.approaches

        # Within each draw, the approach with the highest value is ranked first.
        ranks = posterior.rank(axis=1, ascending=False)
        expected_prob = posterior.mean(axis=0)
        expected_rank = ranks.mean(axis=0)
        return expected_prob, expected_rank, posterior