Source code for choix.lsr

"""(Iterative) Luce Spectral Ranking and related inference algorithms."""

from collections.abc import Callable

import numpy as np
from numpy.typing import NDArray

from .convergence import NormOfDifferenceTest
from .typing import PairwiseData, RankingData, Top1Data
from .utils import exp_transform, log_transform, statdist


def _init_lsr(
    n_items: int,
    alpha: float,
    initial_params: NDArray[np.float64] | None,
) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
    """Initialize the LSR Markov chain and the weights."""
    if initial_params is None:
        weights = np.ones(n_items)
    else:
        weights = exp_transform(initial_params)
    chain = alpha * np.ones((n_items, n_items), dtype=float)
    return weights, chain


def _ilsr(
    fun: Callable[[NDArray[np.float64] | None], NDArray[np.float64]],
    params: NDArray[np.float64] | None,
    max_iter: int,
    tol: float,
) -> NDArray[np.float64]:
    """Iteratively refine LSR estimates until convergence.

    Raises
    ------
    RuntimeError
        If the algorithm does not converge after ``max_iter`` iterations.
    """
    converged = NormOfDifferenceTest(tol, order=1)
    for _ in range(max_iter):
        params = fun(params)
        if converged(params):
            return params
    raise RuntimeError("Did not converge after {} iterations".format(max_iter))


[docs] def lsr_pairwise( n_items: int, data: PairwiseData, alpha: float = 0.0, initial_params: NDArray[np.float64] | None = None, ) -> NDArray[np.float64]: """Compute the LSR estimate of model parameters. This function implements the Luce Spectral Ranking inference algorithm [MG15]_ for pairwise-comparison data (see :ref:`data-pairwise`). The argument ``initial_params`` can be used to iteratively refine an existing parameter estimate (see the implementation of :func:`~choix.ilsr_pairwise` for an idea on how this works). If it is set to `None` (the default), the all-ones vector is used. The transition rates of the LSR Markov chain are initialized with ``alpha``. When ``alpha > 0``, this corresponds to a form of regularization (see :ref:`regularization` for details). Parameters ---------- n_items : int Number of distinct items. data : list of lists Pairwise-comparison data. alpha : float, optional Regularization parameter. initial_params : array_like, optional Parameters used to build the transition rates of the LSR Markov chain. Returns ------- params : numpy.ndarray An estimate of model parameters. """ weights, chain = _init_lsr(n_items, alpha, initial_params) for winner, loser in data: chain[loser, winner] += 1 / (weights[winner] + weights[loser]) chain -= np.diag(chain.sum(axis=1)) return log_transform(statdist(chain))
[docs] def ilsr_pairwise( n_items: int, data: PairwiseData, alpha: float = 0.0, initial_params: NDArray[np.float64] | None = None, max_iter: int = 100, tol: float = 1e-8, ) -> NDArray[np.float64]: """Compute the ML estimate of model parameters using I-LSR. This function computes the maximum-likelihood (ML) estimate of model parameters given pairwise-comparison data (see :ref:`data-pairwise`), using the iterative Luce Spectral Ranking algorithm [MG15]_. The transition rates of the LSR Markov chain are initialized with ``alpha``. When ``alpha > 0``, this corresponds to a form of regularization (see :ref:`regularization` for details). Parameters ---------- n_items : int Number of distinct items. data : list of lists Pairwise-comparison data. alpha : float, optional Regularization parameter. initial_params : array_like, optional Parameters used to initialize the iterative procedure. max_iter : int, optional Maximum number of iterations allowed. tol : float, optional Maximum L1-norm of the difference between successive iterates to declare convergence. Returns ------- params : numpy.ndarray The ML estimate of model parameters. """ def fun(params: NDArray[np.float64] | None) -> NDArray[np.float64]: return lsr_pairwise(n_items=n_items, data=data, alpha=alpha, initial_params=params) return _ilsr(fun, initial_params, max_iter, tol)
[docs] def lsr_pairwise_dense( comp_mat: NDArray[np.float64], alpha: float = 0.0, initial_params: NDArray[np.float64] | None = None, ) -> NDArray[np.float64]: """Compute the LSR estimate of model parameters given dense data. This function implements the Luce Spectral Ranking inference algorithm [MG15]_ for dense pairwise-comparison data. The data is described by a pairwise-comparison matrix ``comp_mat`` such that ``comp_mat[i,j]`` contains the number of times that item ``i`` wins against item ``j``. In comparison to :func:`~choix.lsr_pairwise`, this function is particularly efficient for dense pairwise-comparison datasets (i.e., containing many comparisons for a large fraction of item pairs). The argument ``initial_params`` can be used to iteratively refine an existing parameter estimate (see the implementation of :func:`~choix.ilsr_pairwise` for an idea on how this works). If it is set to `None` (the default), the all-ones vector is used. The transition rates of the LSR Markov chain are initialized with ``alpha``. When ``alpha > 0``, this corresponds to a form of regularization (see :ref:`regularization` for details). Parameters ---------- comp_mat : np.array 2D square matrix describing the pairwise-comparison outcomes. alpha : float, optional Regularization parameter. initial_params : array_like, optional Parameters used to build the transition rates of the LSR Markov chain. Returns ------- params : np.array An estimate of model parameters. """ n_items = comp_mat.shape[0] ws, chain = _init_lsr(n_items, alpha, initial_params) denom = np.tile(ws, (n_items, 1)) chain += comp_mat.T / (denom + denom.T) chain -= np.diag(chain.sum(axis=1)) return log_transform(statdist(chain))
[docs] def ilsr_pairwise_dense( comp_mat: NDArray[np.float64], alpha: float = 0.0, initial_params: NDArray[np.float64] | None = None, max_iter: int = 100, tol: float = 1e-8, ) -> NDArray[np.float64]: """Compute the ML estimate of model parameters given dense data. This function computes the maximum-likelihood (ML) estimate of model parameters given dense pairwise-comparison data. The data is described by a pairwise-comparison matrix ``comp_mat`` such that ``comp_mat[i,j]`` contains the number of times that item ``i`` wins against item ``j``. In comparison to :func:`~choix.ilsr_pairwise`, this function is particularly efficient for dense pairwise-comparison datasets (i.e., containing many comparisons for a large fraction of item pairs). The transition rates of the LSR Markov chain are initialized with ``alpha``. When ``alpha > 0``, this corresponds to a form of regularization (see :ref:`regularization` for details). Parameters ---------- comp_mat : np.array 2D square matrix describing the pairwise-comparison outcomes. alpha : float, optional Regularization parameter. initial_params : array_like, optional Parameters used to initialize the iterative procedure. max_iter : int, optional Maximum number of iterations allowed. tol : float, optional Maximum L1-norm of the difference between successive iterates to declare convergence. Returns ------- params : numpy.ndarray The ML estimate of model parameters. """ def fun(params: NDArray[np.float64] | None) -> NDArray[np.float64]: return lsr_pairwise_dense(comp_mat=comp_mat, alpha=alpha, initial_params=params) return _ilsr(fun, initial_params, max_iter, tol)
[docs] def rank_centrality(n_items: int, data: PairwiseData, alpha: float = 0.0) -> NDArray[np.float64]: """Compute the Rank Centrality estimate of model parameters. This function implements Negahban et al.'s Rank Centrality algorithm [NOS12]_. The algorithm is similar to :func:`~choix.ilsr_pairwise`, but considers the *ratio* of wins for each pair (instead of the total count). The transition rates of the Rank Centrality Markov chain are initialized with ``alpha``. When ``alpha > 0``, this corresponds to a form of regularization (see :ref:`regularization` for details). Parameters ---------- n_items : int Number of distinct items. data : list of lists Pairwise-comparison data. alpha : float, optional Regularization parameter. Returns ------- params : numpy.ndarray An estimate of model parameters. """ _, chain = _init_lsr(n_items, alpha, None) for winner, loser in data: chain[loser, winner] += 1.0 # Transform the counts into ratios. idx = chain > 0 # Indices (i,j) of non-zero entries. chain[idx] = chain[idx] / (chain + chain.T)[idx] # Finalize the Markov chain by adding the self-transition rate. chain -= np.diag(chain.sum(axis=1)) return log_transform(statdist(chain))
[docs] def lsr_rankings( n_items: int, data: RankingData, alpha: float = 0.0, initial_params: NDArray[np.float64] | None = None, ) -> NDArray[np.float64]: """Compute the LSR estimate of model parameters. This function implements the Luce Spectral Ranking inference algorithm [MG15]_ for ranking data (see :ref:`data-rankings`). The argument ``initial_params`` can be used to iteratively refine an existing parameter estimate (see the implementation of :func:`~choix.ilsr_rankings` for an idea on how this works). If it is set to `None` (the default), the all-ones vector is used. The transition rates of the LSR Markov chain are initialized with ``alpha``. When ``alpha > 0``, this corresponds to a form of regularization (see :ref:`regularization` for details). Parameters ---------- n_items : int Number of distinct items. data : list of lists Ranking data. alpha : float, optional Regularization parameter. initial_params : array_like, optional Parameters used to build the transition rates of the LSR Markov chain. Returns ------- params : numpy.ndarray An estimate of model parameters. """ weights, chain = _init_lsr(n_items, alpha, initial_params) for ranking in data: sum_ = weights.take(ranking).sum() for i, winner in enumerate(ranking[:-1]): val = 1.0 / sum_ for loser in ranking[i + 1 :]: chain[loser, winner] += val sum_ -= weights[winner] chain -= np.diag(chain.sum(axis=1)) return log_transform(statdist(chain))
[docs] def ilsr_rankings( n_items: int, data: RankingData, alpha: float = 0.0, initial_params: NDArray[np.float64] | None = None, max_iter: int = 100, tol: float = 1e-8, ) -> NDArray[np.float64]: """Compute the ML estimate of model parameters using I-LSR. This function computes the maximum-likelihood (ML) estimate of model parameters given ranking data (see :ref:`data-rankings`), using the iterative Luce Spectral Ranking algorithm [MG15]_. The transition rates of the LSR Markov chain are initialized with ``alpha``. When ``alpha > 0``, this corresponds to a form of regularization (see :ref:`regularization` for details). Parameters ---------- n_items : int Number of distinct items. data : list of lists Ranking data. alpha : float, optional Regularization parameter. initial_params : array_like, optional Parameters used to initialize the iterative procedure. max_iter : int, optional Maximum number of iterations allowed. tol : float, optional Maximum L1-norm of the difference between successive iterates to declare convergence. Returns ------- params : numpy.ndarray The ML estimate of model parameters. """ def fun(params: NDArray[np.float64] | None) -> NDArray[np.float64]: return lsr_rankings(n_items=n_items, data=data, alpha=alpha, initial_params=params) return _ilsr(fun, initial_params, max_iter, tol)
[docs] def lsr_top1( n_items: int, data: Top1Data, alpha: float = 0.0, initial_params: NDArray[np.float64] | None = None, ) -> NDArray[np.float64]: """Compute the LSR estimate of model parameters. This function implements the Luce Spectral Ranking inference algorithm [MG15]_ for top-1 data (see :ref:`data-top1`). The argument ``initial_params`` can be used to iteratively refine an existing parameter estimate (see the implementation of :func:`~choix.ilsr_top1` for an idea on how this works). If it is set to `None` (the default), the all-ones vector is used. The transition rates of the LSR Markov chain are initialized with ``alpha``. When ``alpha > 0``, this corresponds to a form of regularization (see :ref:`regularization` for details). Parameters ---------- n_items : int Number of distinct items. data : list of lists Top-1 data. alpha : float Regularization parameter. initial_params : array_like Parameters used to build the transition rates of the LSR Markov chain. Returns ------- params : numpy.ndarray An estimate of model parameters. """ weights, chain = _init_lsr(n_items, alpha, initial_params) for winner, losers in data: val = 1 / (weights.take(losers).sum() + weights[winner]) for loser in losers: chain[loser, winner] += val chain -= np.diag(chain.sum(axis=1)) return log_transform(statdist(chain))
[docs] def ilsr_top1( n_items: int, data: Top1Data, alpha: float = 0.0, initial_params: NDArray[np.float64] | None = None, max_iter: int = 100, tol: float = 1e-8, ): """Compute the ML estimate of model parameters using I-LSR. This function computes the maximum-likelihood (ML) estimate of model parameters given top-1 data (see :ref:`data-top1`), using the iterative Luce Spectral Ranking algorithm [MG15]_. The transition rates of the LSR Markov chain are initialized with ``alpha``. When ``alpha > 0``, this corresponds to a form of regularization (see :ref:`regularization` for details). Parameters ---------- n_items : int Number of distinct items. data : list of lists Top-1 data. alpha : float, optional Regularization parameter. initial_params : array_like, optional Parameters used to initialize the iterative procedure. max_iter : int, optional Maximum number of iterations allowed. tol : float, optional Maximum L1-norm of the difference between successive iterates to declare convergence. Returns ------- params : numpy.ndarray The ML estimate of model parameters. """ def fun(params: NDArray[np.float64] | None) -> NDArray[np.float64]: return lsr_top1(n_items=n_items, data=data, alpha=alpha, initial_params=params) return _ilsr(fun, initial_params, max_iter, tol)