Skip to content

Metrics

novae.monitor.jensen_shannon_divergence(adatas, obs_key, slide_key=None)

Jensen-Shannon divergence (JSD) over all slides

Parameters:

Name Type Description Default
adatas AnnData | list[AnnData]

One or a list of AnnData object(s)

required
obs_key str

Key of adata.obs containing the domains annotation.

required
slide_key str

Optional key of adata.obs containing the ID of each slide. Not needed if each adata is a slide.

None

Returns:

Type Description
float

The Jensen-Shannon divergence score for all slides

Source code in novae/monitor/eval.py
def jensen_shannon_divergence(adatas: AnnData | list[AnnData], obs_key: str, slide_key: str = None) -> float:
    """Compute the Jensen-Shannon divergence (JSD) between the domain compositions of all slides.

    Each slide is summarised by a vector of per-domain cell counts, aligned on the
    sorted union of the categories found across every slide. The stacked vectors
    (one row per slide) are handed to `_jensen_shannon_divergence`.

    Args:
        adatas: One or a list of AnnData object(s)
        obs_key: Key of `adata.obs` containing the domains annotation. The column must be
            categorical, since its `.cat.categories` are read.
        slide_key: Optional key of `adata.obs` containing the ID of each slide. Not needed if each `adata` is a slide.

    Returns:
        The Jensen-Shannon divergence score for all slides
    """
    # First pass: gather every domain name seen on any slide, so that all
    # count vectors share the same length and ordering.
    categories_per_slide = (
        slide.obs[obs_key].cat.categories for slide in _iter_uid(adatas, slide_key=slide_key, obs_key=obs_key)
    )
    domain_names = sorted(set().union(*categories_per_slide))

    def _count_vector(slide) -> np.ndarray:
        # Domains missing from this slide keep a count of zero.
        counts = slide.obs[obs_key].value_counts(sort=False)
        vector = np.zeros(len(domain_names))
        for position, name in enumerate(domain_names):
            if name in counts:
                vector[position] = counts[name]
        return vector

    # Second pass: one count vector per slide. These are raw counts; any
    # normalisation is presumably done by `_jensen_shannon_divergence` (not visible here).
    per_slide_counts = [_count_vector(slide) for slide in _iter_uid(adatas, slide_key=slide_key, obs_key=obs_key)]

    return _jensen_shannon_divergence(np.array(per_slide_counts))

novae.monitor.fide_score(adata, obs_key, n_classes=None)

F1-score of intra-domain edges (FIDE). A high score indicates a great domain continuity.

Note

The F1-score is computed for every class, then all F1-scores are averaged. If some classes are not predicted, the n_classes argument allows padding with zeros before averaging the F1-scores.

Parameters:

Name Type Description Default
adata AnnData

An AnnData object

required
obs_key str

Key of adata.obs containing the domains annotation.

required
n_classes int | None

Optional number of classes. This can be useful if not all classes are predicted, for a fair comparison.

None

Returns:

Type Description
float

The FIDE score.

Source code in novae/monitor/eval.py
def fide_score(adata: AnnData, obs_key: str, n_classes: int | None = None) -> float:
    """F1-score of intra-domain edges (FIDE). A high score indicates a great domain continuity.

    Every non-zero entry of the adjacency matrix `adata.obsp[Keys.ADJ]` is treated as an edge.
    The domain on one end of each edge is used as the "true" label and the domain on the
    other end as the "predicted" label; edges with a missing domain on either end are ignored.

    Note:
        The F1-score is computed for every class, then all F1-scores are averaged. If some classes
        are not predicted, the `n_classes` argument allows padding with zeros before averaging the F1-scores.

    Args:
        adata: An `AnnData` object
        obs_key: Key of `adata.obs` containing the domains annotation.
        n_classes: Optional number of classes. This can be useful if not all classes are predicted, for a fair comparison.

    Returns:
        The FIDE score.

    Raises:
        AssertionError: If `n_classes` is smaller than the number of classes found among the valid edges.
    """
    i_left, i_right = adata.obsp[Keys.ADJ].nonzero()

    # Select the column once and index its values, instead of building a
    # row-duplicated copy of the whole `obs` DataFrame for each edge endpoint.
    domains = adata.obs[obs_key].values
    classes_left, classes_right = domains[i_left], domains[i_right]

    # Keep only the edges whose two endpoints both have a domain assigned.
    where_valid = ~classes_left.isna() & ~classes_right.isna()
    classes_left, classes_right = classes_left[where_valid], classes_right[where_valid]

    f1_scores = metrics.f1_score(classes_left, classes_right, average=None)

    if n_classes is None:
        return f1_scores.mean()

    assert n_classes >= len(f1_scores), f"Expected {n_classes=}, but found {len(f1_scores)}, which is greater"

    # Classes that were never observed contribute an F1-score of zero to the average.
    return np.pad(f1_scores, (0, n_classes - len(f1_scores))).mean()

novae.monitor.mean_fide_score(adatas, obs_key, slide_key=None, n_classes=None)

Mean FIDE score over all slides. A high score indicates a great domain continuity.

Parameters:

Name Type Description Default
adatas AnnData | list[AnnData]

An AnnData object, or a list of AnnData objects.

required
obs_key str

Key of adata.obs containing the domains annotation.

required
slide_key str

Optional key of adata.obs containing the ID of each slide. Not needed if each adata is a slide.

None
n_classes int | None

Optional number of classes. This can be useful if not all classes are predicted, for a fair comparison.

None

Returns:

Type Description
float

The FIDE score averaged for all slides.

Source code in novae/monitor/eval.py
def mean_fide_score(
    adatas: AnnData | list[AnnData], obs_key: str, slide_key: str = None, n_classes: int | None = None
) -> float:
    """Mean FIDE score over all slides. A high score indicates a great domain continuity.

    The FIDE score (see `fide_score`) is computed independently for each slide yielded by
    `_iter_uid`, and the per-slide scores are then averaged with equal weight.

    Args:
        adatas: An `AnnData` object, or a list of `AnnData` objects.
        obs_key: Key of `adata.obs` containing the domains annotation.
        slide_key: Optional key of `adata.obs` containing the ID of each slide. Not needed if each `adata` is a slide.
        n_classes: Optional number of classes. This can be useful if not all classes are predicted, for a fair comparison.

    Returns:
        The FIDE score averaged for all slides.
    """
    return np.mean(
        [
            fide_score(adata, obs_key, n_classes=n_classes)
            for adata in _iter_uid(adatas, slide_key=slide_key, obs_key=obs_key)
        ]
    )