Skip to content

novae.monitor

novae.monitor.jensen_shannon_divergence(adatas, obs_key, slide_key=None)

Jensen-Shannon divergence (JSD) over all slides

Parameters:

Name Type Description Default
adatas AnnData | list[AnnData]

One or a list of AnnData object(s)

required
obs_key str

Key of adata.obs containing the domains annotation.

required
slide_key str

Optional key of adata.obs containing the ID of each slide. Not needed if each adata is a slide.

None

Returns:

Type Description
float

The Jensen-Shannon divergence score for all slides

Source code in novae/monitor/eval.py
@utils.format_docs
def jensen_shannon_divergence(adatas: AnnData | list[AnnData], obs_key: str, slide_key: str = None) -> float:
    """Jensen-Shannon divergence (JSD) computed between the domain distributions of all slides.

    Args:
        adatas: One or a list of AnnData object(s)
        {obs_key}
        {slide_key}

    Returns:
        The Jensen-Shannon divergence score for all slides
    """
    # First pass: gather every category declared on any slide, so that all
    # slides share one common, sorted axis of domains.
    seen_categories = set()
    for slide in _iter_uid(adatas, slide_key=slide_key, obs_key=obs_key):
        seen_categories |= set(slide.obs[obs_key].cat.categories)
    ordered_categories = sorted(seen_categories)

    # Second pass: one row of raw counts per slide, with 0 for the domains
    # that do not appear in the slide's value counts.
    rows = []
    for slide in _iter_uid(adatas, slide_key=slide_key, obs_key=obs_key):
        counts = slide.obs[obs_key].value_counts(sort=False)
        rows.append(
            np.array(
                [counts[category] if category in counts else 0 for category in ordered_categories],
                dtype=float,
            )
        )

    return _jensen_shannon_divergence(np.array(rows))

novae.monitor.fide_score(adata, obs_key, n_classes=None)

F1-score of intra-domain edges (FIDE). A high score indicates a great domain continuity.

Note

The F1-score is computed for every class, then all F1-scores are averaged. If some classes are not predicted, the n_classes argument allows padding with zeros before averaging the F1-scores.

Parameters:

Name Type Description Default
adata AnnData

An AnnData object

required
obs_key str

Key of adata.obs containing the domains annotation.

required
n_classes int | None

Optional number of classes. This can be useful if not all classes are predicted, for a fair comparison.

None

Returns:

Type Description
float

The FIDE score.

Source code in novae/monitor/eval.py
@utils.format_docs
def fide_score(adata: AnnData, obs_key: str, n_classes: int | None = None) -> float:
    """F1-score of intra-domain edges (FIDE). A high score indicates a great domain continuity.

    Note:
        The F1-score is computed for every class, then all F1-scores are averaged. If some classes
        are not predicted, the `n_classes` argument allows padding with zeros before averaging the F1-scores.

    Args:
        adata: An `AnnData` object
        {obs_key}
        n_classes: Optional number of classes. This can be useful if not all classes are predicted, for a fair comparison.

    Returns:
        The FIDE score.

    Raises:
        AssertionError: If `n_classes` is lower than the number of per-class F1-scores found.
    """
    # Each non-zero entry of the adjacency matrix is one edge: (i_left -> i_right).
    i_left, i_right = adata.obsp[Keys.ADJ].nonzero()
    classes_left, classes_right = adata.obs.iloc[i_left][obs_key].values, adata.obs.iloc[i_right][obs_key].values

    # Only keep the edges for which both endpoints have a domain annotation.
    # NOTE(review): `.isna()` on `.values` assumes a categorical column - confirm against callers.
    where_valid = ~classes_left.isna() & ~classes_right.isna()
    classes_left, classes_right = classes_left[where_valid], classes_right[where_valid]

    # One F1-score per class: the domain on one side of an edge is scored against the other side.
    f1_scores = metrics.f1_score(classes_left, classes_right, average=None)

    if n_classes is None:
        return f1_scores.mean()

    # `{n_classes=}` (not `{n_classes:=}`) so that the message shows the argument name and its value.
    assert n_classes >= len(f1_scores), f"Expected {n_classes=}, but found {len(f1_scores)}, which is greater"

    # Missing classes count as a F1-score of zero in the average.
    return np.pad(f1_scores, (0, n_classes - len(f1_scores))).mean()

novae.monitor.mean_fide_score(adatas, obs_key, slide_key=None, n_classes=None)

Mean FIDE score over all slides. A high score indicates a great domain continuity.

Parameters:

Name Type Description Default
adatas AnnData | list[AnnData]

An AnnData object, or a list of AnnData objects.

required
obs_key str

Key of adata.obs containing the domains annotation.

required
slide_key str

Optional key of adata.obs containing the ID of each slide. Not needed if each adata is a slide.

None
n_classes int | None

Optional number of classes. This can be useful if not all classes are predicted, for a fair comparison.

None

Returns:

Type Description
float

The FIDE score averaged for all slides.

Source code in novae/monitor/eval.py
@utils.format_docs
def mean_fide_score(
    adatas: AnnData | list[AnnData], obs_key: str, slide_key: str = None, n_classes: int | None = None
) -> float:
    """Mean FIDE score over all slides. A high score indicates a great domain continuity.

    Args:
        adatas: An `AnnData` object, or a list of `AnnData` objects.
        {obs_key}
        {slide_key}
        n_classes: Optional number of classes. This can be useful if not all classes are predicted, for a fair comparison.

    Returns:
        The FIDE score averaged for all slides.
    """
    # One FIDE score per slide, each of them computed with the same `n_classes` padding.
    return np.mean(
        [
            fide_score(adata, obs_key, n_classes=n_classes)
            for adata in _iter_uid(adatas, slide_key=slide_key, obs_key=obs_key)
        ]
    )

novae.monitor.svg_score(adata, obs_key, n_top_genes=3, n_classes=None)

Average score of the top differentially expressed genes for each domain.

Parameters:

Name Type Description Default
adata AnnData

An AnnData object

required
obs_key str

Key of adata.obs containing the domains annotation.

required
n_top_genes int

Number of genes per domain to consider.

3

Returns:

Type Description
float

The average SVG score.

Source code in novae/monitor/eval.py
@utils.format_docs
def svg_score(adata: AnnData, obs_key: str, n_top_genes: int = 3, n_classes: int | None = None) -> float:
    """Average score of the top differentially expressed genes for each domain.

    Args:
        adata: An `AnnData` object
        {obs_key}
        {n_top_genes}
        n_classes: Optional number of classes. If provided, the sum of the per-domain scores is divided by `n_classes` instead of being averaged over the domains found.

    Returns:
        The average SVG score, or -1000 if the computation is skipped because a domain has fewer than two cells.
    """
    smallest_domain_size = adata.obs[obs_key].value_counts().min()
    if smallest_domain_size < 2:
        log.warning(f"Skipping {obs_key=} because some domains have one or zero cell")
        return -1000

    # The ranking is run on `adata`, and its results are read back from `adata.uns`.
    sc.tl.rank_genes_groups(adata, groupby=obs_key)

    # Keep the first `n_top_genes` rows of scores; the record array has one field per domain.
    top_scores: np.recarray = adata.uns["rank_genes_groups"]["scores"][:n_top_genes]
    domain_means = [top_scores[domain].mean() for domain in top_scores.dtype.names]

    if n_classes is None:
        return np.mean(domain_means)
    return np.sum(domain_means) / n_classes

novae.monitor.mean_svg_score(adata, obs_key, slide_key=None, n_top_genes=3, n_classes=None)

Mean SVG score over all slides. A high score indicates better domain-specific genes, or spatially variable genes.

Parameters:

Name Type Description Default
adata AnnData | list[AnnData]

An AnnData object, or a list.

required
obs_key str

Key of adata.obs containing the domains annotation.

required
slide_key str

Optional key of adata.obs containing the ID of each slide. Not needed if each adata is a slide.

None
n_top_genes int

Number of genes per domain to consider.

3

Returns:

Type Description
float

The mean SVG score across all slides.

Source code in novae/monitor/eval.py
@utils.format_docs
def mean_svg_score(
    adata: AnnData | list[AnnData],
    obs_key: str,
    slide_key: str = None,
    n_top_genes: int = 3,
    n_classes: int | None = None,
) -> float:
    """Mean SVG score over all slides. A high score indicates better domain-specific genes, or spatially variable genes.

    Args:
        adata: An `AnnData` object, or a list.
        {obs_key}
        {slide_key}
        {n_top_genes}
        n_classes: Optional number of classes, forwarded to `svg_score` for every slide.

    Returns:
        The mean SVG score across all slides.
    """
    # Score each slide separately; the loop variable is named differently from the
    # `adata` argument, so that the argument is not shadowed.
    slide_scores = []
    for slide_adata in _iter_uid(adata, slide_key=slide_key, obs_key=obs_key):
        slide_scores.append(svg_score(slide_adata, obs_key, n_top_genes=n_top_genes, n_classes=n_classes))

    return np.mean(slide_scores)