Skip to content

novae.plot

novae.plot.domains(adata, obs_key=None, slide_name_key=None, cell_size=10, ncols=4, fig_size_per_slide=(5, 5), na_color='#ccc', show=False, **kwargs)

Show the Novae spatial domains for all slides in the AnnData object.

Info

Make sure your Novae domains have already been assigned to the AnnData object. You can use model.assign_domains(...) to do so.

Parameters:

Name Type Description Default
adata AnnData | list[AnnData]

An AnnData object, or a list of AnnData objects.

required
obs_key str | None

Name of the key from adata.obs containing the Novae domains. By default, the last available domain key is shown.

None
slide_name_key str | None

Key of adata.obs that contains the slide names. By default, uses the Novae unique slide ID.

None
cell_size int | None

Size of the cells or spots.

10
ncols int

Number of columns to be shown.

4
fig_size_per_slide tuple[int, int]

Size of the figure for each slide.

(5, 5)
na_color str

Color for cells that do not belong to any domain (i.e. cells whose neighborhood is too small).

'#ccc'
show bool

Whether to show the plot.

False
**kwargs int

Additional arguments for sc.pl.spatial.

{}
Source code in novae/plot/_spatial.py
def domains(
    adata: AnnData | list[AnnData],
    obs_key: str | None = None,
    slide_name_key: str | None = None,
    cell_size: int | None = 10,
    ncols: int = 4,
    fig_size_per_slide: tuple[int, int] = (5, 5),
    na_color: str = "#ccc",
    show: bool = False,
    **kwargs: int,
):
    """Show the Novae spatial domains for all slides in the `AnnData` object.

    One subplot is drawn per slide (i.e. per category of `slide_name_key`), and a
    single legend shared by all subplots is attached to the figure.

    Info:
        Make sure your Novae domains have already been assigned to the `AnnData` object. You can use `model.assign_domains(...)` to do so.

    Args:
        adata: An `AnnData` object, or a list of `AnnData` objects.
        obs_key: Name of the key from `adata.obs` containing the Novae domains. By default, the last available domain key is shown.
        slide_name_key: Key of `adata.obs` that contains the slide names. By default, uses the Novae unique slide ID.
        cell_size: Size of the cells or spots.
        ncols: Number of columns to be shown.
        fig_size_per_slide: Size of the figure for each slide.
        na_color: Color for cells that do not belong to any domain (i.e. cells whose neighborhood is too small). Used both on the plots and for the "NA" legend entry.
        show: Whether to show the plot.
        **kwargs: Additional arguments for `sc.pl.spatial`.

    Raises:
        AssertionError: If `obs_key` is provided but does not start with the Novae domains prefix.
    """
    if obs_key is not None:
        assert str(obs_key).startswith(Keys.DOMAINS_PREFIX), f"Received {obs_key=}, which is not a valid Novae obs_key"

    adatas = adata if isinstance(adata, list) else [adata]
    slide_name_key = slide_name_key if slide_name_key is not None else Keys.SLIDE_ID
    obs_key = utils.check_available_domains_key(adatas, obs_key)

    for adata_ in adatas:
        sanitize_anndata(adata_)

    all_domains, colors = get_categorical_color_palette(adatas, obs_key)

    # One panel per slide; never use more columns than there are slides
    n_slides = sum(len(adata_.obs[slide_name_key].cat.categories) for adata_ in adatas)
    ncols = min(ncols, n_slides)
    nrows = (n_slides + ncols - 1) // ncols  # ceil division

    # squeeze=False keeps `axes` two-dimensional, even for a single row or column
    fig, axes = plt.subplots(
        nrows, ncols, figsize=(ncols * fig_size_per_slide[0], nrows * fig_size_per_slide[1]), squeeze=False
    )

    i = 0
    for adata_ in adatas:
        for slide_name in adata_.obs[slide_name_key].cat.categories:
            ax = axes[i // ncols, i % ncols]
            adata_slide = adata_[adata_.obs[slide_name_key] == slide_name]
            # `na_color` is forwarded so that the cells drawn match the "NA" entry of the legend below
            sc.pl.spatial(
                adata_slide, spot_size=cell_size, color=obs_key, ax=ax, show=False, na_color=na_color, **kwargs
            )
            sns.despine(ax=ax, offset=10, trim=True)

            # The per-axis legend is replaced by one figure-level legend; it may be
            # missing (e.g. `legend_loc=None` passed via kwargs), so guard against None
            legend = ax.get_legend()
            if legend is not None:
                legend.remove()

            ax.set_title(slide_name)
            i += 1

    # Remove the unused subplots of the last row
    for ax in axes.flatten():
        if not ax.has_data():
            fig.delaxes(ax)

    title = f"Novae domains ({obs_key})"

    if i == 1:
        axes[0, 0].set_title(title)
    else:
        fig.suptitle(title, fontsize=14, y=1.15)

    # One round marker per domain, plus a final one for the cells without domain
    handles = [
        Line2D([0], [0], marker="o", color="w", markerfacecolor=color, markersize=8, linestyle="None")
        for color in colors + [na_color]
    ]
    # Multiple slides: legend above the grid. Single slide: legend on the right of the plot.
    fig.legend(
        handles,
        all_domains + ["NA"],
        loc="upper center" if i > 1 else "center left",
        bbox_to_anchor=(0.5, 1.1) if i > 1 else (1.04, 0.5),
        borderaxespad=0,
        frameon=False,
        ncol=len(colors) // (3 if i > 1 else 10) + 1,
    )

    if show:
        plt.show()

novae.plot.domains_proportions(adata, obs_key=None, figsize=(2, 5))

Show the proportion of each domain in the slide(s).

Parameters:

Name Type Description Default
adata AnnData | list[AnnData]

One AnnData object, or a list of AnnData objects.

required
obs_key str | None

The key in adata.obs that contains the Novae domains. By default, the last available domain key is shown.

None
figsize tuple[int, int]

Matplotlib figure size.

(2, 5)
Source code in novae/plot/_bar.py
def domains_proportions(adata: AnnData | list[AnnData], obs_key: str | None = None, figsize: tuple[int, int] = (2, 5)):
    """Draw a stacked bar plot of the domain proportions, with one bar per slide.

    Args:
        adata: One `AnnData` object, or a list of `AnnData` objects.
        obs_key: The key in `adata.obs` that contains the Novae domains. By default, the last available domain key is shown.
        figsize: Matplotlib figure size.
    """
    adatas = adata if not isinstance(adata, AnnData) else [adata]
    obs_key = utils.check_available_domains_key(adatas, obs_key)
    all_domains, colors = get_categorical_color_palette(adatas, obs_key)

    # Gather, for each slide, its ID and the normalized count of each domain
    slide_names = []
    slide_proportions = []
    for slide in utils.iter_slides(adatas):
        slide_obs = slide.obs
        slide_names.append(slide_obs[Keys.SLIDE_ID].iloc[0])
        slide_proportions.append(slide_obs[obs_key].value_counts(normalize=True))

    # Rows are domains and columns are slides
    proportions = pd.concat(slide_proportions, axis=1)
    proportions.columns = slide_names

    # Transposed so that each slide becomes one stacked bar
    domain_to_color = dict(zip(all_domains, colors))
    proportions.T.plot(kind="bar", stacked=True, figsize=figsize, color=domain_to_color)

    sns.despine(offset=10, trim=True)
    plt.legend(bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0, frameon=False)
    plt.ylabel("Proportion")

novae.plot.pathway_scores(adata, pathways, obs_key=None, return_df=False, figsize=(10, 5), min_pathway_size=4, **kwargs)

Show a heatmap of pathway scores for each domain.

Info

Currently, this function only supports one slide per call.

Parameters:

Name Type Description Default
adata AnnData

An AnnData object.

required
pathways dict[str, list[str]] | str

Either a dictionary of pathways (keys are pathway names, values are lists of gene names), or a path to a GSEA JSON file.

required
obs_key str | None

Key in adata.obs that contains the domains. By default, it will use the last available Novae domain key.

None
return_df bool

Whether to return the DataFrame.

False
figsize tuple[int, int]

Matplotlib figure size.

(10, 5)
min_pathway_size int

Minimum number of known genes in the pathway to be considered.

4

Returns:

Type Description
DataFrame | None

A DataFrame of scores per domain if return_df is True.

Source code in novae/plot/_heatmap.py
def pathway_scores(
    adata: AnnData,
    pathways: dict[str, list[str]] | str,
    obs_key: str | None = None,
    return_df: bool = False,
    figsize: tuple[int, int] = (10, 5),
    min_pathway_size: int = 4,
    **kwargs: int,
) -> pd.DataFrame | None:
    """Show a heatmap of pathway scores for each domain.

    Each pathway is scored per cell with `sc.tl.score_genes`, then the scores are
    averaged per domain and displayed as a clustered heatmap.

    Info:
        Currently, this function only supports one slide per call.

    Args:
        adata: An `AnnData` object.
        pathways: Either a dictionary of pathways (keys are pathway names, values are lists of gene names), or a path to a [GSEA](https://www.gsea-msigdb.org/gsea/msigdb/index.jsp) JSON file.
        obs_key: Key in `adata.obs` that contains the domains. By default, it will use the last available Novae domain key.
        return_df: Whether to return the DataFrame.
        figsize: Matplotlib figure size.
        min_pathway_size: Minimum number of known genes in the pathway to be considered.
        **kwargs: Additional arguments for `sns.clustermap`.

    Returns:
        A DataFrame of scores per domain if `return_df` is True.

    Raises:
        AssertionError: If `adata` is not an `AnnData` object, or if fewer than two pathways have at least `min_pathway_size` genes found in `adata.var_names`.
    """
    assert isinstance(adata, AnnData), f"For now, only AnnData objects are supported, received {type(adata)}"

    obs_key = utils.check_available_domains_key([adata], obs_key)

    scores = {}
    lower_var_names = adata.var_names.str.lower()  # gene names are matched case-insensitively

    if isinstance(pathways, str):
        pathways = _load_gsea_json(pathways)
        log.info(f"Loaded {len(pathways)} pathway(s)")

    for key, gene_names in pathways.items():
        lower_gene_names = np.array([gene_name.lower() for gene_name in gene_names])
        pathway_vars = adata.var_names[np.isin(lower_var_names, lower_gene_names)]
        if len(pathway_vars) >= min_pathway_size:
            sc.tl.score_genes(adata, pathway_vars, score_name="_temp")
            scores[key] = adata.obs["_temp"]

    # The temporary column only exists if at least one pathway was scored. Deleting it
    # unconditionally would raise a KeyError and hide the explicit assertion below.
    if "_temp" in adata.obs:
        del adata.obs["_temp"]

    assert len(scores) > 1, f"Found {len(scores)} valid pathway. Minimum 2 required."

    # Average the per-cell scores inside each domain
    df = pd.DataFrame(scores)
    df[obs_key] = adata.obs[obs_key]
    df = df.groupby(obs_key).mean()
    df = df.fillna(0)

    g = sns.clustermap(df, figsize=figsize, **kwargs)
    plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)  # keep the domain labels horizontal

    if return_df:
        return df
    return None

novae.plot.paga(adata, obs_key=None, **paga_plot_kwargs)

Plot a PAGA graph.

Info

Currently, this function only supports one slide per call.

Parameters:

Name Type Description Default
adata AnnData

An AnnData object.

required
obs_key str | None

Name of the key from adata.obs containing the Novae domains. By default, the last available domain key is shown.

None
**paga_plot_kwargs int

Additional arguments for sc.pl.paga.

{}
Source code in novae/plot/_graph.py
def paga(adata: AnnData, obs_key: str | None = None, **paga_plot_kwargs: int):
    """Draw the PAGA graph of the Novae domains.

    The PAGA results are computed on the cells that have a domain, and stored in
    `adata.uns` so that they are not recomputed when calling the function again
    with the same `obs_key`.

    Info:
        Currently, this function only supports one slide per call.

    Args:
        adata: An AnnData object.
        obs_key: Name of the key from `adata.obs` containing the Novae domains. By default, the last available domain key is shown.
        **paga_plot_kwargs: Additional arguments for `sc.pl.paga`.
    """
    assert isinstance(adata, AnnData), f"For now, only AnnData objects are supported, received {type(adata)}"

    obs_key = utils.check_available_domains_key([adata], obs_key)

    # Return value unused: called for its side effects only
    # NOTE(review): presumably registers the domain colors on `adata` - confirm against the helper
    get_categorical_color_palette([adata], obs_key)

    # Keep only the cells that were assigned to a domain
    has_domain = adata.obs[obs_key].notna()
    adata_clean = adata[has_domain]

    sizes_key = f"{obs_key}_sizes"
    is_cached = "paga" in adata.uns and adata.uns["paga"]["groups"] == obs_key

    if not is_cached:
        sc.pp.neighbors(adata_clean, use_rep=Keys.REPR)
        sc.tl.paga(adata_clean, groups=obs_key)

        # Copy the results back to the original object for later calls
        adata.uns["paga"] = adata_clean.uns["paga"]
        adata.uns[sizes_key] = adata_clean.uns[sizes_key]

    sc.pl.paga(adata_clean, title=f"PAGA graph ({obs_key})", show=False, **paga_plot_kwargs)
    sns.despine(offset=10, trim=True, bottom=True)

novae.plot.spatially_variable_genes(adata, obs_key=None, top_k=5, show=False, cell_size=10, min_positive_ratio=0.05, return_list=False, **kwargs)

Plot the most spatially variable genes (SVG) for a given AnnData object.

Info

Currently, this function only supports one slide per call.

Parameters:

Name Type Description Default
adata AnnData

An AnnData object corresponding to one slide.

required
obs_key str | None

Key in adata.obs that contains the domains. By default, it will use the last available Novae domain key.

None
top_k int

Number of SVG to be shown.

5
show bool

Whether to show the plot.

False
cell_size int

Size of the cells or spots (spot_size argument of sc.pl.spatial).

10
min_positive_ratio float

Genes whose "ratio of cells expressing it" is lower than this threshold are not considered.

0.05
return_list bool

Whether to return the list of SVG instead of plotting them.

False
**kwargs int

Additional arguments for sc.pl.spatial.

{}

Returns:

Type Description
None | list[str]

A list of SVG names if return_list is True.

Source code in novae/plot/_spatial.py
def spatially_variable_genes(
    adata: AnnData,
    obs_key: str | None = None,
    top_k: int = 5,
    show: bool = False,
    cell_size: int = 10,
    min_positive_ratio: float = 0.05,
    return_list: bool = False,
    **kwargs: int,
) -> None | list[str]:
    """Plot the most spatially variable genes (SVG) for a given `AnnData` object.

    Genes are ranked by the standard deviation, across domains, of their log fold
    changes (as computed by `sc.tl.rank_genes_groups`), and the `top_k` genes with
    the highest values are kept.

    Info:
        Currently, this function only supports one slide per call.

    Args:
        adata: An `AnnData` object corresponding to one slide.
        obs_key: Key in `adata.obs` that contains the domains. By default, it will use the last available Novae domain key.
        top_k: Number of SVG to be shown.
        show: Whether to show the plot.
        cell_size: Size of the cells or spots (`spot_size` argument of `sc.pl.spatial`).
        min_positive_ratio: Genes whose "ratio of cells expressing it" is lower than this threshold are not considered.
        return_list: Whether to return the list of SVG instead of plotting them.
        **kwargs: Additional arguments for `sc.pl.spatial`.

    Returns:
        A list of SVG names if `return_list` is `True`.
    """
    assert isinstance(adata, AnnData), f"Received adata of type {type(adata)}. Currently only AnnData is supported."

    obs_key = utils.check_available_domains_key([adata], obs_key)

    sc.tl.rank_genes_groups(adata, groupby=obs_key)

    # One column of log fold changes per domain, indexed by gene name
    per_domain_lfc = []
    for domain in adata.obs[obs_key].cat.categories:
        ranked = sc.get.rank_genes_groups_df(adata, domain)
        per_domain_lfc.append(ranked.set_index("names")["logfoldchanges"])
    lfc = pd.concat(per_domain_lfc, axis=1)

    # Keep the genes expressed in a large enough fraction of the cells
    expressed = (adata.X > 0).mean(0) > min_positive_ratio
    if isinstance(expressed, np.matrix):
        # The mean may come back as a 2D `np.matrix`; flatten it to index `var_names`
        expressed = expressed.A1
    valid_vars = adata.var_names[expressed]

    assert (
        len(valid_vars) >= top_k
    ), f"Only {len(valid_vars)} genes are available. Please decrease `top_k` or `min_positive_ratio`."

    # The more the log fold change varies across domains, the more "spatially variable" the gene
    variability = lfc.std(1).loc[valid_vars]
    ranked_variability = variability.sort_values(ascending=False)
    svg = ranked_variability.head(top_k).index

    if return_list:
        return svg.tolist()

    sc.pl.spatial(adata, color=svg, spot_size=cell_size, show=show, **kwargs)