Skip to content

Plotting

novae.plot.domains(adata, obs_key=None, slide_name_key=None, cell_size=10, ncols=4, fig_size_per_slide=(5, 5), na_color='#ccc', show=True, library_id=None, **kwargs)

Show the Novae spatial domains for all slides in the AnnData object.

Info

Make sure your Novae domains have already been assigned to the AnnData object. You can use model.assign_domains(...) to do so.

Parameters:

Name Type Description Default
adata AnnData | list[AnnData]

An AnnData object, or a list of AnnData objects.

required
obs_key str | None

Name of the key from adata.obs containing the Novae domains. By default, the last available domain key is shown.

None
slide_name_key str | None

Key of adata.obs that contains the slide names. By default, uses the Novae unique slide ID.

None
cell_size int | None

Size of the cells or spots.

10
ncols int

Number of columns to be shown.

4
fig_size_per_slide tuple[int, int]

Size of the figure for each slide.

(5, 5)
na_color str

Color for cells that do not belong to any domain (i.e. cells with a too small neighborhood).

'#ccc'
show bool

Whether to show the plot.

True
library_id str | None

library_id argument for sc.pl.spatial.

None
**kwargs int

Additional arguments for sc.pl.spatial.

{}
Source code in novae/plot/_spatial.py
def domains(
    adata: AnnData | list[AnnData],
    obs_key: str | None = None,
    slide_name_key: str | None = None,
    cell_size: int | None = 10,
    ncols: int = 4,
    fig_size_per_slide: tuple[int, int] = (5, 5),
    na_color: str = "#ccc",
    show: bool = True,
    library_id: str | None = None,
    **kwargs: int,
):
    """Show the Novae spatial domains for all slides in the `AnnData` object.

    Info:
        Make sure your Novae domains have already been assigned to the `AnnData` object. You can use `model.assign_domains(...)` to do so.

    Args:
        adata: An `AnnData` object, or a list of `AnnData` objects.
        obs_key: Name of the key from `adata.obs` containing the Novae domains. By default, the last available domain key is shown.
        slide_name_key: Key of `adata.obs` that contains the slide names. By default, uses the Novae unique slide ID.
        cell_size: Size of the cells or spots.
        ncols: Number of columns to be shown.
        fig_size_per_slide: Size of the figure for each slide.
        na_color: Color for cells that do not belong to any domain (i.e. cells with a too small neighborhood). It is used both on the plot and in the legend.
        show: Whether to show the plot.
        library_id: `library_id` argument for `sc.pl.spatial`.
        **kwargs: Additional arguments for `sc.pl.spatial`.
    """
    if obs_key is not None:
        assert str(obs_key).startswith(Keys.DOMAINS_PREFIX), f"Received {obs_key=}, which is not a valid Novae obs_key"

    adatas = adata if isinstance(adata, list) else [adata]
    slide_name_key = utils.check_slide_name_key(adatas, slide_name_key)
    obs_key = utils.check_available_domains_key(adatas, obs_key)

    for adata_ in adatas:
        sanitize_anndata(adata_)

    all_domains, colors = get_categorical_color_palette(adatas, obs_key)

    fig, axes = _subplots_per_slide(adatas, ncols, fig_size_per_slide)

    for i, adata_slide in enumerate(utils.iter_slides(adatas)):
        ax = axes[i // ncols, i % ncols]
        slide_name = adata_slide.obs[slide_name_key].iloc[0]
        assert len(np.unique(adata_slide.obs[slide_name_key])) == 1

        sc.pl.spatial(
            adata_slide,
            spot_size=cell_size,
            color=obs_key,
            ax=ax,
            show=False,
            library_id=library_id,
            # Forward `na_color` so the plotted NA cells match the "NA" legend entry below.
            na_color=na_color,
            **kwargs,
        )
        sns.despine(ax=ax, offset=10, trim=True)

        # The per-axis legend is replaced by one shared figure legend. It may be
        # absent (e.g. when `legend_loc` is overridden through `kwargs`).
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()

        ax.set_title(slide_name)

    # Remove unused subplots
    for ax in axes.flatten():
        if not ax.has_data():
            fig.delaxes(ax)

    title = f"Novae domains ({obs_key})"

    # `i` is the index of the last slide: 0 means a single slide was drawn
    if i == 0:
        axes[0, 0].set_title(title)
    else:
        fig.suptitle(title, fontsize=14, y=1.15)

    handles = [
        Line2D([0], [0], marker="o", color="w", markerfacecolor=color, markersize=8, linestyle="None")
        for color in colors + [na_color]
    ]

    # With more than two slides the legend goes above the figure, else on the right
    legend_on_top = i > 1
    fig.legend(
        handles,
        all_domains + ["NA"],
        loc="upper center" if legend_on_top else "center left",
        bbox_to_anchor=(0.5, 1.1) if legend_on_top else (1.04, 0.5),
        borderaxespad=0,
        frameon=False,
        ncol=len(colors) // (3 if legend_on_top else 10) + 1,
    )

    if show:
        plt.show()

novae.plot.domains_proportions(adata, obs_key=None, slide_name_key=None, figsize=(2, 5), show=True)

Show the proportion of each domain in the slide(s).

Parameters:

Name Type Description Default
adata AnnData | list[AnnData]

One AnnData object, or a list of AnnData objects.

required
obs_key str | None

The key in adata.obs that contains the Novae domains. By default, the last available domain key is shown.

None
figsize tuple[int, int]

Matplotlib figure size.

(2, 5)
show bool

Whether to show the plot.

True
Source code in novae/plot/_bar.py
def domains_proportions(
    adata: AnnData | list[AnnData],
    obs_key: str | None = None,
    slide_name_key: str | None = None,
    figsize: tuple[int, int] = (2, 5),
    show: bool = True,
):
    """Draw a stacked bar plot of the domain proportions, with one bar per slide.

    Args:
        adata: One `AnnData` object, or a list of `AnnData` objects.
        obs_key: The key in `adata.obs` that contains the Novae domains. By default, the last available domain key is shown.
        slide_name_key: Key of `adata.obs` that contains the slide names, used to label the bars.
        figsize: Matplotlib figure size.
        show: Whether to show the plot.
    """
    adatas = adata if not isinstance(adata, AnnData) else [adata]
    slide_name_key = utils.check_slide_name_key(adatas, slide_name_key)
    obs_key = utils.check_available_domains_key(adatas, obs_key)

    all_domains, colors = get_categorical_color_palette(adatas, obs_key)
    palette = dict(zip(all_domains, colors))

    # One (slide name, normalized domain counts) pair per slide, in iteration order
    per_slide = [
        (slide.obs[slide_name_key].iloc[0], slide.obs[obs_key].value_counts(normalize=True))
        for slide in utils.iter_slides(adatas)
    ]

    # Domains as rows, slides as columns
    proportions = pd.concat([counts for _, counts in per_slide], axis=1)
    proportions.columns = [name for name, _ in per_slide]

    # Transpose so that each slide becomes one stacked bar
    proportions.transpose().plot(kind="bar", stacked=True, figsize=figsize, color=palette)
    sns.despine(offset=10, trim=True)
    plt.legend(bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0, frameon=False)
    plt.ylabel("Proportion")
    plt.xticks(rotation=90)

    if show:
        plt.show()

novae.plot.connectivities(adata, ngh_threshold=2, cell_size=5, ncols=4, fig_size_per_slide=(5, 5), linewidths=0.1, line_color='#333', cmap='rocket', color_isolated_cells='orangered', show=True)

Show the graph of the spatial connectivities between cells. By default, the cells which have a number of neighbors inferior to ngh_threshold are shown in red. If ngh_threshold is None, the cells are colored by the number of neighbors.

Quality control

This plot is useful to check the quality of the spatial connectivities obtained via novae.utils.spatial_neighbors. Make sure few cells (e.g., less than 5%) have a number of neighbors below ngh_threshold. If too many cells are isolated, you may want to increase the radius parameter in novae.utils.spatial_neighbors. Conversely, if some cells are really far from each other but still connected, you may want to decrease the radius parameter to disconnect them.

Parameters:

Name Type Description Default
adata AnnData

An AnnData object.

required
ngh_threshold int | None

Only cells with a number of neighbors below this threshold are shown (with color color_isolated_cells). If None, cells are colored by the number of neighbors.

2
cell_size int

Size of the dots for each cell.

5
ncols int

Number of columns to be shown.

4
fig_size_per_slide tuple[int, int]

Size of the figure for each slide.

(5, 5)
linewidths float

Width of the lines/edges connecting the cells.

0.1
line_color str

Color of the lines/edges.

'#333'
cmap str

Name of the colormap to use for the number of neighbors.

'rocket'
color_isolated_cells str

Color for the cells with a number of neighbors below ngh_threshold (if not None).

'orangered'
show bool

Whether to show the plot.

True
Source code in novae/plot/_graph.py
def connectivities(
    adata: AnnData | list[AnnData],
    ngh_threshold: int | None = 2,
    cell_size: int = 5,
    ncols: int = 4,
    fig_size_per_slide: tuple[int, int] = (5, 5),
    linewidths: float = 0.1,
    line_color: str = "#333",
    cmap: str = "rocket",
    color_isolated_cells: str = "orangered",
    show: bool = True,
):
    """Show the graph of the spatial connectivities between cells. By default,
    the cells which have a number of neighbors lower than `ngh_threshold` are shown
    in red. If `ngh_threshold` is `None`, the cells are colored by the number of neighbors.

    !!! info "Quality control"
        This plot is useful to check the quality of the spatial connectivities obtained via [novae.utils.spatial_neighbors][].
        Make sure few cells (e.g., less than 5%) have a number of neighbors below `ngh_threshold`.
        If too many cells are isolated, you may want to increase the `radius` parameter in [novae.utils.spatial_neighbors][].
        Conversely, if some cells are really **far from each other** but still connected, you may want to decrease the `radius` parameter to **disconnect** them.

    Args:
        adata: An `AnnData` object, or a list of `AnnData` objects.
        ngh_threshold: Only cells with a number of neighbors below this threshold are shown (with color `color_isolated_cells`). If `None`, cells are colored by the number of neighbors.
        cell_size: Size of the dots for each cell.
        ncols: Number of columns to be shown.
        fig_size_per_slide: Size of the figure for each slide.
        linewidths: Width of the lines/edges connecting the cells.
        line_color: Color of the lines/edges.
        cmap: Name of the colormap to use for the number of neighbors.
        color_isolated_cells: Color for the cells with a number of neighbors below `ngh_threshold` (if not `None`).
        show: Whether to show the plot.
    """
    adatas = [adata] if isinstance(adata, AnnData) else adata

    fig, axes = _subplots_per_slide(adatas, ncols, fig_size_per_slide)

    for i, adata_slide in enumerate(utils.iter_slides(adatas)):
        ax = axes[i // ncols, i % ncols]

        utils.check_has_spatial_adjancency(adata_slide)

        X, A = adata_slide.obsm["spatial"], adata_slide.obsp[Keys.ADJ]

        ax.invert_yaxis()
        ax.axes.set_aspect("equal")

        # Keep each undirected edge once (row < col) instead of drawing it twice
        rows, cols = A.nonzero()
        mask = rows < cols
        rows, cols = rows[mask], cols[mask]
        edge_segments = np.stack([X[rows], X[cols]], axis=1)
        edges = LineCollection(edge_segments, color=line_color, linewidths=linewidths, zorder=1)
        ax.add_collection(edges)

        # `np.asarray(...).ravel()` gives a flat array whether the row-sum comes back
        # as an `np.matrix` (sparse matrix) or an `ndarray` (sparse array / dense).
        n_neighbors = np.asarray((A > 0).sum(1)).ravel()

        if ngh_threshold is None:
            scatter = ax.scatter(X[:, 0], X[:, 1], c=n_neighbors, s=cell_size, zorder=2, cmap=cmap)
            plt.colorbar(scatter, ax=ax)
        else:
            isolated_cells = n_neighbors < ngh_threshold
            ax.scatter(X[isolated_cells, 0], X[isolated_cells, 1], color=color_isolated_cells, s=cell_size, zorder=2)

        ax.set_title(adata_slide.obs[Keys.SLIDE_ID].iloc[0])

    # Remove unused subplots
    for ax in axes.flatten():
        if not ax.has_data():
            fig.delaxes(ax)

    title = "Node connectivities" + (f" (threshold={ngh_threshold} neighbors)" if ngh_threshold is not None else "")

    # `i` is the index of the last slide: 0 means a single slide was drawn
    if i == 0:
        axes[0, 0].set_title(title)
    else:
        fig.suptitle(title, fontsize=14)

    if show:
        plt.show()

novae.plot.pathway_scores(adata, pathways, obs_key=None, pathway_name=None, slide_name_key=None, return_df=False, figsize=(10, 5), min_pathway_size=4, show=True, **kwargs)

Show a heatmap of either (i) the score of multiple pathways for each domain, or (ii) one pathway score for each domain and for each slide. To use the latter case, provide pathway_name, or make sure to have only one pathway in pathways.

Info

Currently, this function only supports one AnnData object per call.

Parameters:

Name Type Description Default
adata AnnData

An AnnData object.

required
pathways dict[str, list[str]] | str

Either a dictionary of pathways (keys are pathway names, values are lists of gene names), or a path to a GSEA JSON file.

required
obs_key str | None

Key in adata.obs that contains the domains. By default, it will use the last available Novae domain key.

None
pathway_name str | None

If None, all pathways will be shown (first mode). If not None, this specific pathway will be shown, for all domains and all slides (second mode).

None
slide_name_key str | None

Key of adata.obs that contains the slide names. By default, uses the Novae unique slide ID.

None
return_df bool

Whether to return the DataFrame.

False
figsize tuple[int, int]

Matplotlib figure size.

(10, 5)
min_pathway_size int

Minimum number of known genes in the pathway to be considered.

4
show bool

Whether to show the plot.

True

Returns:

Type Description
DataFrame | None

A DataFrame of scores per domain if return_df is True.

Source code in novae/plot/_heatmap.py
def pathway_scores(
    adata: AnnData,
    pathways: dict[str, list[str]] | str,
    obs_key: str | None = None,
    pathway_name: str | None = None,
    slide_name_key: str | None = None,
    return_df: bool = False,
    figsize: tuple[int, int] = (10, 5),
    min_pathway_size: int = 4,
    show: bool = True,
    **kwargs: int,
) -> pd.DataFrame | None:
    """Show a heatmap of either (i) the score of multiple pathways for each domain, or (ii) one pathway score for each domain and for each slide.
    To use the latter case, provide `pathway_name`, or make sure to have only one pathway in `pathways`.

    Info:
        Currently, this function only supports one AnnData object per call.

    Args:
        adata: An `AnnData` object.
        pathways: Either a dictionary of pathways (keys are pathway names, values are lists of gene names), or a path to a [GSEA](https://www.gsea-msigdb.org/gsea/msigdb/index.jsp) JSON file.
        obs_key: Key in `adata.obs` that contains the domains. By default, it will use the last available Novae domain key.
        pathway_name: If `None`, all pathways will be shown (first mode). If not `None`, this specific pathway will be shown, for all domains and all slides (second mode).
        slide_name_key: Key of `adata.obs` that contains the slide names. By default, uses the Novae unique slide ID.
        return_df: Whether to return the DataFrame.
        figsize: Matplotlib figure size.
        min_pathway_size: Minimum number of known genes in the pathway to be considered.
        show: Whether to show the plot.
        **kwargs: Additional arguments for `sns.clustermap`.

    Returns:
        A DataFrame of scores per domain if `return_df` is True.
    """
    assert isinstance(adata, AnnData), f"For now, only one AnnData object is supported, received {type(adata)}"

    obs_key = utils.check_available_domains_key([adata], obs_key)

    if isinstance(pathways, str):
        pathways = _load_gsea_json(pathways)
        log.info(f"Loaded {len(pathways)} pathway(s)")

    # A single pathway always selects the "one pathway per domain per slide" mode
    if len(pathways) == 1:
        pathway_name = list(pathways.keys())[0]

    try:
        if pathway_name is not None:
            gene_names = pathways[pathway_name]
            is_valid = _get_pathway_score(adata, gene_names, min_pathway_size)
            assert is_valid, f"Pathway '{pathway_name}' has less than {min_pathway_size} genes in the dataset."

            log.info(f"Plot mode: {pathway_name} score per domain per slide")

            slide_name_key = utils.check_slide_name_key([adata], slide_name_key)

            # Rows are domains, columns are slides
            df = adata.obs.groupby([obs_key, slide_name_key], observed=True)[TEMP_KEY].mean().unstack()
            df.columns.name = slide_name_key

            # NOTE(review): `len(df)` counts rows (domains), while the message mentions
            # slides (the columns) - confirm which dimension was meant to be checked.
            assert len(df) > 1, f"Found {len(df)} valid slide. Minimum 2 required."
        else:
            scores = {}

            for key, gene_names in pathways.items():
                is_valid = _get_pathway_score(adata, gene_names, min_pathway_size)
                if is_valid:
                    # The score of the pathway just evaluated is read back from the temporary column
                    scores[key] = adata.obs[TEMP_KEY]

            log.info(f"Plot mode: {len(scores)} pathways scores per domain")

            assert len(scores) > 1, f"Found {len(scores)} valid pathway. Minimum 2 required."

            df = pd.DataFrame(scores)
            df[obs_key] = adata.obs[obs_key]
            df = df.groupby(obs_key, observed=True).mean()
            df.columns.name = "Pathways"
    finally:
        # Always drop the temporary score column, including when a check above fails,
        # so that the caller's `adata.obs` is not left with a stray column.
        if TEMP_KEY in adata.obs:
            del adata.obs[TEMP_KEY]

    df = df.fillna(0)

    g = sns.clustermap(df, figsize=figsize, **kwargs)
    plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)

    if show:
        plt.show()

    if return_df:
        return df

novae.plot.paga(adata, obs_key=None, show=True, **paga_plot_kwargs)

Plot a PAGA graph.

Info

Currently, this function only supports one slide per call.

Parameters:

Name Type Description Default
adata AnnData

An AnnData object.

required
obs_key str | None

Name of the key from adata.obs containing the Novae domains. By default, the last available domain key is shown.

None
show bool

Whether to show the plot.

True
**paga_plot_kwargs int

Additional arguments for sc.pl.paga.

{}
Source code in novae/plot/_graph.py
def paga(adata: AnnData, obs_key: str | None = None, show: bool = True, **paga_plot_kwargs: int):
    """Draw the PAGA graph of the Novae domains.

    Info:
        Currently, this function only supports one slide per call.

    Args:
        adata: An AnnData object.
        obs_key: Name of the key from `adata.obs` containing the Novae domains. By default, the last available domain key is shown.
        show: Whether to show the plot.
        **paga_plot_kwargs: Additional arguments for `sc.pl.paga`.
    """
    assert isinstance(adata, AnnData), f"For now, only AnnData objects are supported, received {type(adata)}"

    obs_key = utils.check_available_domains_key([adata], obs_key)

    # Return value unused here; presumably called to register the domain colors - TODO confirm
    get_categorical_color_palette([adata], obs_key)

    # Only keep the cells that have a domain assigned
    has_domain = adata.obs[obs_key].notna()
    adata_clean = adata[has_domain]

    # Reuse a PAGA result already stored in `adata.uns` when it was computed on the same key
    paga_is_cached = "paga" in adata.uns and adata.uns["paga"]["groups"] == obs_key

    if not paga_is_cached:
        sc.pp.neighbors(adata_clean, use_rep=Keys.REPR)
        sc.tl.paga(adata_clean, groups=obs_key)

        # Store the result on the original object so later calls can reuse it
        sizes_key = f"{obs_key}_sizes"
        adata.uns["paga"] = adata_clean.uns["paga"]
        adata.uns[sizes_key] = adata_clean.uns[sizes_key]

    sc.pl.paga(adata_clean, title=f"PAGA graph ({obs_key})", show=False, **paga_plot_kwargs)
    sns.despine(offset=10, trim=True, bottom=True)

    if show:
        plt.show()

novae.plot.spatially_variable_genes(adata, obs_key=None, top_k=5, cell_size=10, min_positive_ratio=0.05, return_list=False, show=True, **kwargs)

Plot the most spatially variable genes (SVG) for a given AnnData object.

Info

Currently, this function only supports one slide per call.

Parameters:

Name Type Description Default
adata AnnData

An AnnData object corresponding to one slide.

required
obs_key str | None

Key in adata.obs that contains the domains. By default, it will use the last available Novae domain key.

None
top_k int

Number of SVG to be shown.

5
cell_size int

Size of the cells or spots (spot_size argument of sc.pl.spatial).

10
min_positive_ratio float

Genes whose "ratio of cells expressing it" is lower than this threshold are not considered.

0.05
return_list bool

Whether to return the list of SVG instead of plotting them.

False
show bool

Whether to show the plot.

True
**kwargs int

Additional arguments for sc.pl.spatial.

{}

Returns:

Type Description
None | list[str]

A list of SVG names if return_list is True.

Source code in novae/plot/_spatial.py
def spatially_variable_genes(
    adata: AnnData,
    obs_key: str | None = None,
    top_k: int = 5,
    cell_size: int = 10,
    min_positive_ratio: float = 0.05,
    return_list: bool = False,
    show: bool = True,
    **kwargs: int,
) -> None | list[str]:
    """Plot (or return) the `top_k` most spatially variable genes (SVG) of an `AnnData` object.

    Genes are ranked by the standard deviation, across domains, of their log fold changes
    obtained with `sc.tl.rank_genes_groups`.

    !!! info
        Currently, this function only supports one slide per call.

    Args:
        adata: An `AnnData` object corresponding to one slide.
        obs_key: Key in `adata.obs` that contains the domains. By default, it will use the last available Novae domain key.
        top_k: Number of SVG to be shown.
        cell_size: Size of the cells or spots (`spot_size` argument of `sc.pl.spatial`).
        min_positive_ratio: Genes whose "ratio of cells expressing it" is lower than this threshold are not considered.
        return_list: Whether to return the list of SVG instead of plotting them.
        show: Whether to show the plot.
        **kwargs: Additional arguments for `sc.pl.spatial`.

    Returns:
        A list of SVG names if `return_list` is `True`.
    """
    assert isinstance(adata, AnnData), f"Received adata of type {type(adata)}. Currently only AnnData is supported."

    obs_key = utils.check_available_domains_key([adata], obs_key)

    sc.tl.rank_genes_groups(adata, groupby=obs_key)

    # One column of log fold changes per domain, indexed by gene name
    lfc_per_domain = []
    for domain in adata.obs[obs_key].cat.categories:
        ranked = sc.get.rank_genes_groups_df(adata, domain)
        lfc_per_domain.append(ranked.set_index("names")["logfoldchanges"])
    df = pd.concat(lfc_per_domain, axis=1)

    # Keep the genes whose ratio of expressing cells is above the threshold
    positive_ratio = (adata.X > 0).mean(0)
    where = positive_ratio > min_positive_ratio
    if isinstance(where, np.matrix):
        # Flatten the (1, n_vars) matrix into a 1D mask
        where = where.A1
    valid_vars = adata.var_names[where]

    assert (
        len(valid_vars) >= top_k
    ), f"Only {len(valid_vars)} genes are available. Please decrease `top_k` or `min_positive_ratio`."

    # Rank the valid genes by how much their log fold change varies across domains
    lfc_spread = df.std(axis=1).loc[valid_vars]
    svg = lfc_spread.sort_values(ascending=False).head(top_k).index

    if return_list:
        return svg.tolist()

    sc.pl.spatial(adata, color=svg, spot_size=cell_size, show=show, **kwargs)