Plotting

`novae.plot.domains(adata, obs_key=None, slide_name_key=None, cell_size=None, ncols=4, fig_size_per_slide=(5, 5), na_color='#ccc', show=True, library_id=None, **kwargs)`

Show the Novae spatial domains for all slides in the AnnData object.

Info

Make sure your Novae domains have already been assigned to the AnnData object. You can use model.assign_domains(...) to do so.

Parameters:

Name	Type	Description	Default
`adata`	`AnnData \| list[AnnData]`	An `AnnData` object, or a list of `AnnData` objects.	required
`obs_key`	`str \| None`	Name of the key from `adata.obs` containing the Novae domains. By default, the last available domain key is shown.	`None`
`slide_name_key`	`str \| None`	Key of `adata.obs` that contains the slide names. By default, uses the Novae unique slide ID.	`None`
`cell_size`	`int \| None`	Size of the cells or spots. By default, it uses the median distance between neighbor cells.	`None`
`ncols`	`int`	Number of columns to be shown.	`4`
`fig_size_per_slide`	`tuple[int, int]`	Size of the figure for each slide.	`(5, 5)`
`na_color`	`str`	Color for cells that do not belong to any domain (i.e. cells with a too small neighborhood).	`'#ccc'`
`show`	`bool`	Whether to show the plot.	`True`
`library_id`	`str \| None`	`library_id` argument for `sc.pl.spatial`.	`None`
`**kwargs`	`int`	Additional arguments for `sc.pl.spatial`.	`{}`

Source code in novae/plot/_spatial.py

def domains(
    adata: AnnData | list[AnnData],
    obs_key: str | None = None,
    slide_name_key: str | None = None,
    cell_size: int | None = None,
    ncols: int = 4,
    fig_size_per_slide: tuple[int, int] = (5, 5),
    na_color: str = "#ccc",
    show: bool = True,
    library_id: str | None = None,
    **kwargs: int,
):
    """Show the Novae spatial domains for all slides in the `AnnData` object.

    Info:
        Make sure your Novae domains have already been assigned to the `AnnData` object. You can use `model.assign_domains(...)` to do so.

    Args:
        adata: An `AnnData` object, or a list of `AnnData` objects.
        obs_key: Name of the key from `adata.obs` containing the Novae domains. By default, the last available domain key is shown.
        slide_name_key: Key of `adata.obs` that contains the slide names. By default, uses the Novae unique slide ID.
        cell_size: Size of the cells or spots. By default, it uses the median distance between neighbor cells.
        ncols: Number of columns to be shown.
        fig_size_per_slide: Size of the figure for each slide.
        na_color: Color for cells that do not belong to any domain (i.e. cells with a too small neighborhood). It is used both on the plot and for the "NA" legend entry.
        show: Whether to show the plot.
        library_id: `library_id` argument for `sc.pl.spatial`.
        **kwargs: Additional arguments for `sc.pl.spatial`.
    """
    if obs_key is not None:
        assert str(obs_key).startswith(Keys.DOMAINS_PREFIX), f"Received {obs_key=}, which is not a valid Novae obs_key"

    adatas = adata if isinstance(adata, list) else [adata]
    slide_name_key = utils.check_slide_name_key(adatas, slide_name_key)
    obs_key = utils.check_available_domains_key(adatas, obs_key)

    for adata in adatas:
        sanitize_anndata(adata)

    all_domains, colors = get_categorical_color_palette(adatas, obs_key)
    # NOTE: `adata` was rebound by the loop above, so the default size is derived from
    # the last object of `adatas` (kept as-is to preserve the existing behavior).
    cell_size = cell_size or _get_default_cell_size(adata)

    fig, axes = _subplots_per_slide(adatas, ncols, fig_size_per_slide)

    for i, adata_slide in enumerate(utils.iter_slides(adatas)):
        ax = axes[i // ncols, i % ncols]
        slide_name = adata_slide.obs[slide_name_key].iloc[0]
        assert len(np.unique(adata_slide.obs[slide_name_key])) == 1

        sc.pl.spatial(
            adata_slide,
            spot_size=cell_size,
            color=obs_key,
            ax=ax,
            show=False,
            library_id=library_id,
            # Keep the plotted NA color in sync with the "NA" handle of the shared legend
            na_color=na_color,
            **kwargs,
        )
        sns.despine(ax=ax, offset=10, trim=True)

        # The per-axis legend is replaced by one figure-level legend below
        slide_legend = ax.get_legend()
        if slide_legend is not None:
            slide_legend.remove()

        ax.set_title(slide_name)

    # Remove unused subplots
    for ax in axes.flatten():
        if not ax.has_data():
            fig.delaxes(ax)

    title = f"Novae domains ({obs_key})"

    # `i` is the index of the last slide, i.e. `i == 0` means a single slide was plotted
    if i == 0:
        axes[0, 0].set_title(title)
    else:
        fig.suptitle(title, fontsize=14, y=1.15)

    handles = [
        Line2D([0], [0], marker="o", color="w", markerfacecolor=color, markersize=8, linestyle="None")
        for color in colors + [na_color]
    ]

    # With three slides or more, the legend goes above the grid; otherwise on the right
    legend_on_top = i > 1
    fig.legend(
        handles,
        all_domains + ["NA"],
        loc="upper center" if legend_on_top else "center left",
        bbox_to_anchor=(0.5, 1.1) if legend_on_top else (1.04, 0.5),
        borderaxespad=0,
        frameon=False,
        ncol=len(colors) // (3 if legend_on_top else 10) + 1,
    )

    if show:
        plt.show()

`novae.plot.domains_proportions(adata, obs_key=None, slide_name_key=None, figsize=(2, 5), show=True)`

Show the proportion of each domain in the slide(s).

Parameters:

Name	Type	Description	Default
`adata`	`AnnData \| list[AnnData]`	One `AnnData` object, or a list of `AnnData` objects.	required
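`obs_key`	`str \| None`	The key in `adata.obs` that contains the Novae domains. By default, the last available domain key is shown.	`None`
`slide_name_key`	`str \| None`	Key of `adata.obs` that contains the slide names. By default, uses the Novae unique slide ID.	`None`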
`figsize`	`tuple[int, int]`	Matplotlib figure size.	`(2, 5)`
`show`	`bool`	Whether to show the plot.	`True`

Source code in novae/plot/_bar.py

def domains_proportions(
    adata: AnnData | list[AnnData],
    obs_key: str | None = None,
    slide_name_key: str | None = None,
    figsize: tuple[int, int] = (2, 5),
    show: bool = True,
):
    """Draw a stacked bar plot with, for each slide, the fraction of cells in each domain.

    Args:
        adata: One `AnnData` object, or a list of `AnnData` objects.
        obs_key: The key in `adata.obs` that contains the Novae domains. By default, the last available domain key is shown.
        slide_name_key: Key of `adata.obs` that contains the slide names (one bar per slide, labelled with the first name found in the slide).
        figsize: Matplotlib figure size.
        show: Whether to show the plot.
    """
    adatas = adata if not isinstance(adata, AnnData) else [adata]
    slide_name_key = utils.check_slide_name_key(adatas, slide_name_key)
    obs_key = utils.check_available_domains_key(adatas, obs_key)

    all_domains, colors = get_categorical_color_palette(adatas, obs_key)
    domain_to_color = dict(zip(all_domains, colors))

    # One (slide name, normalized domain counts) pair per slide
    per_slide = [
        (slide.obs[slide_name_key].iloc[0], slide.obs[obs_key].value_counts(normalize=True))
        for slide in utils.iter_slides(adatas)
    ]

    df = pd.concat([proportions for _, proportions in per_slide], axis=1)
    df.columns = [name for name, _ in per_slide]

    # Transposed so that each slide is one bar and each domain one stacked segment
    proportions_per_slide = df.T
    proportions_per_slide.plot(kind="bar", stacked=True, figsize=figsize, color=domain_to_color)

    sns.despine(offset=10, trim=True)
    plt.legend(bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0, frameon=False)
    plt.ylabel("Proportion")
    plt.xticks(rotation=90)

    if show:
        plt.show()

`novae.plot.connectivities(adata, ngh_threshold=2, cell_size=2, ncols=4, fig_size_per_slide=(5, 5), linewidths=0.1, line_color='#333', cmap='rocket', color_isolated_cells='orangered', show=True)`

Show the graph of the spatial connectivities between cells. By default, the cells which have a number of neighbors inferior to ngh_threshold are shown in red. If ngh_threshold is None, the cells are colored by the number of neighbors.

Quality control

This plot is useful to check the quality of the spatial connectivities obtained via novae.spatial_neighbors. Make sure few cells (e.g., less than 5%) have a number of neighbors below ngh_threshold. If too many cells are isolated, you may want to increase the radius parameter in novae.spatial_neighbors. Conversely, if some cells are really far from each other but still connected, you may want to decrease the radius parameter to disconnect them.

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An AnnData object.	required
`ngh_threshold`	`int \| None`	Only cells with a number of neighbors below this threshold are shown (with color `color_isolated_cells`). If `None`, cells are colored by the number of neighbors.	`2`
`cell_size`	`int`	Size of the dots for each cell (marker size of the scatter plot).	`2`
`ncols`	`int`	Number of columns to be shown.	`4`
`fig_size_per_slide`	`tuple[int, int]`	Size of the figure for each slide.	`(5, 5)`
`linewidths`	`float`	Width of the lines/edges connecting the cells.	`0.1`
`line_color`	`str`	Color of the lines/edges.	`'#333'`
`cmap`	`str`	Name of the colormap to use for the number of neighbors.	`'rocket'`
`color_isolated_cells`	`str`	Color for the cells with a number of neighbors below `ngh_threshold` (if not `None`).	`'orangered'`
`show`	`bool`	Whether to show the plot.	`True`

Source code in novae/plot/_graph.py

def connectivities(
    adata: AnnData | list[AnnData],
    ngh_threshold: int | None = 2,
    cell_size: int = 2,
    ncols: int = 4,
    fig_size_per_slide: tuple[int, int] = (5, 5),
    linewidths: float = 0.1,
    line_color: str = "#333",
    cmap: str = "rocket",
    color_isolated_cells: str = "orangered",
    show: bool = True,
):
    """Show the graph of the spatial connectivities between cells. By default,
    the cells which have a number of neighbors lower than `ngh_threshold` are shown
    in red. If `ngh_threshold` is `None`, the cells are colored by the number of neighbors.

    !!! info "Quality control"
        This plot is useful to check the quality of the spatial connectivities obtained via [novae.spatial_neighbors][].
        Make sure few cells (e.g., less than 5%) have a number of neighbors below `ngh_threshold`.
        If too many cells are isolated, you may want to increase the `radius` parameter in [novae.spatial_neighbors][].
        Conversely, if some cells are really **far from each other** but still connected, you may want to decrease the `radius` parameter to **disconnect** them.

    Args:
        adata: An `AnnData` object, or a list of `AnnData` objects.
        ngh_threshold: Only cells with a number of neighbors below this threshold are shown (with color `color_isolated_cells`). If `None`, cells are colored by the number of neighbors.
        cell_size: Size of the dots for each cell (marker size of the scatter plot).
        ncols: Number of columns to be shown.
        fig_size_per_slide: Size of the figure for each slide.
        linewidths: Width of the lines/edges connecting the cells.
        line_color: Color of the lines/edges.
        cmap: Name of the colormap to use for the number of neighbors.
        color_isolated_cells: Color for the cells with a number of neighbors below `ngh_threshold` (if not `None`).
        show: Whether to show the plot.
    """
    adatas = [adata] if isinstance(adata, AnnData) else adata

    fig, axes = _subplots_per_slide(adatas, ncols, fig_size_per_slide)

    for i, adata_slide in enumerate(utils.iter_slides(adatas)):
        ax = axes[i // ncols, i % ncols]

        utils.check_has_spatial_adjancency(adata_slide)

        X, A = adata_slide.obsm["spatial"], adata_slide.obsp[Keys.ADJ]

        ax.invert_yaxis()
        ax.axes.set_aspect("equal")

        # Keep only the (row < col) entries so that each undirected edge is drawn once
        rows, cols = A.nonzero()
        mask = rows < cols
        rows, cols = rows[mask], cols[mask]
        edge_segments = np.stack([X[rows], X[cols]], axis=1)
        edges = LineCollection(edge_segments, color=line_color, linewidths=linewidths, zorder=1)
        ax.add_collection(edges)

        # `np.asarray(...).ravel()` instead of `.A1`: the row-wise sum is an `np.matrix` for
        # sparse matrices, but a plain `ndarray` for sparse arrays or dense adjacency
        n_neighbors = np.asarray((A > 0).sum(1)).ravel()

        if ngh_threshold is None:
            points = ax.scatter(X[:, 0], X[:, 1], c=n_neighbors, s=cell_size, zorder=2, cmap=cmap)
            plt.colorbar(points, ax=ax)
        else:
            isolated_cells = n_neighbors < ngh_threshold
            ax.scatter(X[isolated_cells, 0], X[isolated_cells, 1], color=color_isolated_cells, s=cell_size, zorder=2)

        ax.set_title(adata_slide.obs[Keys.SLIDE_ID].iloc[0])

    # Remove unused subplots
    for ax in axes.flatten():
        if not ax.has_data():
            fig.delaxes(ax)

    title = "Node connectivities" + (f" (threshold={ngh_threshold} neighbors)" if ngh_threshold is not None else "")

    # `i` is the index of the last slide, i.e. `i == 0` means a single slide was plotted
    if i == 0:
        axes[0, 0].set_title(title)
    else:
        fig.suptitle(title, fontsize=14)

    if show:
        plt.show()

`novae.plot.pathway_scores(adata, pathways, obs_key=None, pathway_name=None, slide_name_key=None, return_df=False, figsize=(10, 5), min_pathway_size=4, show=True, **kwargs)`

Show a heatmap of either (i) the score of multiple pathways for each domain, or (ii) one pathway score for each domain and for each slide. To use the latter case, provide pathway_name, or make sure to have only one pathway in pathways.

Info

Currently, this function only supports one AnnData object per call.

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object.	required
`pathways`	`dict[str, list[str]] \| str`	Either a dictionary of pathways (keys are pathway names, values are lists of gene names), or a path to a GSEA JSON file.	required
`obs_key`	`str \| None`	Key in `adata.obs` that contains the domains. By default, it will use the last available Novae domain key.	`None`
`pathway_name`	`str \| None`	If `None`, all pathways will be shown (first mode). If not `None`, this specific pathway will be shown, for all domains and all slides (second mode).	`None`
`slide_name_key`	`str \| None`	Key of `adata.obs` that contains the slide names. By default, uses the Novae unique slide ID.	`None`
`return_df`	`bool`	Whether to return the DataFrame.	`False`
`figsize`	`tuple[int, int]`	Matplotlib figure size.	`(10, 5)`
`min_pathway_size`	`int`	Minimum number of known genes in the pathway to be considered.	`4`
`show`	`bool`	Whether to show the plot.	`True`

Returns:

Type	Description
`DataFrame \| None`	A DataFrame of scores per domain if `return_df` is True.

Source code in novae/plot/_heatmap.py

def pathway_scores(
    adata: AnnData,
    pathways: dict[str, list[str]] | str,
    obs_key: str | None = None,
    pathway_name: str | None = None,
    slide_name_key: str | None = None,
    return_df: bool = False,
    figsize: tuple[int, int] = (10, 5),
    min_pathway_size: int = 4,
    show: bool = True,
    **kwargs: int,
) -> pd.DataFrame | None:
    """Show a heatmap of either (i) the score of multiple pathways for each domain, or (ii) one pathway score for each domain and for each slide.
    To use the latter case, provide `pathway_name`, or make sure to have only one pathway in `pathways`.

    Info:
        Currently, this function only supports one AnnData object per call.

    Args:
        adata: An `AnnData` object.
        pathways: Either a dictionary of pathways (keys are pathway names, values are lists of gene names), or a path to a [GSEA](https://www.gsea-msigdb.org/gsea/msigdb/index.jsp) JSON file.
        obs_key: Key in `adata.obs` that contains the domains. By default, it will use the last available Novae domain key.
        pathway_name: If `None`, all pathways will be shown (first mode). If not `None`, this specific pathway will be shown, for all domains and all slides (second mode).
        slide_name_key: Key of `adata.obs` that contains the slide names. By default, uses the Novae unique slide ID.
        return_df: Whether to return the DataFrame.
        figsize: Matplotlib figure size.
        min_pathway_size: Minimum number of known genes in the pathway to be considered.
        show: Whether to show the plot.
        **kwargs: Additional arguments for `sns.clustermap`.

    Returns:
        A DataFrame of scores per domain if `return_df` is True.
    """
    assert isinstance(adata, AnnData), f"For now, only one AnnData object is supported, received {type(adata)}"

    obs_key = utils.check_available_domains_key([adata], obs_key)

    if isinstance(pathways, str):
        pathways = _load_gsea_json(pathways)
        log.info(f"Loaded {len(pathways)} pathway(s)")

    # A single pathway always triggers the "one pathway per domain per slide" mode
    if len(pathways) == 1:
        pathway_name = next(iter(pathways))

    # The scores are read back from the temporary `adata.obs[TEMP_KEY]` column after each
    # scoring call; the `finally` clause guarantees this column never leaks into the
    # user's object, even when one of the assertions below fails.
    try:
        if pathway_name is not None:
            gene_names = pathways[pathway_name]
            is_valid = _get_pathway_score(adata, gene_names, min_pathway_size)
            assert is_valid, f"Pathway '{pathway_name}' has less than {min_pathway_size} genes in the dataset."

            log.info(f"Plot mode: {pathway_name} score per domain per slide")

            slide_name_key = utils.check_slide_name_key([adata], slide_name_key)

            # Rows are domains, columns are slides
            df = adata.obs.groupby([obs_key, slide_name_key], observed=True)[TEMP_KEY].mean().unstack()
            df.columns.name = slide_name_key

            # NOTE(review): `len(df)` counts the rows, i.e. the domains (not the slides)
            assert len(df) > 1, f"Found {len(df)} valid domain. Minimum 2 required."
        else:
            scores = {}

            for key, gene_names in pathways.items():
                is_valid = _get_pathway_score(adata, gene_names, min_pathway_size)
                if is_valid:
                    scores[key] = adata.obs[TEMP_KEY]

            log.info(f"Plot mode: {len(scores)} pathways scores per domain")

            assert len(scores) > 1, f"Found {len(scores)} valid pathway. Minimum 2 required."

            # Rows are domains, columns are pathways
            df = pd.DataFrame(scores)
            df[obs_key] = adata.obs[obs_key]
            df = df.groupby(obs_key, observed=True).mean()
            df.columns.name = "Pathways"
    finally:
        if TEMP_KEY in adata.obs:
            del adata.obs[TEMP_KEY]

    # Missing (domain, slide) combinations would otherwise break the clustering
    df = df.fillna(0)

    g = sns.clustermap(df, figsize=figsize, **kwargs)
    plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)

    if show:
        plt.show()

    if return_df:
        return df

`novae.plot.paga(adata, obs_key=None, show=True, **paga_plot_kwargs)`

Plot a PAGA graph.

Info

Currently, this function only supports one slide per call.

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An AnnData object.	required
`obs_key`	`str \| None`	Name of the key from `adata.obs` containing the Novae domains. By default, the last available domain key is shown.	`None`
`show`	`bool`	Whether to show the plot.	`True`
`**paga_plot_kwargs`	`int`	Additional arguments for `sc.pl.paga`.	`{}`

Source code in novae/plot/_graph.py

def paga(adata: AnnData, obs_key: str | None = None, show: bool = True, **paga_plot_kwargs: int):
    """Draw the PAGA graph of the Novae domains.

    Info:
        Currently, this function only supports one slide per call.

    Args:
        adata: An AnnData object.
        obs_key: Name of the key from `adata.obs` containing the Novae domains. By default, the last available domain key is shown.
        show: Whether to show the plot.
        **paga_plot_kwargs: Additional arguments for `sc.pl.paga`.
    """
    assert isinstance(adata, AnnData), f"For now, only AnnData objects are supported, received {type(adata)}"

    obs_key = utils.check_available_domains_key([adata], obs_key)

    # Return value is not needed here; the call is kept for whatever it sets up on `adata`
    get_categorical_color_palette([adata], obs_key)

    # Cells without any domain are excluded from the graph
    has_domain = adata.obs[obs_key].notna()
    adata_with_domains = adata[has_domain]

    sizes_key = f"{obs_key}_sizes"
    paga_is_up_to_date = "paga" in adata.uns and adata.uns["paga"]["groups"] == obs_key

    if not paga_is_up_to_date:
        sc.pp.neighbors(adata_with_domains, use_rep=Keys.REPR)
        sc.tl.paga(adata_with_domains, groups=obs_key)

        # Store the results on the original object so that the next call can reuse them
        adata.uns["paga"] = adata_with_domains.uns["paga"]
        adata.uns[sizes_key] = adata_with_domains.uns[sizes_key]

    sc.pl.paga(adata_with_domains, title=f"PAGA graph ({obs_key})", show=False, **paga_plot_kwargs)
    sns.despine(offset=10, trim=True, bottom=True)

    if show:
        plt.show()

`novae.plot.spatially_variable_genes(adata, obs_key=None, top_k=5, cell_size=None, min_positive_ratio=0.05, return_list=False, show=True, **kwargs)`

Plot the most spatially variable genes (SVG) for a given AnnData object.

Info

Currently, this function only supports one slide per call.

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object corresponding to one slide.	required
`obs_key`	`str \| None`	Key in `adata.obs` that contains the domains. By default, it will use the last available Novae domain key.	`None`
`top_k`	`int`	Number of SVG to be shown.	`5`
`cell_size`	`int \| None`	Size of the cells or spots (`spot_size` argument of `sc.pl.spatial`). By default, it uses the median distance between neighbor cells.	`None`
`min_positive_ratio`	`float`	Genes whose "ratio of cells expressing it" is lower than this threshold are not considered.	`0.05`
`return_list`	`bool`	Whether to return the list of SVG instead of plotting them.	`False`
`show`	`bool`	Whether to show the plot.	`True`
`**kwargs`	`int`	Additional arguments for `sc.pl.spatial`.	`{}`

Returns:

Type	Description
`None \| list[str]`	A list of SVG names if `return_list` is `True`.

Source code in novae/plot/_spatial.py

def spatially_variable_genes(
    adata: AnnData,
    obs_key: str | None = None,
    top_k: int = 5,
    cell_size: int | None = None,
    min_positive_ratio: float = 0.05,
    return_list: bool = False,
    show: bool = True,
    **kwargs: int,
) -> None | list[str]:
    """Find the most spatially variable genes (SVG) of an `AnnData` object, and plot them (or return their names).

    The genes are ranked by the standard deviation, across domains, of their log fold changes.

    !!! info
        Currently, this function only supports one slide per call.

    Args:
        adata: An `AnnData` object corresponding to one slide.
        obs_key: Key in `adata.obs` that contains the domains. By default, it will use the last available Novae domain key.
        top_k: Number of SVG to be shown.
        cell_size: Size of the cells or spots (`spot_size` argument of `sc.pl.spatial`). By default, it uses the median distance between neighbor cells.
        min_positive_ratio: Genes whose "ratio of cells expressing it" is lower than this threshold are not considered.
        return_list: Whether to return the list of SVG instead of plotting them.
        show: Whether to show the plot.
        **kwargs: Additional arguments for `sc.pl.spatial`.

    Returns:
        A list of SVG names if `return_list` is `True`.
    """
    assert isinstance(adata, AnnData), f"Received adata of type {type(adata)}. Currently only AnnData is supported."

    obs_key = utils.check_available_domains_key([adata], obs_key)

    sc.tl.rank_genes_groups(adata, groupby=obs_key)

    # One column of log fold changes per domain, indexed by gene name
    logfoldchanges_per_domain = []
    for domain in adata.obs[obs_key].cat.categories:
        ranked_genes = sc.get.rank_genes_groups_df(adata, domain)
        logfoldchanges_per_domain.append(ranked_genes.set_index("names")["logfoldchanges"])
    df = pd.concat(logfoldchanges_per_domain, axis=1)

    # Genes expressed in a large enough fraction of the cells
    where = (adata.X > 0).mean(0) > min_positive_ratio
    if isinstance(where, np.matrix):
        where = where.A1  # flatten the (1, n_vars) matrix obtained from a sparse `X`
    valid_vars = adata.var_names[where]
    assert (
        len(valid_vars) >= top_k
    ), f"Only {len(valid_vars)} genes are available. Please decrease `top_k` or `min_positive_ratio`."

    variability = df.std(axis=1).loc[valid_vars]
    svg = variability.sort_values(ascending=False).head(top_k).index

    if return_list:
        return svg.tolist()

    spot_size = cell_size or _get_default_cell_size(adata)
    sc.pl.spatial(adata, color=svg, spot_size=spot_size, show=show, **kwargs)