Tools

`scyan.tools.umap(adata, markers=None, obsm=None, n_cells=200000, min_dist=0.5, obsm_key='X_umap', filter=None, **umap_kwargs)`

Run a UMAP on a specific set of markers (or all markers by default). It can be useful to show differences that are due to some markers of interest, instead of using the whole panel.

Info

This function returns a UMAP reducer. You can reuse it with reducer.transform(...) or save it with scyan.data.add.

Note

To actually plot the UMAP, use scyan.plot.umap.

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object.	required
`markers`	`Optional[List[str]]`	List of marker names. By default, use all the panel markers, i.e., `adata.var_names`.	`None`
`obsm`	`Optional[str]`	Name of the obsm to consider to train the UMAP. By default, uses `adata.X`.	`None`
`n_cells`	`Optional[int]`	Number of cells to be considered for the UMAP (to accelerate it when \(N\) is very high). If `None`, consider all cells.	`200000`
`min_dist`	`float`	Min dist UMAP parameter.	`0.5`
`obsm_key`	`str`	Key for `adata.obsm` to add the embedding.	`'X_umap'`
`filter`	`Optional[Tuple]`	Optional tuple `(key, value)` used to train the UMAP on a set of cells that satisfies a constraint. `key` is the key of `adata.obs` to consider, and `value` the value the cells need to have.	`None`
`**umap_kwargs`	`int`	Optional kwargs to provide to the `UMAP` initialization.	`{}`

Returns:

Type	Description
`UMAP`	UMAP reducer.

Source code in scyan/tools/representation.py

def umap(
    adata: AnnData,
    markers: Optional[List[str]] = None,
    obsm: Optional[str] = None,
    n_cells: Optional[int] = 200_000,
    min_dist: float = 0.5,
    obsm_key: str = "X_umap",
    filter: Optional[Tuple] = None,
    **umap_kwargs: int,
) -> UMAP:
    """Run a [UMAP](https://umap-learn.readthedocs.io/en/latest/) on a specific set of markers (or all markers by default). It can be useful to show differences that are due to some markers of interest, instead of using the whole panel.

    !!! info

        This function returns a UMAP reducer. You can reuse it with `reducer.transform(...)` or save it with [scyan.data.add][].

    !!! note

        To actually plot the UMAP, use [scyan.plot.umap][].

    Args:
        adata: An `AnnData` object.
        markers: List of marker names. By default, use all the panel markers, i.e., `adata.var_names`.
        obsm: Name of the obsm to consider to train the UMAP. By default, uses `adata.X`.
        n_cells: Number of cells to be considered for the UMAP (to accelerate it when $N$ is very high). If `None`, consider all cells.
        min_dist: Min dist UMAP parameter.
        obsm_key: Key for `adata.obsm` to add the embedding. Rows of the cells that were not used for the UMAP are filled with zeros.
        filter: Optional tuple `(key, value)` used to train the UMAP on a set of cells that satisfies a constraint. `key` is the key of `adata.obs` to consider, and `value` the value the cells need to have.
        **umap_kwargs: Optional kwargs to provide to the `UMAP` initialization (e.g., `n_components`).

    Returns:
        UMAP reducer.
    """
    reducer = UMAP(min_dist=min_dist, **umap_kwargs)

    if markers is None:
        markers = adata.var_names

    indices = _get_subset_indices(adata.n_obs, n_cells)
    adata_view = adata[indices, markers]
    X = adata_view.X if obsm is None else adata_view.obsm[obsm]

    _check_is_processed(X)

    log.info("Fitting UMAP...")
    if filter is None:
        embedding = reducer.fit_transform(X)
    else:
        key, value = filter
        # Plain boolean array: avoids relying on pandas index alignment when masking X
        mask = (adata_view.obs[key] == value).to_numpy()
        reducer.fit(X[mask])
        log.info("Transforming...")
        embedding = reducer.transform(X)

    # The buffer is allocated only once the embedding is known, so that (i) its width
    # follows the reducer output (e.g., when `n_components` is given in `umap_kwargs`)
    # and (ii) an existing `adata.obsm[obsm_key]` is not lost if the fit fails.
    adata.obsm[obsm_key] = np.zeros((adata.n_obs, embedding.shape[1]))
    adata.obsm[obsm_key][indices] = embedding

    return reducer

`scyan.tools.leiden(adata, resolution=1, key_added='leiden', n_neighbors=15)`

Leiden clustering

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	AnnData object.	required
`resolution`	`float`	Resolution of the clustering.	`1`
`key_added`	`str`	Name of the key of adata.obs where clusters will be saved.	`'leiden'`
`n_neighbors`	`int`	Number of neighbors.	`15`

Source code in scyan/tools/representation.py

def leiden(
    adata: AnnData,
    resolution: float = 1,
    key_added: str = "leiden",
    n_neighbors: int = 15,
) -> None:
    """Leiden clustering. The cluster labels are saved (as strings) in `adata.obs[key_added]`.

    !!! note

        The k-nearest-neighbors graph is cached in `adata.obsp["knn_graph"]`. If this key already exists, the cached graph is reused and `n_neighbors` is ignored.

    Args:
        adata: AnnData object.
        resolution: Resolution of the clustering.
        key_added: Name of the key of adata.obs where clusters will be saved.
        n_neighbors: Number of neighbors.

    Raises:
        ImportError: If `leidenalg` is not installed.
    """
    try:
        import leidenalg
    except ImportError as e:
        # Only a failed import is translated; other errors are left untouched
        raise ImportError(
            """To run leiden, you need to have 'leidenalg' installed. You can install the population discovery extra with "pip install 'scyan[discovery]'", or directly install leidenalg with "conda install -c conda-forge leidenalg"."""
        ) from e

    import igraph as ig
    from sklearn.neighbors import kneighbors_graph

    if "knn_graph" not in adata.obsp:
        adata.obsp["knn_graph"] = kneighbors_graph(
            adata.X, n_neighbors=n_neighbors, metric="euclidean", include_self=False
        )

    # TODO (improvement): add weights according to euclidean distance
    graph = ig.Graph.Weighted_Adjacency(adata.obsp["knn_graph"], mode="DIRECTED")

    partition = leidenalg.find_partition(
        graph,
        leidenalg.RBConfigurationVertexPartition,
        resolution_parameter=resolution,
    )
    adata.obs[key_added] = pd.Categorical([str(x) for x in partition.membership])

`scyan.tools.subcluster(adata, population, markers=None, key='scyan_pop', resolution=0.2, size_ratio_th=0.02, min_cells_th=200, n_cells=100000)`

Create sub-clusters within a given population, and filter out small clusters according to (i) a minimum number of cells and (ii) a minimum ratio of cells.

Info

After having run this method, you can analyze the results with scyan.plot.umap and scyan.plot.pops_expressions.

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object.	required
`population`	`str`	Name of the population to target (one of `adata.obs[key]`).	required
`markers`	`Optional[List[str]]`	Optional list of markers used to create subclusters. By default, uses the complete panel.	`None`
`key`	`str`	Key to look for population in `adata.obs`. By default, uses the model predictions, but you can also choose a population level (if any), or other observations.	`'scyan_pop'`
`resolution`	`float`	Resolution used for leiden clustering. Higher resolution leads to more clusters.	`0.2`
`size_ratio_th`	`float`	Minimum ratio of cells to be considered as a significant cluster (compared to the parent cluster).	`0.02`
`min_cells_th`	`int`	Minimum number of cells to be considered as a significant cluster.	`200`
`n_cells`	`int`	Number of cells to be considered for the subclustering (to accelerate it when \(N\) is very high). If `None`, consider all cells.	`100000`

Source code in scyan/tools/representation.py

def subcluster(
    adata: AnnData,
    population: str,
    markers: Optional[List[str]] = None,
    key: str = "scyan_pop",
    resolution: float = 0.2,
    size_ratio_th: float = 0.02,
    min_cells_th: int = 200,
    n_cells: Optional[int] = 100_000,
) -> None:
    """Create sub-clusters within a given population, and filter out small clusters according to (i) a minimum number of cells and (ii) a minimum ratio of cells.

    !!! info
        After having run this method, you can analyze the results with [scyan.plot.umap][] and [scyan.plot.pops_expressions][].

    The raw leiden labels are saved in `adata.obs[f"leiden_{resolution}_{population}"]`, and the filtered sub-clusters in `adata.obs[f"scyan_subcluster_{population}"]`. The markers used are stored in `adata.uns` so that a later call with the same resolution and markers reuses the leiden labels.

    Args:
        adata: An `AnnData` object.
        population: Name of the population to target (one of `adata.obs[key]`).
        markers: Optional list of markers used to create subclusters. By default, uses the complete panel.
        key: Key to look for population in `adata.obs`. By default, uses the model predictions, but you can also choose a population level (if any), or other observations.
        resolution: Resolution used for leiden clustering. Higher resolution leads to more clusters.
        size_ratio_th: Minimum ratio of cells to be considered as a significant cluster (compared to the number of cells that were clustered).
        min_cells_th: Minimum number of cells to be considered as a significant cluster.
        n_cells: Number of cells to be considered for the subclustering (to accelerate it when $N$ is very high). If `None`, consider all cells.

    Raises:
        AssertionError: If every sub-cluster is below the size thresholds.
    """
    leiden_key = f"leiden_{resolution}_{population}"
    subcluster_key = f"scyan_subcluster_{population}"
    condition = adata.obs[key] == population
    markers = list(adata.var_names if markers is None else markers)

    # `list(...)` makes the comparison a plain boolean even if the stored markers
    # come back as an array (e.g., after saving and re-loading the `AnnData` object)
    if leiden_key in adata.obs and list(adata.uns.get(leiden_key, [])) == markers:
        log.info(
            "Found leiden labels with the same resolution. Skipping leiden clustering."
        )
        indices = np.where(~adata.obs[leiden_key].isna())[0]
        adata_sub = adata[indices, markers].copy()
    else:
        has_umap = _has_umap(adata)
        # `n_cells is None` means "no subsampling"; it is tested first because it can not
        # be compared to a cell count (assumes `_get_subset_indices` accepts `None`, as
        # it is called this way by `umap` — confirm against the helper)
        if n_cells is None or has_umap.all() or condition.sum() <= n_cells:
            indices = _get_subset_indices(condition.sum(), n_cells)
            indices = np.where(condition)[0][indices]
        else:
            # Cells that have a UMAP are taken first...
            indices = _get_subset_indices((condition & has_umap).sum(), n_cells)
            indices = np.where(condition & has_umap)[0][indices]

            # ...then completed with cells without UMAP, up to `n_cells` cells
            k = len(indices)
            if k < n_cells:
                indices2 = _get_subset_indices((condition & ~has_umap).sum(), n_cells - k)
                indices2 = np.where(condition & ~has_umap)[0][indices2]
                indices = np.concatenate([indices, indices2])

        adata_sub = adata[indices, markers].copy()

        leiden(adata_sub, resolution, leiden_key)

    # Raw leiden labels: cells that were not clustered stay NaN
    series = pd.Series(index=np.arange(adata.n_obs), dtype=str)
    series[indices] = adata_sub.obs[leiden_key].values
    adata.obs[leiden_key] = series.values
    adata.obs[leiden_key] = adata.obs[leiden_key].astype("category")

    # A cluster is removed if it is smaller than the largest of the two thresholds
    counts = adata_sub.obs[leiden_key].value_counts()
    remove = counts < max(counts.sum() * size_ratio_th, min_cells_th)

    assert (
        not remove.all()
    ), "All subclusters where filtered. Consider updating size_ratio_th and/or min_cells_th."

    adata_sub.obs.loc[
        np.isin(adata_sub.obs[leiden_key], remove[remove].index), leiden_key
    ] = np.nan

    # Filtered labels: the cells of the removed clusters are NaN too
    series = pd.Series(index=np.arange(adata.n_obs), dtype=str)
    series[indices] = adata_sub.obs[leiden_key].values
    adata.obs[subcluster_key] = series.values
    adata.obs[subcluster_key] = adata.obs[subcluster_key].astype("category")

    adata.uns[leiden_key] = markers
    log.info(
        f"Subclusters created, you can now use:\n   - scyan.plot.umap(adata, color='{subcluster_key}') to show the clusters\n   - scyan.plot.pops_expressions(model, key='{subcluster_key}') to plot their expressions"
    )

`scyan.tools.palette_level(table, population_index=0, level_index=1, hue_shift=0.4, alpha_l=0.25, step_l=0.15, alpha_s=0.3, step_s=0.4)`

Computes a color palette that is grouped by the hierarchical main populations. It improves the UMAP readability when many populations are defined.

Info

Once such a color palette is defined, you can use it for plotting. For instance, try scyan.plot.umap(adata, color="scyan_pop", palette=palette), where palette is the one you created with this function.

Parameters:

Name	Type	Description	Default
`table`	`DataFrame`	Knowledge table provided to Scyan. It must be a multi-index DataFrame.	required
`population_index`	`Union[int, str]`	Index or name of the level in `table.index` storing the low-level/children population names.	`0`
`level_index`	`Union[int, str]`	Index or name of the level in `table.index` storing the main population names.	`1`
`hue_shift`	`float`	Shift the hue values. The value must be a float in `[0, 1]`.	`0.4`
`alpha_l`	`float`	Lower it to have a larger lightness range of colors.	`0.25`
`step_l`	`float`	Increase it to have more distinct colors (in term of lightness).	`0.15`
`alpha_s`	`float`	Lower it to have a larger saturation range of colors.	`0.3`
`step_s`	`float`	Increase it to have more distinct colors (in term of saturation).	`0.4`

Returns:

Type	Description
`Dict[str, Tuple[float]]`	A dictionary whose keys are population names and values are RGB colors.

Source code in scyan/tools/colors.py

def palette_level(
    table: pd.DataFrame,
    population_index: Union[int, str] = 0,
    level_index: Union[int, str] = 1,
    hue_shift: float = 0.4,
    alpha_l: float = 0.25,
    step_l: float = 0.15,
    alpha_s: float = 0.3,
    step_s: float = 0.4,
) -> Dict[str, Tuple[float]]:
    """Build a color palette in which the populations that share the same main (hierarchical) population get colors from the same group. It improves the UMAP readability when many populations are defined.

    !!! info
        Once such a color palette is defined, you can use it for plotting. For instance, try `scyan.plot.umap(adata, color="scyan_pop", palette=palette)`, where `palette` is the one you created with this function.

    Args:
        table: Knowledge table provided to Scyan. It must be a multi-index DataFrame.
        population_index: Index or name of the level in `table.index` storing the low-level/children population names.
        level_index: Index or name of the level in `table.index` storing the main population names.
        hue_shift: Shift the hue values. The value must be a float in `[0, 1]`.
        alpha_l: Lower it to have a larger lightness range of colors.
        step_l: Increase it to have more distinct colors (in term of lightness).
        alpha_s: Lower it to have a larger saturation range of colors.
        step_s: Increase it to have more distinct colors (in term of saturation).

    Returns:
        A dictionary whose keys are population names and values are RGB colors.
    """
    index = table.index
    assert isinstance(
        index, pd.MultiIndex
    ), f"The provided table has no multi-index. To work with hierarchical populations, consider reading https://mics-lab.github.io/scyan/tutorials/usage/#working-with-hierarchical-populations"

    child_names = index.get_level_values(population_index).values
    main_names = index.get_level_values(level_index)

    # One group of colors per main population, sized by its number of children
    group_sizes = main_names.value_counts()
    color_groups = GroupPalette(alpha_l, step_l, alpha_s, step_s)(
        group_sizes.values, hue_shift
    )

    # Position of each main population among the color groups
    group_positions = [group_sizes.index.get_loc(name) for name in main_names]

    # Rank of each child inside its main population (0, 1, 2, ...)
    main_series = pd.Series(main_names)
    ranks = main_series.groupby(main_series).cumcount().values

    palette = {}
    for child, position, rank in zip(child_names, group_positions, ranks):
        palette[child] = list(color_groups[position][rank])
    return palette

`scyan.tools.cell_type_ratios(adata, groupby=None, normalize=True, key='scyan_pop', among=None)`

Computes the ratio of cells per population. This ratio can be provided for each patient (or for any kind of 'group').

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object.	required
`groupby`	`Union[str, List[str], None]`	Key(s) of `adata.obs` used to create groups (e.g. the patient ID).	`None`
`normalize`	`bool`	If `False`, returns counts instead of ratios. If `"%"`, use percentage instead of ratios in `[0, 1]`.	`True`
`key`	`str`	Key of `adata.obs` containing the population names (or the values to count).	`'scyan_pop'`
`among`	`str`	Key of `adata.obs` containing the parent population name. Typically, if using hierarchical populations, you can provide `'scyan_pop_level'` with your level name. E.g., if the parent of population of "T CD4 RM" is called "T cells" in `adata.obs[among]`, then this function computes the 'T CD4 RM ratio among T cells'.	`None`

Returns:

Type	Description
`DataFrame`	A DataFrame of ratios or counts (one row per group, one column per population). If `normalize=True`, then each row sums to 1 (for `among=None`).

Source code in scyan/tools/biomarkers.py

def cell_type_ratios(
    adata: AnnData,
    groupby: Union[str, List[str], None] = None,
    normalize: Union[bool, str] = True,
    key: str = "scyan_pop",
    among: Optional[str] = None,
) -> pd.DataFrame:
    """Computes the ratio of cells per population. This ratio can be provided for each patient (or for any kind of 'group').

    Args:
        adata: An `AnnData` object.
        groupby: Key(s) of `adata.obs` used to create groups (e.g. the patient ID).
        normalize: If `False`, returns counts instead of ratios. If `"%"`, use percentage instead of ratios in `[0, 1]`.
        key: Key of `adata.obs` containing the population names (or the values to count).
        among: Key of `adata.obs` containing the parent population name. Typically, if using hierarchical populations, you can provide `'scyan_pop_level'` with your level name. E.g., if the parent of population of "T CD4 RM" is called "T cells" in `adata.obs[among]`, then this function computes the 'T CD4 RM ratio among T cells'. It can not be combined with `normalize=False`.

    Returns:
        A DataFrame of ratios or counts (one row per group, one column per population). If `normalize=True`, then each row sums to 1 (for `among=None`).

    Raises:
        AssertionError: If `among` is provided while `normalize` is `False`, or if a population has more than one parent population in `adata.obs[among]`.
    """
    # Counts can not be expressed "among" a parent population: only ratios can
    assert (
        among is None or normalize
    ), "If 'among' is not `None`, then normalize can't be `False`"

    column_suffix = (
        ("percentage" if normalize == "%" else "ratio") if normalize else "count"
    )

    counts = _get_counts(adata, groupby, key, normalize)

    if among is None:
        counts.columns = [f"{name} {column_suffix}" for name in counts.columns]
        return counts.mul(100) if normalize == "%" else counts

    parents_count = _get_counts(adata, groupby, among, normalize)

    # Rows are parent populations, columns are populations: a population column must
    # have cells in at most one parent row
    df_parent = adata.obs.groupby(among)[key].apply(lambda s: s.value_counts()).unstack()
    assert (
        (df_parent > 0).sum(0) <= 1
    ).all(), f"Each population from adata.obs['{key}'] should have only one parent population in adata.obs['{among}']"
    to_parent_dict = dict(df_parent.idxmax())

    # Each population column is divided by the column of its parent population
    counts /= parents_count[[to_parent_dict[pop] for pop in counts.columns]].values
    counts.columns = [
        f"{pop} {column_suffix} among {to_parent_dict[pop]}" for pop in counts.columns
    ]
    return counts.mul(100) if normalize == "%" else counts

`scyan.tools.mean_intensities(adata, groupby=None, layer=None, key='scyan_pop', unstack_join=' mean intensity on ', obsm=None, obsm_names=None)`

Compute the Mean Metal Intensity (MMI) or Mean Fluorescence Intensity (MFI) per population. If needed, mean intensities can be computed per group (e.g., per patient) by providing the groupby argument.

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object.	required
`groupby`	`Union[str, List[str], None]`	Key(s) of `adata.obs` used to create groups. For instance, `"id"` computes MMI per population for each ID. You can also provide something like `["group", "id"]` to get MMI per group, and per patient inside each group.	`None`
`layer`	`Optional[str]`	In which `adata.layers` we get expression intensities. By default, it uses `adata.X`.	`None`
`key`	`str`	Key of `adata.obs` containing the population names.	`'scyan_pop'`
`unstack_join`	`Optional[str]`	If `None`, keep the information grouped. Else, flattens the biomarkers into one series (or one row per group if `groupby` is a list) and uses `unstack_join` to join the names of the multi-level columns. For instance, `' expression on '` can be a good choice.	`' mean intensity on '`
`obsm`	`Optional[str]`	In which `adata.obsm` we get expression intensities. By default, it uses `adata.X`. If not `None` then `obsm_names` is required too.	`None`
`obsm_names`	`Optional[List[str]]`	Ordered list of names in `adata.obsm[obsm]` if `obsm` was provided.	`None`

Returns:

Type	Description
`DataFrame`	A DataFrame of MFI. If `groupby` was a list, it is a multi-index dataframe.

Source code in scyan/tools/biomarkers.py

def mean_intensities(
    adata: AnnData,
    groupby: Union[str, List[str], None] = None,
    layer: Optional[str] = None,
    key: str = "scyan_pop",
    unstack_join: Optional[str] = " mean intensity on ",
    obsm: Optional[str] = None,
    obsm_names: Optional[List[str]] = None,
) -> pd.DataFrame:
    """Compute the Mean Metal Intensity (MMI) or Mean Fluorescence Intensity (MFI) per population. If needed, mean intensities can be computed per group (e.g., per patient) by providing the `groupby` argument.

    Args:
        adata: An `AnnData` object.
        groupby: Key(s) of `adata.obs` used to create groups. For instance, `"id"` computes MMI per population for each ID. You can also provide something like `["group", "id"]` to get MMI per group, and per patient inside each group.
        layer: In which `adata.layers` we get expression intensities. By default, it uses `adata.X`.
        key: Key of `adata.obs` containing the population names.
        unstack_join: If `None`, keep the information grouped. Else, flattens the biomarkers into one series (or one row per group if `groupby` is provided) and uses `unstack_join` to join the names of the multi-level columns. For instance, `' expression on '` can be a good choice.
        obsm: In which `adata.obsm` we get expression intensities. By default, it uses `adata.X`. If not `None` then `obsm_names` is required too.
        obsm_names: Ordered list of names in `adata.obsm[obsm]` if `obsm` was provided.

    Returns:
        A DataFrame of MFI (or a Series if `groupby` is `None` and `unstack_join` is not `None`). If `groupby` was a list, it is a multi-index dataframe.

    Raises:
        AssertionError: If both `obsm` and `layer` are provided.
    """
    # The population key is always the last (innermost) grouping level
    if groupby is None:
        groupby = [key]
    elif isinstance(groupby, str):
        groupby = [groupby, key]
    else:
        groupby = list(groupby) + [key]

    if obsm is not None:
        assert (
            layer is None
        ), "You must choose between 'obsm' and 'layer', do not use both."

        df = pd.DataFrame(data=adata.obsm[obsm], columns=obsm_names)
    else:
        df = adata.to_df(layer)

    for group in groupby:
        df[group] = adata.obs[group].values

    res = df.groupby(groupby).mean().dropna(how="all")

    # `min` of an empty array raises, so the check is skipped when there is nothing to check
    if res.size > 0 and res.values.min() < 0:
        log.warning(
            "The minimum expression value is negative. Are you sure you are using unscaled values? If not, you can use 'scyan.preprocess.unscale' and save the unscaled result in a 'adata.layers' of your choice (then use this layer argument in the current function). If you know what you are doing, or if you use flow cytometry data, you can ignore this warning."
        )

    if unstack_join is None:
        return res

    # Move the population level to the columns: labels become (marker, population) pairs.
    # With a single grouping level, pandas returns a Series indexed by these pairs.
    res = res.unstack(level=-1)

    # Labels are cast to `str` since population names (or obsm column names) may not be strings
    if isinstance(res, pd.Series):
        res.index = [unstack_join.join(map(str, row)).strip() for row in res.index.values]
    else:
        res.columns = [
            unstack_join.join(map(str, col)).strip() for col in res.columns.values
        ]
    return res

`scyan.tools.PolygonGatingUMAP`

Class used to select cells on a UMAP using polygons.

Note

If used on a Jupyter Notebook, you should first run %matplotlib tk. After the selection, you can run %matplotlib inline to retrieve the default behavior.

# Usage example (`%matplotlib tk` is required for the cell selection on jupyter notebooks)
>>> %matplotlib tk
>>> selector = scyan.tools.PolygonGatingUMAP(adata)
>>> selector.select()         # select the cells

>>> sub_adata = selector.extract_adata() # on a notebook, this has to be on a new jupyter cell

Source code in scyan/tools/gating.py

class PolygonGatingUMAP:
    """Interactive selection of cells by drawing polygons on a UMAP.

    !!! note

        If used on a Jupyter Notebook, you should first run `%matplotlib tk`. After the selection, you can run `%matplotlib inline` to retrieve the default behavior.

    ```py
    # Usage example (`%matplotlib tk` is required for the cell selection on jupyter notebooks)
    >>> %matplotlib tk
    >>> selector = scyan.tools.PolygonGatingUMAP(adata)
    >>> selector.select()         # select the cells

    >>> sub_adata = selector.extract_adata() # on a notebook, this has to be on a new jupyter cell
    ```
    """

    def __init__(self, adata: AnnData) -> None:
        """
        Args:
            adata: An `AnnData` object. It must contain `adata.obsm["X_umap"]`.
        """
        self.adata = adata
        self.has_umap = _has_umap(adata)
        self.x_umap = self.adata.obsm["X_umap"]

    def select(self, s: float = 0.05) -> None:
        """Show the UMAP and let the user enclose cells within a polygon.

        Args:
            s: Size of the cells on the plot.
        """
        _, ax = plt.subplots()

        # Only the cells flagged by `self.has_umap` are drawn and can be selected
        coords = self.x_umap[self.has_umap]
        pts = ax.scatter(
            coords[:, 0], coords[:, 1], marker=".", rasterized=True, s=s
        )

        self.selector = _SelectFromCollection(ax, pts, coords)

        log.info(
            f"Enclose cells within a polygon. Helper:\n    - Click on the plot to add a polygon vertex\n    - Press the 'esc' key to start a new polygon\n    - Try holding the 'ctrl' key to move a single vertex\n    - Once the polygon is finished and overlaid in red, you can close the window"
        )
        plt.show()

    def save_selection(self, key_added: str = "scyan_selected"):
        """Write the selection into `adata.obs[key_added]` as a categorical column whose values are `"selected"` or `"unselected"`.

        Args:
            key_added: Column name used to save the selected cells in `adata.obs`.
        """
        self.adata.obs[key_added] = "unselected"

        # The selector indexes the displayed cells: map it back to positions in `adata`
        rows = np.where(self.has_umap)[0][self.selector.ind]
        column = self.adata.obs.columns.get_loc(key_added)
        self.adata.obs.iloc[rows, column] = "selected"

        self.adata.obs[key_added] = self.adata.obs[key_added].astype("category")

        self.selector.disconnect()
        log.info(
            f"Selected {len(self.selector.ind)} cells and saved the selection in adata.obs['{key_added}']"
        )

    def extract_adata(self) -> AnnData:
        """Returns the subset of the `AnnData` object made of the cells that were inside the polygon"""
        n_selected = len(self.selector.ind)
        log.info(f"Selected {n_selected} cells")
        self.selector.disconnect()

        displayed_rows = np.where(self.has_umap)[0]
        return self.adata[displayed_rows[self.selector.ind]]

`__init__(adata)`

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object.	required

Source code in scyan/tools/gating.py

def __init__(self, adata: AnnData) -> None:
    """Store the `AnnData` object together with its UMAP coordinates.

    Args:
        adata: An `AnnData` object. It must contain `adata.obsm["X_umap"]`.
    """
    self.adata = adata
    # Per-cell selector used by the other methods to index the rows of `x_umap`
    # (presumably a boolean mask of the cells that have a UMAP — confirm against `_has_umap`)
    self.has_umap = _has_umap(adata)
    self.x_umap = self.adata.obsm["X_umap"]

`select(s=0.05)`

Open a UMAP plot on which you can draw a polygon to select cells.

Parameters:

Name	Type	Description	Default
`s`	`float`	Size of the cells on the plot.	`0.05`

Source code in scyan/tools/gating.py

def select(self, s: float = 0.05) -> None:
    """Show the UMAP and let the user enclose cells within a polygon.

    Args:
        s: Size of the cells on the plot.
    """
    _, ax = plt.subplots()

    # Only the cells flagged by `self.has_umap` are drawn and can be selected
    coords = self.x_umap[self.has_umap]
    pts = ax.scatter(
        coords[:, 0], coords[:, 1], marker=".", rasterized=True, s=s
    )

    self.selector = _SelectFromCollection(ax, pts, coords)

    log.info(
        f"Enclose cells within a polygon. Helper:\n    - Click on the plot to add a polygon vertex\n    - Press the 'esc' key to start a new polygon\n    - Try holding the 'ctrl' key to move a single vertex\n    - Once the polygon is finished and overlaid in red, you can close the window"
    )
    plt.show()

`save_selection(key_added='scyan_selected')`

Save the selected cells in adata.obs[key_added].

Parameters:

Name	Type	Description	Default
`key_added`	`str`	Column name used to save the selected cells in `adata.obs`.	`'scyan_selected'`

Source code in scyan/tools/gating.py

def save_selection(self, key_added: str = "scyan_selected"):
    """Write the selection into `adata.obs[key_added]` as a categorical column whose values are `"selected"` or `"unselected"`.

    Args:
        key_added: Column name used to save the selected cells in `adata.obs`.
    """
    self.adata.obs[key_added] = "unselected"

    # The selector indexes the displayed cells: map it back to positions in `adata`
    rows = np.where(self.has_umap)[0][self.selector.ind]
    column = self.adata.obs.columns.get_loc(key_added)
    self.adata.obs.iloc[rows, column] = "selected"

    self.adata.obs[key_added] = self.adata.obs[key_added].astype("category")

    self.selector.disconnect()
    log.info(
        f"Selected {len(self.selector.ind)} cells and saved the selection in adata.obs['{key_added}']"
    )

`extract_adata()`

Returns an `AnnData` object containing the cells that were inside the polygon.

Source code in scyan/tools/gating.py

def extract_adata(self) -> AnnData:
    """Returns the subset of the `AnnData` object made of the cells that were inside the polygon"""
    n_selected = len(self.selector.ind)
    log.info(f"Selected {n_selected} cells")
    self.selector.disconnect()

    displayed_rows = np.where(self.has_umap)[0]
    return self.adata[displayed_rows[self.selector.ind]]

`scyan.tools.PolygonGatingScatter`

Class used to select cells on a scatterplot using polygons.

Note

If used on a Jupyter Notebook, you should first run %matplotlib tk. After the selection, you can run %matplotlib inline to retrieve the default behavior.

# Usage example (`%matplotlib tk` is required for the cell selection on jupyter notebooks)
>>> %matplotlib tk
>>> selector = scyan.tools.PolygonGatingScatter(adata)
>>> selector.select()         # select the cells

>>> sub_adata = selector.extract_adata() # on a notebook, this has to be on a new jupyter cell

Source code in scyan/tools/gating.py

class PolygonGatingScatter:
    """Interactive selection of cells by drawing polygons on a scatterplot.

    !!! note

        If used on a Jupyter Notebook, you should first run `%matplotlib tk`. After the selection, you can run `%matplotlib inline` to retrieve the default behavior.

    ```py
    # Usage example (`%matplotlib tk` is required for the cell selection on jupyter notebooks)
    >>> %matplotlib tk
    >>> selector = scyan.tools.PolygonGatingScatter(adata)
    >>> selector.select(x, y)     # select the cells (x and y are the names of the two axes)

    >>> sub_adata = selector.extract_adata() # on a notebook, this has to be on a new jupyter cell
    ```
    """

    def __init__(self, adata: AnnData) -> None:
        """
        Args:
            adata: An `AnnData` object.
        """
        self.adata = adata

    def select(
        self, x: str, y: str, s: float = 0.05, max_cells_display: int = 100_000
    ) -> None:
        """Show a scatter plot and let the user enclose cells within a polygon.

        Args:
            x: Name of the values used for the x-axis (retrieved with `adata.obs_vector(x)`).
            y: Name of the values used for the y-axis (retrieved with `adata.obs_vector(y)`).
            s: Size of the cells on the plot.
            max_cells_display: Maximum number of cells drawn on the plot. If there are more cells, a random subset is drawn. If `None`, all cells are drawn.
        """
        _, ax = plt.subplots()

        n_obs = self.adata.n_obs
        if max_cells_display is None or max_cells_display >= n_obs:
            displayed = np.arange(n_obs)
        else:
            displayed = np.random.choice(
                np.arange(n_obs), size=max_cells_display, replace=False
            )

        xy = np.stack([self.adata.obs_vector(x), self.adata.obs_vector(y)], axis=1)

        pts = ax.scatter(
            xy[displayed, 0], xy[displayed, 1], marker=".", rasterized=True, s=s
        )

        # The selector receives the coordinates of all the cells, not only the displayed ones
        self.selector = _SelectFromCollection(ax, pts, xy)

        log.info(
            f"Enclose cells within a polygon. Helper:\n    - Click on the plot to add a polygon vertex\n    - Press the 'esc' key to start a new polygon\n    - Try holding the 'ctrl' key to move a single vertex\n    - Once the polygon is finished and overlaid in red, you can close the window"
        )
        plt.show()

    def save_selection(self, key_added: str = "scyan_selected"):
        """Write the selection into `adata.obs[key_added]` as a categorical column whose values are `"selected"` or `"unselected"`.

        Args:
            key_added: Column name used to save the selected cells in `adata.obs`.
        """
        self.adata.obs[key_added] = "unselected"

        rows = self.selector.ind
        column = self.adata.obs.columns.get_loc(key_added)
        self.adata.obs.iloc[rows, column] = "selected"

        self.adata.obs[key_added] = self.adata.obs[key_added].astype("category")

        self.selector.disconnect()
        log.info(
            f"Selected {len(self.selector.ind)} cells and saved the selection in adata.obs['{key_added}']"
        )

    def extract_adata(self) -> AnnData:
        """Returns the subset of the `AnnData` object made of the cells that were inside the polygon"""
        n_selected = len(self.selector.ind)
        log.info(f"Selected {n_selected} cells")
        self.selector.disconnect()

        return self.adata[self.selector.ind]

`__init__(adata)`

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object.	required

Source code in scyan/tools/gating.py

def __init__(self, adata: AnnData) -> None:
    """Store the `AnnData` object on the instance as `self.adata`.

    Args:
        adata: An `AnnData` object.
    """
    self.adata = adata

`select(x, y, s=0.05, max_cells_display=100000)`

Open a scatter plot on which you can draw a polygon to select cells.

Parameters:

Name	Type	Description	Default
`x`	`str`	Column name of adata.obs used for the x-axis	required
`y`	`str`	Column name of adata.obs used for the y-axis	required
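`s`	`float`	Size of the cells on the plot.	`0.05`
`max_cells_display`	`int`	Maximum number of cells displayed on the plot. If `adata` has more cells, a random subset of this size is displayed. If `None`, all cells are displayed.	`100000`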

Source code in scyan/tools/gating.py

def select(
    self, x: str, y: str, s: float = 0.05, max_cells_display: int = 100_000
) -> None:
    """Open a scatter plot on which you can draw a polygon to select cells.

    The created selector is stored in `self.selector`.

    Args:
        x: Column name of adata.obs used for the x-axis
        y: Column name of adata.obs used for the y-axis
        s: Size of the cells on the plot.
        max_cells_display: Maximum number of cells displayed on the plot. If `adata` has more cells, a random subset of this size is displayed. If `None`, all cells are displayed.
    """
    _, ax = plt.subplots()

    n_obs = self.adata.n_obs
    subsample = max_cells_display is not None and max_cells_display < n_obs
    # Positions of the cells that are actually drawn (all of them unless subsampling).
    shown = (
        np.random.choice(np.arange(n_obs), size=max_cells_display, replace=False)
        if subsample
        else np.arange(n_obs)
    )

    x_values = self.adata.obs_vector(x)
    y_values = self.adata.obs_vector(y)
    coords = np.stack((x_values, y_values), axis=1)

    points = ax.scatter(
        coords[shown, 0],
        coords[shown, 1],
        marker=".",
        rasterized=True,
        s=s,
    )

    # The selector receives the coordinates of every cell, not only the displayed subset.
    self.selector = _SelectFromCollection(ax, points, coords)

    log.info(
        "Enclose cells within a polygon. Helper:\n"
        "    - Click on the plot to add a polygon vertex\n"
        "    - Press the 'esc' key to start a new polygon\n"
        "    - Try holding the 'ctrl' key to move a single vertex\n"
        "    - Once the polygon is finished and overlaid in red, you can close the window"
    )
    plt.show()

`save_selection(key_added='scyan_selected')`

Save the selected cells in adata.obs[key_added].

Parameters:

Name	Type	Description	Default
`key_added`	`str`	Column name used to save the selected cells in `adata.obs`.	`'scyan_selected'`

Source code in scyan/tools/gating.py

def save_selection(self, key_added: str = "scyan_selected"):
    """Store the current selection as a categorical column of `adata.obs`.

    Every cell is first labelled `"unselected"`; the cells whose positional
    index is in `self.selector.ind` are then relabelled `"selected"`. The
    selector is disconnected once the column is written.

    Args:
        key_added: Column name used to save the selected cells in `adata.obs`.
    """
    selector = self.selector

    # `self.adata.obs` is re-read on every statement on purpose (no local alias).
    self.adata.obs[key_added] = "unselected"
    column_position = self.adata.obs.columns.get_loc(key_added)
    # Positional assignment: `selector.ind` indexes rows, not obs names.
    self.adata.obs.iloc[selector.ind, column_position] = "selected"
    self.adata.obs[key_added] = self.adata.obs[key_added].astype("category")

    selector.disconnect()
    n_selected = len(selector.ind)
    log.info(
        f"Selected {n_selected} cells and saved the selection in adata.obs['{key_added}']"
    )

`extract_adata()`

Returns an `AnnData` object containing the cells that were inside the polygon.

Source code in scyan/tools/gating.py

def extract_adata(self) -> AnnData:
    """Return the cells that were inside the polygon.

    Returns:
        `self.adata` indexed by the selected cell indices (`self.selector.ind`).
    """
    selector = self.selector
    log.info(f"Selected {len(selector.ind)} cells")
    selector.disconnect()

    # The indices are read again after disconnecting, as the subset is built last.
    return self.adata[selector.ind]

Tools

scyan.tools.umap(adata, markers=None, obsm=None, n_cells=200000, min_dist=0.5, obsm_key='X_umap', filter=None, **umap_kwargs)

scyan.tools.leiden(adata, resolution=1, key_added='leiden', n_neighbors=15)

scyan.tools.subcluster(adata, population, markers=None, key='scyan_pop', resolution=0.2, size_ratio_th=0.02, min_cells_th=200, n_cells=100000)

scyan.tools.palette_level(table, population_index=0, level_index=1, hue_shift=0.4, alpha_l=0.25, step_l=0.15, alpha_s=0.3, step_s=0.4)

scyan.tools.cell_type_ratios(adata, groupby=None, normalize=True, key='scyan_pop', among=None)

scyan.tools.mean_intensities(adata, groupby=None, layer=None, key='scyan_pop', unstack_join=' mean intensity on ', obsm=None, obsm_names=None)

scyan.tools.PolygonGatingUMAP

__init__(adata)

select(s=0.05)

save_selection(key_added='scyan_selected')

extract_adata()

scyan.tools.PolygonGatingScatter

__init__(adata)

select(x, y, s=0.05, max_cells_display=100000)

save_selection(key_added='scyan_selected')

extract_adata()

`scyan.tools.umap(adata, markers=None, obsm=None, n_cells=200000, min_dist=0.5, obsm_key='X_umap', filter=None, **umap_kwargs)`

`scyan.tools.leiden(adata, resolution=1, key_added='leiden', n_neighbors=15)`

`scyan.tools.subcluster(adata, population, markers=None, key='scyan_pop', resolution=0.2, size_ratio_th=0.02, min_cells_th=200, n_cells=100000)`

`scyan.tools.palette_level(table, population_index=0, level_index=1, hue_shift=0.4, alpha_l=0.25, step_l=0.15, alpha_s=0.3, step_s=0.4)`

`scyan.tools.cell_type_ratios(adata, groupby=None, normalize=True, key='scyan_pop', among=None)`

`scyan.tools.mean_intensities(adata, groupby=None, layer=None, key='scyan_pop', unstack_join=' mean intensity on ', obsm=None, obsm_names=None)`

`scyan.tools.PolygonGatingUMAP`

`__init__(adata)`

`select(s=0.05)`

`save_selection(key_added='scyan_selected')`

`extract_adata()`

`scyan.tools.PolygonGatingScatter`

`__init__(adata)`

`select(x, y, s=0.05, max_cells_display=100000)`

`save_selection(key_added='scyan_selected')`

`extract_adata()`