Preprocessing

`scyan.preprocess.auto_logicle_transform(adata, q=0.05, m=4.5, quantile_clip=1e-05)`

Auto-logicle transformation implementation. We recommend it for flow cytometry or spectral flow cytometry data.

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object.	required
`q`	`float`	See logicle article. Defaults to 0.05.	`0.05`
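`m`	`float`	See logicle article. Defaults to 4.5.	`4.5`
`quantile_clip`	`Optional[float]`	If not `None`, transformed values of each marker below this quantile are clipped to it. Use `None` to disable clipping.	`1e-05`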

Source code in scyan/preprocess.py

def auto_logicle_transform(
    adata: AnnData, q: float = 0.05, m: float = 4.5, quantile_clip: Optional[float] = 1e-5
) -> None:
    """[Auto-logicle transformation](https://pubmed.ncbi.nlm.nih.gov/16604519/) implementation.
    We recommend it for flow cytometry or spectral flow cytometry data.

    The transformation is applied in place, one marker at a time. The parameters
    `[t, m, w]` used for each marker are stored in `adata.uns["scyan_logicle"]`.
    When `w` cannot be estimated for a marker (or is larger than 2), the fixed
    parameters `w=1, t=5e5` are used instead and a warning lists these markers.

    Args:
        adata: An `AnnData` object.
        q: See logicle article. Defaults to 0.05.
        m: See logicle article. Defaults to 4.5.
        quantile_clip: If not `None`, the transformed values of each marker that are below this quantile are clipped to it. Use `None` to disable clipping.
    """
    adata.uns["scyan_logicle"] = {}
    markers_failed = []

    for marker in adata.var_names:
        column = adata[:, marker].X.toarray().flatten()

        w = 0
        t = column.max()
        negative_values = column[column < 0]

        # `w` is estimated from the negative values, and only when the maximum `t`
        # is positive: otherwise `log10(t / abs(r))` is undefined and would yield
        # a NaN that silently bypasses the fallback below.
        if negative_values.size and t > 0:
            # Discard low outliers (below Q1 - 1.5 * IQR) among the negative values
            threshold = np.quantile(negative_values, 0.25) - 1.5 * scipy.stats.iqr(
                negative_values
            )
            negative_values = negative_values[negative_values >= threshold]

            if negative_values.size:
                r = 1e-8 + np.quantile(negative_values, q)
                if 10**m * abs(r) > t:
                    w = (m - np.log10(t / abs(r))) / 2

        # Written as a range check so that a NaN is rejected as well
        if not 0 < w <= 2:
            markers_failed.append(marker)
            w, t = 1, 5e5

        column = flowutils.transforms.logicle(column, None, t=t, m=m, w=w)
        adata.uns["scyan_logicle"][marker] = [t, m, w]

        if quantile_clip is None:
            adata[:, marker] = column
        else:
            # Only a lower bound is given: values below the quantile are raised to it
            adata[:, marker] = column.clip(np.quantile(column, quantile_clip))

    if markers_failed:
        log.warning(
            f"Auto logicle transformation failed for the following markers (logicle was used instead): {', '.join(markers_failed)}.\nIt can happen when expressions are all positive or all negative."
        )

`scyan.preprocess.asinh_transform(adata, translation=0, cofactor=5)`

Asinh transformation for cell-expressions: \(asinh((x - translation)/cofactor)\).

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object.	required
`translation`	`float`	Constant subtracted from the marker expression before division by the cofactor.	`0`
`cofactor`	`float`	Scaling factor before computing the asinh.	`5`

Source code in scyan/preprocess.py

def asinh_transform(adata: AnnData, translation: float = 0, cofactor: float = 5) -> None:
    """Apply, in place, the asinh transformation $asinh((x - translation)/cofactor)$ to `adata.X`.

    The pair `[translation, cofactor]` is stored in `adata.uns["scyan_asinh"]`.

    Args:
        adata: An `AnnData` object.
        translation: Constant subtracted from the marker expression before division by the cofactor.
        cofactor: Scaling factor applied (as a divisor) before computing the asinh.
    """
    # Remember the parameters that were used
    adata.uns["scyan_asinh"] = [translation, cofactor]

    shifted = adata.X - translation
    adata.X = np.arcsinh(shifted / cofactor)

`scyan.preprocess.inverse_transform(adata, obsm=None, obsm_names=None, transformation=None)`

Inverts the transformation function, i.e. either scyan.preprocess.auto_logicle_transform or scyan.preprocess.asinh_transform. It requires having run one of these beforehand.

Note

If you scaled your data, the complete inverse consists in running scyan.preprocess.unscale first, and then this function.

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object.	required
`obsm`	`Optional[str]`	Name of the anndata obsm to consider. If `None`, use `adata.X`.	`None`
`obsm_names`	`Optional[List[str]]`	Names of the ordered markers from obsm. It is required if obsm is not `None`, if there are fewer markers than in `adata.X`, and if the transformation to reverse is `logicle`. Usually, it corresponds to `model.var_names`.	`None`
`transformation`	`Optional[str]`	Name of the transformation to inverse: one of `['logicle', 'asinh', None]`. By default, it chooses automatically depending on which transformation was previously run.	`None`

Returns:

Type	Description
`ndarray`	Inverse transformed expressions array of shape \((N, M)\).

Source code in scyan/preprocess.py

def inverse_transform(
    adata: AnnData,
    obsm: Optional[str] = None,
    obsm_names: Optional[List[str]] = None,
    transformation: Optional[str] = None,
) -> np.ndarray:
    """Inverts the transformation function, i.e. either [scyan.preprocess.auto_logicle_transform][] or [scyan.preprocess.asinh_transform][]. It requires having run one of these beforehand.

    !!! note
        If you scaled your data, the complete inverse consists in running [scyan.preprocess.unscale][] first, and then this function.

    Args:
        adata: An `AnnData` object.
        obsm: Name of the anndata obsm to consider. If `None`, use `adata.X`.
        obsm_names: Names of the ordered markers from obsm. It is required if obsm is not `None`, if there are fewer markers than in `adata.X`, and if the transformation to reverse is `logicle`. Usually, it corresponds to `model.var_names`.
        transformation: Name of the transformation to inverse: one of `['logicle', 'asinh', None]`. By default, it chooses automatically depending on which transformation was previously run.

    Returns:
        Inverse transformed expressions array of shape $(N, M)$.

    Raises:
        ValueError: If `transformation` is `None` and no transformation was found in `adata.uns`.
        NameError: If `transformation` is neither `'logicle'`, `'asinh'` nor `None`.
    """
    if transformation is None:
        # Auto-detection: logicle takes precedence when both transformations are recorded
        transformation = "asinh" if "scyan_asinh" in adata.uns else None
        transformation = "logicle" if "scyan_logicle" in adata.uns else transformation
        if transformation is None:
            raise ValueError(
                "No transformation to inverse: you need to run 'asinh_transform' or 'auto_logicle_transform' before to inverse it."
            )

    if transformation == "logicle":
        log.info("Performing inverse logicle transform")
        assert (
            "scyan_logicle" in adata.uns
        ), "You need to run 'auto_logicle_transform' before to inverse it."

        if obsm is None:
            obsm_names = adata.var_names
        elif obsm_names is None:
            # Without explicit names, the columns of the obsm can only be matched to
            # `adata.var_names` if there are exactly `adata.n_vars` of them.
            assert (
                adata.obsm[obsm].shape[1] == adata.n_vars
            ), f"When the number of var in adata.obsm['{obsm}'] is not `adata.n_vars`, use `obsm_names`"
            obsm_names = adata.var_names

        # The logicle parameters are per marker, so each column is inverted on its own
        return np.stack(
            [_logicle_inverse_one(adata, obsm, marker) for marker in obsm_names],
            axis=1,
        )

    if transformation == "asinh":
        log.info("Performing inverse asinh transform")
        assert (
            "scyan_asinh" in adata.uns
        ), "You need to run 'asinh_transform' before to inverse it."

        X = adata.X if obsm is None else adata.obsm[obsm]
        translation, cofactor = adata.uns["scyan_asinh"]

        return np.sinh(X) * cofactor + translation

    raise NameError(
        f"Parameter 'transformation' has to be 'logicle' or 'asinh'. Found {transformation}."
    )

`scyan.preprocess.scale(adata, max_value=10, center=None)`

Transforms the data such that (i) std=1, and (ii) either 0 is sent to -1 (for CyTOF data) or means=0 (for flow or spectral flow data); except if center is set (which overwrites the default behavior).

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object.	required
`max_value`	`float`	Clip to this value after scaling.	`10`
`center`	`Optional[bool]`	If `None`, data is only centered for spectral or flow cytometry data (recommended), else, it is centered or not according to the value given.	`None`

Source code in scyan/preprocess.py

def scale(adata: AnnData, max_value: float = 10, center: Optional[bool] = None) -> None:
    """Standardise `adata.X` in place such that (i) `std=1`, and (ii) either `0` is sent to `-1` (for CyTOF data) or `means=0` (for flow or spectral flow data); except if `center` is set (which overwrites the default behavior).

    The standard deviations are stored in `adata.uns["scyan_scaling_stds"]` and, when the
    data is centered, the means are stored in `adata.uns["scyan_scaling_means"]`.

    Args:
        adata: An `AnnData` object.
        max_value: After scaling, values are clipped to `[-max_value, max_value]`.
        center: If `None`, data is only centered for spectral or flow cytometry data (recommended), else, it is centered or not according to the value given.
    """
    per_marker_std = adata.X.std(axis=0)
    adata.uns["scyan_scaling_stds"] = per_marker_std

    # With no explicit choice, centering is skipped only if the asinh transform was run
    skip_centering = (center is False) or (
        center is None and "scyan_asinh" in adata.uns
    )

    if not skip_centering:
        log.info(
            "Data will be centered and standardised. This is advised only when using spectral/flow data (if this is not your case, consider running 'asinh_transform' instead of 'auto_logicle_transform')."
        )
        per_marker_mean = adata.X.mean(axis=0)
        standardised = (adata.X - per_marker_mean) / per_marker_std
        adata.X = standardised.clip(-max_value, max_value)
        adata.uns["scyan_scaling_means"] = per_marker_mean
        return

    log.info(
        "Data will be standardised, and translated so that 0 goes to -1. This is advised only when using CyTOF data (if this is not your case, consider running 'auto_logicle_transform' instead of 'asinh_transform')."
    )
    shifted = adata.X / per_marker_std - 1
    adata.X = shifted.clip(-max_value, max_value)

`scyan.preprocess.unscale(adata, obsm=None, obsm_names=None)`

Reverse standardisation. It requires to have run scyan.preprocess.scale before.

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object.	required
`obsm`	`Optional[str]`	Name of the adata obsm to consider. If `None`, use `adata.X`.	`None`
`obsm_names`	`Optional[List[str]]`	Names of the ordered markers from obsm. It is required if obsm is not `None`, and if there are less markers than in `adata.X`. Usually, it corresponds to `model.var_names`.	`None`

Returns:

Type	Description
`ndarray`	Unscaled numpy array of shape \((N, M)\).

Source code in scyan/preprocess.py

def unscale(
    adata: AnnData, obsm: Optional[str] = None, obsm_names: Optional[List[str]] = None
) -> np.ndarray:
    """Reverse standardisation. It requires to have run [scyan.preprocess.scale][] before.

    Args:
        adata: An `AnnData` object.
        obsm: Name of the adata obsm to consider. If `None`, use `adata.X`.
        obsm_names: Names of the ordered markers from obsm. It is required if obsm is not `None`, and if there are fewer markers than in `adata.X`. Usually, it corresponds to `model.var_names`.

    Returns:
        Unscaled numpy array of shape $(N, M)$.
    """
    assert (
        "scyan_scaling_stds" in adata.uns
    ), "It seems you haven't run 'scyan.preprocess.scale' before."

    X = adata.X if obsm is None else adata.obsm[obsm]
    stds = adata.uns["scyan_scaling_stds"]

    # The means are only stored when the data was centered during scaling
    centered = "scyan_scaling_means" in adata.uns
    means = adata.uns["scyan_scaling_means"] if centered else None

    if obsm is not None and X.shape[1] != adata.n_vars:
        assert (
            obsm_names is not None
        ), f"Found {X.shape[1]} markers in adata.obsm['{obsm}'], but 'adata' has {adata.n_vars} vars. Please use the 'obsm_names' argument to provide the ordered names of the markers used in adata.obsm['{obsm}']."

        # Keep only the parameters of the markers in the obsm, in the obsm order.
        # Both stds and means must be subset, else `means + stds * X` cannot broadcast.
        indices = [adata.var_names.get_loc(marker) for marker in obsm_names]
        stds = stds[indices]
        if centered:
            means = means[indices]

    if centered:
        return means + stds * X

    # Not centered: scaling was `x / std - 1`, whose inverse is `(x + 1) * std`
    return (X + 1) * stds

`scyan.preprocess.compensate(adata, key_added=None)`

Use the spillover matrix in adata.varp["spillover_matrix"] to correct spillover from adata.X

Parameters:

Name	Type	Description	Default
`adata`	`AnnData`	An `AnnData` object	required
`key_added`	`Optional[str]`	Optional key in `adata.layers` under which the corrected expressions are saved. By default, they are saved in `adata.X`.	`None`

Source code in scyan/preprocess.py

def compensate(adata: AnnData, key_added: Optional[str] = None) -> None:
    """Use the spillover matrix in `adata.varp["spillover_matrix"]` to correct spillover from `adata.X`.

    A warning is logged if `adata.uns` shows that the data was already transformed or
    scaled, since compensation is meant to be applied to raw data.

    Args:
        adata: An `AnnData` object
        key_added: Optional key in `adata.layers` under which the corrected expressions are saved. By default, they are saved in `adata.X`.
    """
    assert "spillover_matrix" in adata.varp, "No 'spillover_matrix' found in adata.varp"

    if any(
        name in adata.uns
        for name in ["scyan_asinh", "scyan_logicle", "scyan_scaling_means"]
    ):
        # `Logger.warn` is a deprecated alias (removed in Python 3.13): use `warning`
        log.warning("It is recommended to apply spillover only on raw data (unprocessed)")

    S = adata.varp["spillover_matrix"]
    # Solves `S @ Y = X.T` and transposes back, i.e. each corrected cell `y` satisfies `S @ y = x`.
    # NOTE(review): confirm this matches the orientation in which 'spillover_matrix' is stored;
    # if the matrix follows the convention `observed = true @ S`, then `S.T` is needed here.
    corrected = np.linalg.solve(S, adata.X.T).T

    if key_added is None:
        adata.X = corrected
    else:
        adata.layers[key_added] = corrected