# Source code for resdk.tables.methylation

""".. Ignore pydocstyle D400.

=================
MethylationTables
=================

.. autoclass:: MethylationTables
    :members:
    :inherited-members:

    .. automethod:: __init__

"""

from functools import lru_cache
from typing import Callable, List, Optional

import pandas as pd

from resdk.resources import Collection

from .base import BaseTables

# NOTE(review): not referenced anywhere in the visible part of this module;
# presumably a batch size consumed by code elsewhere -- confirm before changing.
CHUNK_SIZE = 1000


class MethylationTables(BaseTables):
    """A helper class to fetch collection's methylation and meta data.

    This class enables fetching given collection's data and returning it
    as tables which have samples in rows and methylation/metadata in
    columns.

    A simple example:

    .. code-block:: python

        # Get Collection object
        collection = res.collection.get("collection-slug")

        # Fetch collection methylation and metadata
        tables = MethylationTables(collection)
        meta = tables.meta
        beta = tables.beta
        m_values = tables.mval

    """

    # NOTE(review): presumably used by ``BaseTables`` to select the relevant
    # data objects of the collection -- confirm against the base class.
    process_type = "data:methylation:"

    # Data type identifiers. They double as the column names that
    # ``_parse_file`` reads from each sample's file.
    BETA = "betas"
    MVAL = "mvals"

    # Both data types map to the same field name.
    data_type_to_field_name = {
        BETA: "methylation_data",
        MVAL: "methylation_data",
    }

    def __init__(
        self,
        collection: Collection,
        cache_dir: Optional[str] = None,
        progress_callable: Optional[Callable] = None,
    ):
        """Initialize class.

        :param collection: collection to use
        :param cache_dir: cache directory location, if not specified system specific
                          cache directory is used
        :param progress_callable: custom callable that can be used to report
                                  progress. By default, progress is written to
                                  stderr with tqdm
        """
        super().__init__(collection, cache_dir, progress_callable)

        # Column labels of the most recently loaded beta / m-value table;
        # stays empty until ``beta`` or ``mval`` is accessed.
        self.probe_ids = []  # type: List[str]

    def _load_table_once(self, data_type: str) -> pd.DataFrame:
        """Load the table for ``data_type`` once and memoize it on the instance.

        The memo lives on the instance rather than in a class-level
        ``lru_cache``, so a cached table does not outlive the object that
        produced it.

        :param data_type: one of ``self.BETA`` / ``self.MVAL``
        :return: table returned by ``self._load_fetch(data_type)``
        """
        # ``getattr`` keeps this working even when ``__init__`` was bypassed.
        memo = getattr(self, "_methylation_table_memo", None)
        if memo is None:
            memo = {}
            self._methylation_table_memo = memo

        if data_type not in memo:
            table = self._load_fetch(data_type)
            # ``probe_ids`` is refreshed only on the first (uncached) load of
            # a given table.
            self.probe_ids = table.columns.tolist()
            memo[data_type] = table
        return memo[data_type]

    @property
    def beta(self) -> pd.DataFrame:
        """Return beta values table as a pandas DataFrame object."""
        return self._load_table_once(self.BETA)

    @property
    def mval(self) -> pd.DataFrame:
        """Return m-values as a pandas DataFrame object."""
        return self._load_table_once(self.MVAL)

    def _download_qc(self) -> pd.DataFrame:
        """Return an empty table in place of sample QC data."""
        return pd.DataFrame()

    def _parse_file(self, file_obj, sample_id, data_type: str) -> pd.Series:
        """Parse one sample's file into a Series indexed by probe id.

        The file is read as gzip-compressed, tab-separated text. Only the
        ``probe_ids`` column and the column named by ``data_type`` are read.

        :param file_obj: path or file-like object accepted by ``pd.read_csv``
        :param sample_id: value assigned as the name of the returned Series
        :param data_type: name of the value column to extract
        :return: values of the ``data_type`` column, indexed by ``probe_ids``
                 and named ``sample_id``
        """
        sample_data = pd.read_csv(
            file_obj,
            sep="\t",
            compression="gzip",
            usecols=["probe_ids", data_type],
            index_col="probe_ids",
        )[data_type]
        sample_data.name = sample_id
        return sample_data