Source code for pyActigraphy.metrics.metrics

import pandas as pd
import numpy as np
import re
# from functools import lru_cache
from ..utils.utils import _average_daily_activity
from ..utils.utils import _activity_onset_time
from ..utils.utils import _activity_offset_time
from ..utils.utils import _shift_time_axis
# from ..sleep.scoring import AonT, AoffT
from statistics import mean
import statsmodels.api as sm

__all__ = [
    'MetricsMixin',
    'ForwardMetricsMixin',
    # '_average_daily_activity',
    '_average_daily_total_activity',
    '_interdaily_stability',
    '_intradaily_variability',
    '_lmx', '_interval_maker',
    '_count_consecutive_values',
    '_count_consecutive_zeros',
    '_transition_prob',
    '_transition_prob_sustain_region',
    '_td_format']


def _average_daily_total_activity(data, rescale, exclude_ends):
    r"""Calculate the average daily activity"""

    # Shortcut: if rescale is false, compute the daily average
    if rescale is False:
        daily_sum = data.resample('1D').sum()
    else:
        # Aggregate data daily:
        # - compute the daily sum
        # - count the number of epochs included in each day
        daily_agg = data.resample('1D').agg(['count', 'sum'])

        # Compute weights as a function of the number of epochs per day:
        # weight =  (#epochs/day) / (#count/day)
        # NB: needed to account for potentially masked periods.
        daily_agg['weigth'] = (pd.Timedelta('24h')/data.index.freq)
        daily_agg['weigth'] /= daily_agg['count']

        # Rescale activity
        daily_sum = daily_agg['sum']*daily_agg['weigth']

    # Exclude first and last days
    if exclude_ends:
        daily_sum = daily_sum.iloc[1:-1]

    return daily_sum.mean()


def _interdaily_stability(data):
    r"""Calculate the interdaily stability"""

    d_24h = data.groupby([
        data.index.hour,
        data.index.minute,
        data.index.second]
    ).mean().var()

    d_1h = data.var()

    return (d_24h / d_1h)


def _intradaily_variability(data):
    r"""Calculate the intradaily variability"""

    c_1h = data.diff(1).pow(2).mean()

    d_1h = data.var()

    return (c_1h / d_1h)


def _lmx(data, period, lowest=True):
    """Calculate the start time and mean activity of the period of
    lowest/highest activity"""

    avgdaily = _average_daily_activity(data=data, cyclic=True)

    n_epochs = int(pd.Timedelta(period)/avgdaily.index.freq)

    mean_activity = avgdaily.rolling(period).sum().shift(-n_epochs+1)

    if lowest:
        t_start = mean_activity.idxmin()
    else:
        t_start = mean_activity.idxmax()

    lmx = mean_activity[t_start]/n_epochs
    return t_start, lmx


def _interval_maker(index, period, verbose):
    """ """
    # TODO: test if period is a valid string

    (num_periods, td) = divmod(
        (index[-1] - index[0]), pd.Timedelta(period)
    )
    if verbose:
        print("Number of periods: {0}\n Time unaccounted for: {1}".format(
            num_periods,
            '{} days, {}h, {}m, {}s'.format(
                td.days,
                td.seconds//3600,
                (td.seconds//60) % 60,
                td.seconds % 60
            )
        ))

    intervals = [(
        index[0] + (i)*pd.Timedelta(period),
        index[0] + (i+1)*pd.Timedelta(period))
        for i in range(0, num_periods)
    ]

    return intervals


def _count_consecutive_values(data):
    """ Create a count list for identical consecutive numbers
    together with a state for each series:
     - 1 if the sum of the consecutive series numbers is positive
     - 0 otherwise
    """

    consecutive_values = data.groupby(
        # create identical 'labels' for identical consecutive numbers
        [data.diff().ne(0).cumsum()]
    ).agg(['count', lambda x: (np.sum(x) > 0).astype(int)])
    # rename columns
    consecutive_values.columns = ['counts', 'state']

    return consecutive_values


def _count_consecutive_zeros(data):
    ccz = _count_consecutive_values(data)
    ccz['end'] = ccz['counts'].cumsum()
    ccz['start'] = ccz['end'].shift(1).fillna(0).astype(int)
    return ccz[ccz['state'] < 1]


def _transition_prob(data, from_zero_to_one):

    # Create a list of consecutive sequence of active/rest epochs
    ccv = _count_consecutive_values(data)
    # filter out sequences of active epochs
    if from_zero_to_one is True:
        bouts = ccv[ccv['state'] < 1]['counts']
    else:
        bouts = ccv[ccv['state'] > 0]['counts']
    # Count the number of sequences of length N for N=1...Nmax
    Nt = bouts.groupby(bouts).count()
    # Create its reverse cumulative sum so that Nt at index t is equal to
    # the number of sequences of lengths t or longer.
    Nt = np.cumsum(Nt[::-1])[::-1]
    # Rest->Activity (or Activity->Rest) transition probability at time t,
    # defined as the number of sequences for which R->A at time t+1 / Nt
    prob = Nt.diff(-1)/Nt
    # Correct pRA for discontinuities due to sparse data
    prob = prob.dropna() / np.diff(prob.index.tolist())
    # Define the weights as the square root of the number of runs
    # contributing to each probability estimate
    prob_weights = np.sqrt(Nt+Nt.shift(-1)).dropna()

    return prob, prob_weights


def _transition_prob_sustain_region(prob, prob_weights, frac=.3, it=0):

    # Fit the 'prob' distribution with a LOWESS
    lowess = sm.nonparametric.lowess(
        prob.values, prob.index, return_sorted=False, frac=frac, it=it
    )

    # Calculate the pRA std
    std = prob.std()

    # Check which residuals are below 1 sigma
    prob_residuals_below_one_std = _count_consecutive_values(
        ((prob-lowess).abs() < std).astype(int)
    )

    # Find the index of the longest series of consecutive values below 1 SD
    index = prob_residuals_below_one_std[
        prob_residuals_below_one_std['state'] > 0
    ]['counts'].idxmax()-1

    # Calculate the cumulative sum of the indices of series of consecutive
    # values of residuals below 1 SD in order to find the number of points
    # before the "index".
    prob_cumsum = prob_residuals_below_one_std['counts'].cumsum()

    # Calculate the start and end indices
    if index < prob_cumsum.index.min():
        start_index = 0
    else:
        start_index = prob_cumsum[index]
    # start_index = prob_cumsum[index]+1
    end_index = prob_cumsum[index+1]

    kProb = np.average(
        prob[start_index:end_index],
        weights=prob_weights[start_index:end_index]
    )
    return kProb


def _td_format(td):
    return '{:02}:{:02}:{:02}'.format(
        td.components.hours,
        td.components.minutes,
        td.components.seconds
    )


[docs]class MetricsMixin(object):
    """ Mixin Class """

[docs]    def average_daily_activity(
        self,
        freq='5min',
        cyclic=False,
        binarize=True,
        threshold=4,
        time_origin=None,
        whs='1h'
    ):
        r"""Average daily activity distribution

        Calculate the daily profile of activity. Data are averaged over all the
        days.

        Parameters
        ----------
        freq: str, optional
            Data resampling frequency.
            Cf. #timeseries-offset-aliases in
            <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>.
        cyclic: bool, optional
            If set to True, two daily profiles are concatenated to ensure
            continuity between the last point of the day and the first one.
            Default is False.
        binarize: bool, optional
            If set to True, the data are binarized.
            Default is True.
        threshold: int, optional
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
        time_origin: str or pd.Timedelta, optional
            If not None, origin of the time axis for the daily profile.
            Original time bins are translated as time delta with respect to
            this new origin.
            Default is None
            Supported time string: 'AonT', 'AoffT', any 'HH:MM:SS'
        whs: str, optional
            Window half size parameter for the detection of the activity
            onset/offset time. Relevant only if time_origin is set to
            'AonT' or AoffT'.
            Default is '1h'.

        Returns
        -------
        raw : pandas.Series
            A Series containing the daily activity profile with a 24h index.
        """
        data = self.resampled_data(freq, binarize, threshold)

        if time_origin is None:

            return _average_daily_activity(data, cyclic=cyclic)

        else:
            if cyclic is True:
                raise NotImplementedError(
                    'Setting a time origin while cyclic option is True is not '
                    'implemented yet.'
                )

            avgdaily = _average_daily_activity(data, cyclic=False)

            if isinstance(time_origin, str):
                # Regex pattern for HH:MM:SS time string
                pattern = re.compile(
                    r"^([0-1]\d|2[0-3])(?::([0-5]\d))(?::([0-5]\d))$"
                )

                if time_origin == 'AonT':
                    # Convert width half size from Timedelta to a nr of points
                    whs = int(pd.Timedelta(whs)/data.index.freq)
                    time_origin = _activity_onset_time(avgdaily, whs=whs)
                elif time_origin == 'AoffT':
                    # Convert width half size from Timedelta to a nr of points
                    whs = int(pd.Timedelta(whs)/data.index.freq)
                    time_origin = _activity_offset_time(avgdaily, whs=whs)
                elif pattern.match(time_origin):
                    time_origin = pd.Timedelta(time_origin)
                else:
                    raise ValueError(
                        'Time origin format ({}) not supported.\n'.format(
                            time_origin
                        )
                        + 'Supported format: {}.'.format('HH:MM:SS')
                    )

            elif not isinstance(time_origin, pd.Timedelta):
                raise ValueError(
                    'Time origin is neither a time string with a supported'
                    'format, nor a pd.Timedelta.'
                )

            # Round time origin to the required frequency
            time_origin = time_origin.round(data.index.freq)

            shift = int((pd.Timedelta('12h')-time_origin)/data.index.freq)

            return _shift_time_axis(avgdaily, shift)

[docs]    def average_daily_light(self, freq='5min', cyclic=False):
        r"""Average daily light distribution

        Calculate the daily profile of light exposure (in lux). Data are
        averaged over all the days.

        Parameters
        ----------
        freq: str, optional
            Data resampling frequency.
            Cf. #timeseries-offset-aliases in
            <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>.
        cyclic: bool, optional
            If set to True, two daily profiles are concatenated to ensure
            continuity between the last point of the day and the first one.
            Default is False.

        Returns
        -------
        raw : pandas.Series
            A Series containing the daily profile of light exposure with a 24h
            index.
        """

        light = self.resampled_light(freq)

        avgdaily_light = _average_daily_activity(light, cyclic=cyclic)

        return avgdaily_light

[docs]    def ADAT(
        self, binarize=True, threshold=4, rescale=True, exclude_ends=False
    ):
        """Total average daily activity

        Calculate the total activity counts, averaged over all the days.

        Parameters
        ----------
        binarize: bool, optional
            If set to True, the data are binarized.
            Default is True.
        threshold: int, optional
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
        rescale: bool, optional
            If set to True, the activity counts are rescaled to account for
            masked periods (if any).
            Default is True.
        exclude_ends: bool, optional
            If set to True, the first and last daily periods are excluded from
            the calculation. Useful when the recording does start or end at
            midnigth.
            Default is False.

        Returns
        -------
        adat : int
        """

        if binarize is True:
            data = self.binarized_data(threshold)
        else:
            data = self.data

        adat = _average_daily_total_activity(
            data, rescale=rescale, exclude_ends=exclude_ends)

        return adat

[docs]    def ADATp(
        self,
        period='7D',
        binarize=True,
        threshold=4,
        rescale=True,
        exclude_ends=False,
        verbose=False
    ):
        """Total average daily activity per period

        Calculate the total activity counts, averaged over each consecutive
        period contained in the data. The number of periods

        Parameters
        ----------
        period: str, optional
            Time length of the period to be considered. Must be understandable
            by pandas.Timedelta
        binarize: bool, optional
            If set to True, the data are binarized.
            Default is True.
        threshold: int, optional
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
        rescale: bool, optional
            If set to True, the activity counts are rescaled to account for
            masked periods (if any).
            Default is True.
        exclude_ends: bool, optional
            If set to True, the first and last daily periods are excluded from
            the calculation. Useful when the recording does start or end at
            midnigth.
            Default is False.
        verbose: bool, optional
            If set to True, display the number of periods found in the data.
            Also display the time not accounted for.
            Default is False.

        Returns
        -------
        adatp : list of int
        """

        if binarize is True:
            data = self.binarized_data(threshold)
        else:
            data = self.data

        intervals = _interval_maker(data.index, period, verbose)

        results = [
            _average_daily_total_activity(
                data[time[0]:time[1]],
                rescale=rescale,
                exclude_ends=exclude_ends
            ) for time in intervals
        ]

        return results

[docs]    def L5(self, binarize=True, threshold=4):
        r"""L5

        Mean activity during the 5 least active hours of the day.

        Parameters
        ----------
        binarize: bool, optional
            If set to True, the data are binarized.
            Default is True.
        threshold: int, optional
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
            Default is 4.

        Returns
        -------
        l5: float


        Notes
        -----

        The L5 [1]_ variable is calculated as the mean, per acquisition period,
        of the average daily activities during the 5 least active hours.

        .. warning:: The value of this variable depends on the length of the
                     acquisition period.

        References
        ----------

        .. [1] Van Someren, E.J.W., Lijzenga, C., Mirmiran, M., Swaab, D.F.
               (1997). Long-Term Fitness Training Improves the Circadian
               Rest-Activity Rhythm in Healthy Elderly Males.
               Journal of Biological Rhythms, 12(2), 146–156.
               http://doi.org/10.1177/074873049701200206

        Examples
        --------

            >>> import pyActigraphy
            >>> rawAWD = pyActigraphy.io.read_raw_awd(fpath + 'SUBJECT_01.AWD')
            >>> rawAWD.L5()
            0.XXXX
            >>> rawAWD.L5(binarize=False)
            0.XXXX
        """

        if binarize is True:
            data = self.binarized_data(threshold)
        else:
            data = self.data

        # n_epochs = int(pd.Timedelta('5H')/self.frequency)

        _, l5 = _lmx(data, '5H', lowest=True)

        return l5

[docs]    def M10(self, binarize=True, threshold=4):
        r"""M10

        Mean activity during the 10 most active hours of the day.

        Parameters
        ----------
        binarize: bool, optional
            If set to True, the data are binarized.
            Default is True.
        threshold: int, optional
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
            Default is 4.

        Returns
        -------
        m10: float


        Notes
        -----

        The M10 [1]_ variable is calculated as the mean, per acquisition period
        , of the average daily activities during the 10 most active hours.

        .. warning:: The value of this variable depends on the length of the
                     acquisition period.

        References
        ----------

        .. [1] Van Someren, E.J.W., Lijzenga, C., Mirmiran, M., Swaab, D.F.
               (1997). Long-Term Fitness Training Improves the Circadian
               Rest-Activity Rhythm in Healthy Elderly Males.
               Journal of Biological Rhythms, 12(2), 146–156.
               http://doi.org/10.1177/074873049701200206

        Examples
        --------

            >>> import pyActigraphy
            >>> rawAWD = pyActigraphy.io.read_raw_awd(fpath + 'SUBJECT_01.AWD')
            >>> rawAWD.M10()
            0.XXXX
            >>> rawAWD.M10(binarize=False)
            0.XXXX
        """

        if binarize is True:
            data = self.binarized_data(threshold)
        else:
            data = self.data

        # n_epochs = int(pd.Timedelta('10H')/self.frequency)

        _, m10 = _lmx(data, '10H', lowest=False)

        return m10

[docs]    def RA(self, binarize=True, threshold=4):
        r"""Relative rest/activity amplitude

        Relative amplitude between the mean activity during the 10 most active
        hours of the day and the mean activity during the 5 least active hours
        of the day.

        Parameters
        ----------
        binarize: bool, optional
            If set to True, the data are binarized.
            Default is True.
        threshold: int, optional
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
            Default is 4.

        Returns
        -------
        ra: float


        Notes
        -----

        The RA [1]_ variable is calculated as:

        .. math::

            RA = \frac{M10 - L5}{M10 + L5}

        References
        ----------

        .. [1] Van Someren, E.J.W., Lijzenga, C., Mirmiran, M., Swaab, D.F.
               (1997). Long-Term Fitness Training Improves the Circadian
               Rest-Activity Rhythm in Healthy Elderly Males.
               Journal of Biological Rhythms, 12(2), 146–156.
               http://doi.org/10.1177/074873049701200206

        Examples
        --------

            >>> import pyActigraphy
            >>> rawAWD = pyActigraphy.io.read_raw_awd(fpath + 'SUBJECT_01.AWD')
            >>> rawAWD.RA()
            0.XXXX
            >>> rawAWD.RA(binarize=False)
            0.XXXX
        """

        if binarize is True:
            data = self.binarized_data(threshold)
        else:
            data = self.data

        # n_epochs = int(pd.Timedelta('5H')/self.frequency)

        _, l5 = _lmx(data, '5H', lowest=True)
        _, m10 = _lmx(data, '10H', lowest=False)

        return (m10-l5)/(m10+l5)

[docs]    def L5p(self, period='7D', binarize=True, threshold=4, verbose=False):
        r"""L5 per period

        The L5 variable is calculated for each consecutive period found in the
        actigraphy recording.

        Parameters
        ----------
        period: str, optional
            Time period for the calculation of IS
            Default is '7D'.
        binarize: bool, optional
            If set to True, the data are binarized.
            Default is True.
        threshold: int, optional
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
            Default is 4.
        verbose: bool, optional
            If set to True, display the number of periods found in the activity
            recording, as well as the time not accounted for.
            Default is False.

        Returns
        -------
        l5p: list of float


        Notes
        -----

        The L5 [1]_ variable is calculated as the mean, per acquisition period,
        of the average daily activities during the 5 least active hours.

        .. warning:: The value of this variable depends on the length of the
                     acquisition period.

        References
        ----------

        .. [1] Van Someren, E.J.W., Lijzenga, C., Mirmiran, M., Swaab, D.F.
               (1997). Long-Term Fitness Training Improves the Circadian
               Rest-Activity Rhythm in Healthy Elderly Males.
               Journal of Biological Rhythms, 12(2), 146–156.
               http://doi.org/10.1177/074873049701200206

        Examples
        --------

            >>> import pyActigraphy
            >>> rawAWD = pyActigraphy.io.read_raw_awd(fpath + 'SUBJECT_01.AWD')
            >>> rawAWD.duration()
            Timedelta('12 days 18:41:00')
            >>> rawAWD.L5p(period='5D',verbose=True)
            Number of periods: 2
            Time unaccounted for: 2 days, 19h, 0m, 0s
            [0.XXXX, 0.XXXX]
        """

        if binarize is True:
            data = self.binarized_data(threshold)
        else:
            data = self.data

        # n_epochs = int(pd.Timedelta('5H')/self.frequency)

        intervals = _interval_maker(data.index, period, verbose)

        results = [
            _lmx(
                data[time[0]:time[1]],
                '5H',
                lowest=True
            ) for time in intervals
        ]
        return [res[1] for res in results]

[docs]    def M10p(self, period='7D', binarize=True, threshold=4, verbose=False):
        r"""M10 per period

        The M10 variable is calculated for each consecutive period found in the
        actigraphy recording.

        Parameters
        ----------
        period: str, optional
            Time period for the calculation of IS
            Default is '7D'.
        binarize: bool, optional
            If set to True, the data are binarized.
            Default is True.
        threshold: int, optional
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
            Default is 4.
        verbose: bool, optional
            If set to True, display the number of periods found in the activity
            recording, as well as the time not accounted for.
            Default is False.

        Returns
        -------
        m10p: list of float


        Notes
        -----

        The M10 [1]_ variable is calculated as the mean, per acquisition period
        , of the average daily activities during the 10 most active hours.

        .. warning:: The value of this variable depends on the length of the
                     acquisition period.

        References
        ----------

        .. [1] Van Someren, E.J.W., Lijzenga, C., Mirmiran, M., Swaab, D.F.
               (1997). Long-Term Fitness Training Improves the Circadian
               Rest-Activity Rhythm in Healthy Elderly Males.
               Journal of Biological Rhythms, 12(2), 146–156.
               http://doi.org/10.1177/074873049701200206

        Examples
        --------

            >>> import pyActigraphy
            >>> rawAWD = pyActigraphy.io.read_raw_awd(fpath + 'SUBJECT_01.AWD')
            >>> rawAWD.duration()
            Timedelta('12 days 18:41:00')
            >>> rawAWD.M10p(period='5D',verbose=True)
            Number of periods: 2
            Time unaccounted for: 2 days, 19h, 0m, 0s
            [0.XXXX, 0.XXXX]
        """

        if binarize is True:
            data = self.binarized_data(threshold)
        else:
            data = self.data

        # n_epochs = int(pd.Timedelta('10H')/self.frequency)

        intervals = _interval_maker(data.index, period, verbose)

        results = [
            _lmx(
                data[time[0]:time[1]],
                '10H',
                lowest=False
            ) for time in intervals
        ]
        return [res[1] for res in results]

[docs]    def RAp(self, period='7D', binarize=True, threshold=4, verbose=False):
        r"""RA per period

        The RA variable is calculated for each consecutive period found in the
        actigraphy recording.

        Parameters
        ----------
        period: str, optional
            Time period for the calculation of IS
            Default is '7D'.
        binarize: bool, optional
            If set to True, the data are binarized.
            Default is True.
        threshold: int, optional
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
            Default is 4.
        verbose: bool, optional
            If set to True, display the number of periods found in the activity
            recording, as well as the time not accounted for.
            Default is False.

        Returns
        -------
        rap: list of float


        Notes
        -----

        The RA [1]_ variable is calculated as:

        .. math::

            RA = \frac{M10 - L5}{M10 + L5}

        References
        ----------

        .. [1] Van Someren, E.J.W., Lijzenga, C., Mirmiran, M., Swaab, D.F.
               (1997). Long-Term Fitness Training Improves the Circadian
               Rest-Activity Rhythm in Healthy Elderly Males.
               Journal of Biological Rhythms, 12(2), 146–156.
               http://doi.org/10.1177/074873049701200206

        Examples
        --------

            >>> import pyActigraphy
            >>> rawAWD = pyActigraphy.io.read_raw_awd(fpath + 'SUBJECT_01.AWD')
            >>> rawAWD.duration()
            Timedelta('12 days 18:41:00')
            >>> rawAWD.RAp(period='5D',verbose=True)
            Number of periods: 2
            Time unaccounted for: 2 days, 19h, 0m, 0s
            [0.XXXX, 0.XXXX]
        """

        if binarize is True:
            data = self.binarized_data(threshold)
        else:
            data = self.data

        # n_epochs = int(pd.Timedelta('5H')/self.frequency)

        intervals = _interval_maker(data.index, period, verbose)

        results = []

        for time in intervals:
            data_subset = data[time[0]:time[1]]
            _, l5 = _lmx(data_subset, '5H', lowest=True)
            _, m10 = _lmx(data_subset, '10H', lowest=False)
            results.append((m10-l5)/(m10+l5))

        return results

    # @lru_cache(maxsize=6)
[docs]    def IS(self, freq='1H', binarize=True, threshold=4):
        r"""Interdaily stability

        The Interdaily stability (IS) quantifies the repeatibilty of the
        daily rest-activity pattern over each day contained in the activity
        recording.

        Parameters
        ----------
        freq: str, optional
            Data resampling `frequency string
            <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>`_.
            Default is '1H'.
        binarize: bool, optional
            If set to True, the data are binarized.
            Default is True.
        threshold: int, optional
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
            Default is 4.

        Returns
        -------
        is: float


        Notes
        -----

        This variable is defined in ref [1]_:

        .. math::

            IS = \frac{d^{24h}}{d^{1h}}

        with:

        .. math::

            d^{1h} = \sum_{i}^{n}\frac{\left(x_{i}-\bar{x}\right)^{2}}{n}

        where :math:`x_{i}` is the number of active (counts higher than a
        predefined threshold) minutes during the :math:`i^{th}` period,
        :math:`\bar{x}` is the mean of all data and :math:`n` is the number of
        periods covered by the actigraphy data and with:

        .. math::

            d^{24h} = \sum_{i}^{p} \frac{
                      \left( \bar{x}_{h,i} - \bar{x} \right)^{2}
                      }{p}

        where :math:`\bar{x}^{h,i}` is the average number of active minutes
        over the :math:`i^{th}` period and :math:`p` is the number of periods
        per day. The average runs over all the days.

        For the record, tt is the 24h value from the chi-square periodogram
        (Sokolove and Bushel1 1978).

        References
        ----------

        .. [1] Witting W., Kwa I.H., Eikelenboom P., Mirmiran M., Swaab D.F.
               Alterations in the circadian rest–activity rhythm in aging and
               Alzheimer׳s disease. Biol Psychiatry. 1990;27:563–572.

        Examples
        --------

            >>> import pyActigraphy
            >>> rawAWD = pyActigraphy.io.read_raw_awd(fpath + 'SUBJECT_01.AWD')
            >>> rawAWD.IS()
            0.6900175913031027
            >>> rawAWD.IS(freq='30min', binarize=True, threshold=4)
            0.6245582891144925
            >>> rawAWD.IS(freq='1H', binarize=False)
            0.5257020914453097
        """

        data = self.resampled_data(
            freq=freq,
            binarize=binarize,
            threshold=threshold
        )
        return _interdaily_stability(data)

[docs]    def ISm(
        self,
        freqs=[
            '1T', '2T', '3T', '4T', '5T', '6T', '8T', '9T', '10T',
            '12T', '15T', '16T', '18T', '20T', '24T', '30T',
            '32T', '36T', '40T', '45T', '48T', '60T'
        ],
        binarize=True,
        threshold=4
    ):
        r"""Average interdaily stability

        ISm [1]_ is the average of the IS values obtained with resampling
        periods divisors of 1440 between 1 and 60 min.

        Parameters
        ----------
        freq: str, optional
            Data resampling `frequency strings
            <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>`_.
        binarize: bool, optional
            If set to True, the data are binarized.
            Default is True.
        threshold: int, optional
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
            Default is set to 4.

        Returns
        -------
        ism: float

        Notes
        -----

        By default, the resampling periods are 1, 2, 3, 4, 5, 6, 8, 9, 10, 12,
        15, 16, 18, 20, 24, 30, 32, 36, 40, 45, 48 and 60 min.

        References
        ----------

        .. [1] Gonçalves, B. S., Cavalcanti, P. R., Tavares, G. R.,
               Campos, T. F., & Araujo, J. F. (2014). Nonparametric methods in
               actigraphy: An update. Sleep science (Sao Paulo, Brazil), 7(3),
               158-64.

        Examples
        --------

            >>> import pyActigraphy
            >>> rawAWD = pyActigraphy.io.read_raw_awd(fpath + 'SUBJECT_01.AWD')
            >>> rawAWD.ISm()
            0.5758268227551039
            >>> rawAWD.ISm(binarize=False)
            0.3915874151855646
            >>> rawAWD.ISm(freqs=['10min','30min','1H'], binarize=False)
            0.44598210450842063
        """

        data = [
            self.resampled_data(freq, binarize, threshold) for freq in freqs
        ]

        return mean([_interdaily_stability(datum) for datum in data])

[docs]    def ISp(self, period='7D', freq='1H',
            binarize=True, threshold=4, verbose=False):
        r"""Interdaily stability per period

        The IS is calculated for each consecutive period found in the
        actigraphy recording.

        Parameters
        ----------
        period: str, optional
            Time period for the calculation of IS
            Default is '7D'.
        freq: str, optional
            Data resampling `frequency string
            <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>`_.
            Default is '1H'.
        binarize: bool, optional
            If set to True, the data are binarized.
            Default is True.
        threshold: int, optional
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
            Default is 4.
        verbose: bool, optional
            If set to True, display the number of periods found in the activity
            recording, as well as the time not accounted for.
            Default is False.

        Returns
        -------
        isp: list of float


        Notes
        -----

        Periods are consecutive and all of the required duration. If the last
        consecutive period is shorter than required, the IS is not calculated
        for that period.


        Examples
        --------

            >>> import pyActigraphy
            >>> rawAWD = pyActigraphy.io.read_raw_awd(fpath + 'SUBJECT_01.AWD')
            >>> rawAWD.duration()
            Timedelta('12 days 18:41:00')
            >>> rawAWD.ISp(period='5D',verbose=True)
            Number of periods: 2
            Time unaccounted for: 2 days, 19h, 0m, 0s
            [0.7565263007902066, 0.866544730769211]
        """
        data = self.resampled_data(freq, binarize, threshold)

        intervals = _interval_maker(data.index, period, verbose)

        results = [
            _interdaily_stability(data[time[0]:time[1]]) for time in intervals
        ]
        return results

    # @lru_cache(maxsize=6)
[docs]    def IV(self, freq='1H', binarize=True, threshold=4):
        r"""Intradaily variability

        The Intradaily Variability (IV) quantifies the variability of the
        activity recording. This variable thus measures the rest or activity
        fragmentation.

        Parameters
        ----------
        freq: str, optional
            Data resampling `frequency string
            <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>`_.
            Default is '1H'.
        binarize: bool, optional
            If set to True, the data are binarized.
            Default is True.
        threshold: int, optional
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
            Default is 4.

        Returns
        -------
        iv: float

        Notes
        -----

        It is defined in ref [1]_:

        .. math::

            IV = \frac{c^{1h}}{d^{1h}}

        with:

        .. math::

            d^{1h} = \sum_{i}^{n}\frac{\left(x_{i}-\bar{x}\right)^{2}}{n}

        where :math:`x_{i}` is the number of active (counts higher than a
        predefined threshold) minutes during the :math:`i^{th}` period,
        :math:`\bar{x}` is the mean of all data and :math:`n` is the number of
        periods covered by the actigraphy data,

        and with:

        .. math::

            c^{1h} = \sum_{i}^{n-1} \frac{
                        \left( x_{i+1} - x_{i} \right)^{2}
                     }{n-1}

        References
        ----------

        .. [1] Witting W., Kwa I.H., Eikelenboom P., Mirmiran M., Swaab D.F.
               Alterations in the circadian rest–activity rhythm in aging and
               Alzheimer׳s disease. Biol Psychiatry. 1990;27:563–572.

        Examples
        --------

            >>> import pyActigraphy
            >>> rawAWD = pyActigraphy.io.read_raw_awd(fpath + 'SUBJECT_01.AWD')
            >>> rawAWD.IV()
            0.46185426426324316
            >>> rawAWD.IV(freq='30min', binarize=True, threshold=4)
            0.4150769573937417
            >>> rawAWD.IV(freq='1H', binarize=False)
            0.7859579446494547
        """
        data = self.resampled_data(freq, binarize, threshold)

        return _intradaily_variability(data)

[docs]    def IVm(
        self,
        freqs=[
            '1T', '2T', '3T', '4T', '5T', '6T', '8T', '9T', '10T',
            '12T', '15T', '16T', '18T', '20T', '24T', '30T',
            '32T', '36T', '40T', '45T', '48T', '60T'
        ],
        binarize=True,
        threshold=4
    ):
        r"""Average intradaily variability

        IVm [1]_ is the average of the IV values obtained with resampling
        periods divisors of 1440 between 1 and 60 min.

        Parameters
        ----------
        freq: str, optional
            Data resampling `frequency strings
            <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>`_.
        binarize: bool, optional
            If set to True, the data are binarized.
            Default is True.
        threshold: int, optional
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
            Default is set to 4.

        Returns
        -------
        ivm: float

        Notes
        -----

        By default, the resampling periods are 1, 2, 3, 4, 5, 6, 8, 9, 10, 12,
        15, 16, 18, 20, 24, 30, 32, 36, 40, 45, 48 and 60 min.

        References
        ----------

        .. [1] Gonçalves, B. S., Cavalcanti, P. R., Tavares, G. R.,
               Campos, T. F., & Araujo, J. F. (2014). Nonparametric methods in
               actigraphy: An update. Sleep science (Sao Paulo, Brazil), 7(3),
               158-64.

        Examples
        --------

            >>> import pyActigraphy
            >>> rawAWD = pyActigraphy.io.read_raw_awd(fpath + 'SUBJECT_01.AWD')
            >>> rawAWD.IVm()
            0.3482306825356382
            >>> rawAWD.IVm(binarize=False)
            0.6414533006190071
            >>> rawAWD.IVm(freqs=['10min','30min','1H'], binarize=False)
            0.7124465677737196
        """

        data = [
            self.resampled_data(freq, binarize, threshold) for freq in freqs
        ]

        return mean([_intradaily_variability(datum) for datum in data])

[docs]    def IVp(self, period='7D', freq='1H',
            binarize=True, threshold=4, verbose=False):
        r"""Intradaily variability per period

        The IV is calculated for each consecutive period found in the
        actigraphy recording.

        Parameters
        ----------
        period: str, optional
            Time period for the calculation of IS
            Default is '7D'.
        freq: str, optional
            Data resampling `frequency string
            <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>`_.
            Default is '1H'.
        binarize: bool, optional
            If set to True, the data are binarized.
            Default is True.
        threshold: int, optional
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
            Default is 4.
        verbose: bool, optional
            If set to True, display the number of periods found in the activity
            recording, as well as the time not accounted for.
            Default is False.

        Returns
        -------
        ivp: list of float


        Notes
        -----

        Periods are consecutive and all of the required duration. If the last
        consecutive period is shorter than required, the IV is not calculated
        for that period.


        Examples
        --------

            >>> import pyActigraphy
            >>> rawAWD = pyActigraphy.io.read_raw_awd(fpath + 'SUBJECT_01.AWD')
            >>> rawAWD.duration()
            Timedelta('12 days 18:41:00')
            >>> rawAWD.IVp(period='5D',verbose=True)
            Number of periods: 2
            Time unaccounted for: 2 days, 19h, 0m, 0s
            [0.4011232866522594, 0.5340044506337185]
        """

        data = self.resampled_data(freq, binarize, threshold)

        intervals = _interval_maker(data.index, period, verbose)

        results = [
            _intradaily_variability(data[time[0]:time[1]])
            for time in intervals
        ]
        return results

[docs]    def pRA(self, threshold, start=None, period=None):
        r"""Rest->Activity transition probability distribution

        Conditional probability, pRA(t), that an individual would be
        resting at time (t+1) given that the individual had been continuously
        active for the preceding t epochs, defined in [1]_ as:

        .. math::
            pRA(t) = p(A|R_t) = \frac{N_t - N_{t+1}}{N_t}

        with :math:`N_t`, the total number of sequences of rest (i.e. activity
        below threshold) of duration :math:`t` or longer.

        Parameters
        ----------
        threshold: int
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
        start: str, optional
            If not None, the actigraphy recording is truncated to
            'start:start+period', each day. Start string format: 'HH:MM:SS'.
            Default is None
        period: str, optional
            Time period for the calculation of pRA.
            Default is None.

        Returns
        -------
        pra: pandas.core.series.Series
            Transition probabilities (pRA(t)), calculated for all t values.
        pra_weights: pandas.core.series.Series
            Weights are defined as the square root of the number of activity
            sequences contributing to each probability estimate.

        Notes
        -----

        pRA is corrected for discontinuities due to sparse data, as defined in
        [1]_.

        References
        ----------

        .. [1] Lim, A. S. P., Yu, L., Costa, M. D., Buchman, A. S.,
               Bennett, D. A., Leurgans, S. E., & Saper, C. B. (2011).
               Quantification of the Fragmentation of Rest-Activity Patterns in
               Elderly Individuals Using a State Transition Analysis. Sleep,
               34(11), 1569–1581. http://doi.org/10.5665/sleep.1400

        Examples
        --------

            >>> import pyActigraphy
            >>> rawAWD = pyActigraphy.io.read_raw_awd(fpath + 'SUBJECT_01.AWD')
            >>> pRA, pRA_weights = rawAWD.pRA(4, start='00:00:00', period='8H')
            >>> pRA
            counts
            1      0.169043
            2      0.144608
            3      0.163324
            (...)
            481    0.001157
            Name: counts, dtype: float64
        """

        # Restrict data range to period 'Start, Start+Period'
        if start is not None:
            end = _td_format(
                pd.Timedelta(start)+pd.Timedelta(period)
            )

            data = self.binarized_data(
                threshold
            ).between_time(start, end)
        else:
            data = self.binarized_data(threshold)
        # Rest->Activity transition probability:
        pRA, pRA_weights = _transition_prob(
            data, True
        )

        return pRA, pRA_weights

[docs]    def pAR(self, threshold, start=None, period=None):
        r"""Activity->Rest transition probability distribution

        Conditional probability, pAR(t), that an individual would be
        active at time (t+1) given that the individual had been continuously
        resting for the preceding t epochs, defined in [1]_ as:

        .. math::
            pAR(t) = p(R|A_t) = \frac{N_t - N_{t+1}}{N_t}

        with :math:`N_t`, the total number of sequences of activity (i.e.
        activity above threshold) of duration :math:`t` or longer.

        Parameters
        ----------
        threshold: int
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
        start: str, optional
            If not None, the actigraphy recording is truncated to
            'start:start+period', each day. Start string format: 'HH:MM:SS'.
            Default is None
        period: str, optional
            Time period for the calculation of pAR.
            Default is None.

        Returns
        -------
        par: pandas.core.series.Series
            Transition probabilities (pAR(t)), calculated for all t values.
        par_weights: pandas.core.series.Series
            Weights are defined as the square root of the number of activity
            sequences contributing to each probability estimate.

        Notes
        -----

        pAR is corrected for discontinuities due to sparse data, as defined in
        [1]_.

        References
        ----------

        .. [1] Lim, A. S. P., Yu, L., Costa, M. D., Buchman, A. S.,
               Bennett, D. A., Leurgans, S. E., & Saper, C. B. (2011).
               Quantification of the Fragmentation of Rest-Activity Patterns in
               Elderly Individuals Using a State Transition Analysis. Sleep,
               34(11), 1569–1581. http://doi.org/10.5665/sleep.1400

        Examples
        --------

            >>> import pyActigraphy
            >>> rawAWD = pyActigraphy.io.read_raw_awd(fpath + 'SUBJECT_01.AWD')
            >>> pAR, pAR_weights = rawAWD.pAR(4, start='00:00:00', period='8H')
            >>> pAR
            counts
            1      0.169043
            2      0.144608
            3      0.163324
            (...)
            481    0.001157
            Name: counts, dtype: float64
        """

        # Restrict data range to period 'Start, Start+Period'
        if start is not None:
            end = _td_format(
                pd.Timedelta(start)+pd.Timedelta(period)
            )

            data = self.binarized_data(
                threshold
            ).between_time(start, end)
        else:
            data = self.binarized_data(threshold)
        # Activity->Rest transition probability:
        pAR, pAR_weights = _transition_prob(
            data, False
        )

        return pAR, pAR_weights

[docs]    def kRA(
        self, threshold, start=None, period=None, frac=.3, it=0, logit=False,
        freq=None, offset='15min'
    ):
        r"""Rest->Activity transition probability

        Weighted average value of pRA(t) within the constant regions, defined
        as the longest stretch within which the LOWESS curve varied by no more
        than 1 standard deviation of the pRA(t) curve [1]_.

        Parameters
        ----------
        threshold: int
            Above this threshold, data are classified as active (1) and as
            rest (0) otherwise.
        start: str, optional
            If not None, the actigraphy recording is truncated to
            'start:start+period', each day. Start string format: 'HH:MM:SS'.
            Special keywords ('AonT' or 'AoffT') are allowed. In this case, the
            start is set to the activity onset ('AonT') or offset ('AoffT')
            time derived from the daily profile. Cf sleep.AonT/AoffT functions
            for more informations.
            Default is None
        period: str, optional
            Time period for the calculation of pRA.
            Default is None.
        frac: float, optional
            Fraction of the data used when estimating each value.
            Default is 0.3.
        it: int, optional
            Number of residual-based reweightings to perform.
            Default is 0.
        logit: bool, optional
            If True, the kRA value is logit-transformed (ln(p/1-p)). Useful
            when kRA is used in a regression model.
            Default is False.
        freq: str, optional
            Data resampling `frequency string
            <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>`_
            applied to the daily profile if start='AonT' or 'AoffT'.
            Default is None.
        offset: str, optional
            Time offset with respect to the activity onset and offset times
            used as start times.
            Default is '15min'.

        Returns
        -------
        kra: float

        References
        ----------

        .. [1] Lim, A. S. P., Yu, L., Costa, M. D., Buchman, A. S.,
               Bennett, D. A., Leurgans, S. E., & Saper, C. B. (2011).
               Quantification of the Fragmentation of Rest-Activity Patterns in
               Elderly Individuals Using a State Transition Analysis. Sleep,
               34(11), 1569–1581. http://doi.org/10.5665/sleep.1400

        Examples
        --------

            >>> import pyActigraphy
            >>> rawAWD = pyActigraphy.io.read_raw_awd(fpath + 'SUBJECT_01.AWD')
            >>> rawAWD.kRA(4)
            0.09144435545010564
            >>> rawAWD.kRA(4, start='00:00:00', period='8H')
            0.13195826220778709
        """

        if start is not None and re.match(r'AonT|AoffT', start):
            aont = self.AonT(freq=freq, binarize=True, threshold=threshold)
            aofft = self.AoffT(freq=freq, binarize=True, threshold=threshold)
            offset = pd.Timedelta(offset)
            if start == 'AonT':
                start_time = str(aont+offset).split(' ')[-1]
                period = str(
                    pd.Timedelta('24H') - ((aont+offset) - (aofft-offset))
                ).split(' ')[-1]
            elif start == 'AoffT':
                start_time = str(aofft+offset).split(' ')[-1]
                period = str(
                    pd.Timedelta('24H') - ((aofft+offset) - (aont-offset))
                ).split(' ')[-1]
        else:
            start_time = start

        # Calculate the pRA probabilities and their weights.
        pRA, pRA_weights = self.pRA(threshold, start=start_time, period=period)
        # Fit the pRA distribution with a LOWESS and return mean value for
        # the constant region (i.e. the region where |pRA-lowess|<1SD)
        kRA = _transition_prob_sustain_region(
            pRA,
            pRA_weights,
            frac=frac,
            it=it
            )
        return np.log(kRA/(1-kRA)) if logit else kRA

[docs]    def kAR(
        self, threshold, start=None, period=None, frac=.3, it=0, logit=False,
        freq=None, offset='15min'
    ):
        r"""Rest->Activity transition probability

        Weighted average value of pAR(t) within the constant regions, defined
        as the longest stretch within which the LOWESS curve varied by no more
        than 1 standard deviation of the pAR(t) curve [1]_.

        Parameters
        ----------
        threshold: int
            Above this threshold, data are classified as active (1) and as
            rest (0) otherwise.
        start: str, optional
            If not None, the actigraphy recording is truncated to
            'start:start+period', each day. Start string format: 'HH:MM:SS'.
            Special keywords ('AonT' or 'AoffT') are allowed. In this case, the
            start is set to the activity onset ('AonT') or offset ('AoffT')
            time derived from the daily profile. Cf sleep.AonT/AoffT functions
            for more informations.
            Default is None
        period: str, optional
            Time period for the calculation of pRA.
            Default is None.
        frac: float
            Fraction of the data used when estimating each value.
            Default is 0.3.
        it: int
            Number of residual-based reweightings to perform.
            Default is 0.
        logit: bool, optional
            If True, the kRA value is logit-transformed (ln(p/1-p)). Useful
            when kRA is used in a regression model.
            Default is False.
        freq: str, optional
            Data resampling `frequency string
            <https://pandas.pydata.org/pandas-docs/stable/timeseries.html>`_
            applied to the daily profile if start='AonT' or 'AoffT'.
            Default is None.
        offset: str, optional
            Time offset with respect to the activity onset and offset times
            used as start times.
            Default is '15min'.

        Returns
        -------
        kar: float

        References
        ----------

        .. [1] Lim, A. S. P., Yu, L., Costa, M. D., Buchman, A. S.,
               Bennett, D. A., Leurgans, S. E., & Saper, C. B. (2011).
               Quantification of the Fragmentation of Rest-Activity Patterns in
               Elderly Individuals Using a State Transition Analysis. Sleep,
               34(11), 1569–1581. http://doi.org/10.5665/sleep.1400

        Examples
        --------

            >>> import pyActigraphy
            >>> rawAWD = pyActigraphy.io.read_raw_awd(fpath + 'SUBJECT_01.AWD')
            >>> rawAWD.kAR(4)
            0.041397590252332916
            >>> rawAWD.kAR(4, start='08:00:00', period='12H')
            0.04372712642257519
        """

        if start is not None and re.match(r'AonT|AoffT', start):
            aont = self.AonT(freq=freq, binarize=True, threshold=threshold)
            aofft = self.AoffT(freq=freq, binarize=True, threshold=threshold)
            offset = pd.Timedelta(offset)
            if start == 'AonT':
                start_time = str(aont+offset).split(' ')[-1]
                period = str(
                    pd.Timedelta('24H') - ((aont+offset) - (aofft-offset))
                ).split(' ')[-1]
            elif start == 'AoffT':
                start_time = str(aofft+offset).split(' ')[-1]
                period = str(
                    pd.Timedelta('24H') - ((aofft+offset) - (aont-offset))
                ).split(' ')[-1]
        else:
            start_time = start

        # Calculate the pAR probabilities and their weights.
        pAR, pAR_weights = self.pAR(threshold, start=start_time, period=period)
        # Fit the pAR distribution with a LOWESS and return mean value for
        # the constant region (i.e. the region where |pAR-lowess|<1SD)
        kAR = _transition_prob_sustain_region(
            pAR,
            pAR_weights,
            frac=frac,
            it=it
            )
        return np.log(kAR/(1-kAR)) if logit else kAR


class ForwardMetricsMixin(object):
    """ Mixin Class """

    # def mask_fraction(self):
    #
    #     return {
    #         iread.display_name:
    # iread.mask_fraction() for iread in self.readers
    #     }
    #
    # def start_time(self):
    #
    #     return {
    #         iread.display_name:
    # str(iread.start_time) for iread in self.readers
    #     }
    #
    # def duration(self):
    #
    #     return {
    #         iread.display_name:
    # str(iread.duration()) for iread in self.readers
    #     }

    def ADAT(
        self, binarize=True, threshold=4, rescale=True, exclude_ends=False
    ):
        """Total average daily activity

        Calculate the total activity counts, averaged over all the days.

        Parameters
        ----------
        binarize: bool, optional
            If set to True, the data are binarized.
            Default is True.
        threshold: int, optional
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
        rescale: bool, optional
            If set to True, the activity counts are rescaled to account for
            masked periods (if any).
            Default is True.
        exclude_ends: bool, optional
            If set to True, the first and last daily periods are excluded from
            the calculation. Useful when the recording does start or end at
            midnigth.
            Default is False.

        Returns
        -------
        adat : dict
            Dictionary with filenames as keys and ADAT as values.
        """

        return {
            iread.display_name: iread.ADAT(
                binarize=binarize,
                threshold=threshold,
                rescale=rescale,
                exclude_ends=exclude_ends
            ) for iread in self.readers
        }

    def ADATp(
        self,
        period='7D',
        binarize=True,
        threshold=4,
        rescale=True,
        exclude_ends=False,
        verbose=False
    ):
        """Total average daily activity per period

        Calculate the total activity counts, averaged over each consecutive
        period contained in the data. The number of periods

        Parameters
        ----------
        period: str, optional
            Time length of the period to be considered. Must be understandable
            by pandas.Timedelta
        binarize: bool, optional
            If set to True, the data are binarized.
            Default is True.
        threshold: int, optional
            If binarize is set to True, data above this threshold are set to 1
            and to 0 otherwise.
        rescale: bool, optional
            If set to True, the activity counts are rescaled to account for
            masked periods (if any).
            Default is True.
        exclude_ends: bool, optional
            If set to True, the first and last daily periods are excluded from
            the calculation. Useful when the recording does start or end at
            midnigth.
            Default is False.
        verbose: bool, optional
            If set to True, display the number of periods found in the data.
            Also display the time not accounted for.
            Default is False.

        Returns
        -------
        adatp : list of int
            Dictionary with filenames as keys and a list of ADAT per period
            as values.
        """

        return {
            iread.display_name: iread.ADATp(
                period=period,
                binarize=binarize,
                threshold=threshold,
                rescale=rescale,
                exclude_ends=exclude_ends,
                verbose=verbose
            ) for iread in self.readers
        }

    def L5(self, binarize=True, threshold=4):

        return {
            iread.display_name: iread.L5(
                binarize=binarize,
                threshold=threshold
            ) for iread in self.readers
        }

    def M10(self, binarize=True, threshold=4):

        return {
            iread.display_name: iread.M10(
                binarize=binarize,
                threshold=threshold
            ) for iread in self.readers
        }

    def RA(self, binarize=True, threshold=4):

        return {
            iread.display_name: iread.RA(
                binarize=binarize,
                threshold=threshold
            ) for iread in self.readers
        }

    def L5p(self, period='7D', binarize=True, threshold=4, verbose=False):

        return {
            iread.display_name: iread.L5p(
                period=period,
                binarize=binarize,
                threshold=threshold,
                verbose=verbose
            ) for iread in self.readers
        }

    def M10p(self, period='7D', binarize=True, threshold=4, verbose=False):

        return {
            iread.display_name: iread.M10p(
                period=period,
                binarize=binarize,
                threshold=threshold,
                verbose=verbose
            ) for iread in self.readers
        }

    def RAp(self, period='7D', binarize=True, threshold=4, verbose=False):

        return {
            iread.display_name: iread.RAp(
                period=period,
                binarize=binarize,
                threshold=threshold,
                verbose=verbose
            ) for iread in self.readers
        }

    def IS(self, freq='1H', binarize=True, threshold=4):

        return {
            iread.display_name: iread.IS(
                freq=freq,
                binarize=binarize,
                threshold=threshold
            ) for iread in self.readers
        }

    def ISm(
        self,
        freqs=[
            '1T', '2T', '3T', '4T', '5T', '6T', '8T', '9T', '10T',
            '12T', '15T', '16T', '18T', '20T', '24T', '30T',
            '32T', '36T', '40T', '45T', '48T', '60T'
        ],
        binarize=True,
        threshold=4
    ):

        return {
            iread.display_name: iread.ISm(
                freqs=freqs,
                binarize=binarize,
                threshold=threshold
            ) for iread in self.readers
        }

    def ISp(self, period='7D', freq='1H',
            binarize=True, threshold=4, verbose=False):

        return {
            iread.display_name: iread.ISp(
                period=period,
                freq=freq,
                binarize=binarize,
                threshold=threshold,
                verbose=verbose
            ) for iread in self.readers
        }

    def IV(self, freq='1H', binarize=True, threshold=4):

        return {
            iread.display_name: iread.IV(
                freq=freq,
                binarize=binarize,
                threshold=threshold
            ) for iread in self.readers
        }

    def IVm(
        self,
        freqs=[
            '1T', '2T', '3T', '4T', '5T', '6T', '8T', '9T', '10T',
            '12T', '15T', '16T', '18T', '20T', '24T', '30T',
            '32T', '36T', '40T', '45T', '48T', '60T'
        ],
        binarize=True,
        threshold=4
    ):

        return {
            iread.display_name: iread.IVm(
                freqs=freqs,
                binarize=binarize,
                threshold=threshold
            ) for iread in self.readers
        }

    def IVp(self, period='7D', freq='1H',
            binarize=True, threshold=4, verbose=False):

        return {
            iread.display_name: iread.IVp(
                period=period,
                freq=freq,
                binarize=binarize,
                threshold=threshold,
                verbose=verbose
            ) for iread in self.readers
        }

    def kRA(self, threshold=4, start=None, period=None, frac=.3, it=0,
            logit=False, freq=None, offset='15min'):

        return {
            iread.display_name: iread.kRA(
                threshold=threshold,
                start=start,
                period=period,
                frac=frac,
                it=it,
                logit=logit,
                freq=freq,
                offset=offset
            ) for iread in self.readers
        }

    def kAR(self, threshold=4, start=None, period=None, frac=.3, it=0,
            logit=False, freq=None, offset='15min'):

        return {
            iread.display_name: iread.kAR(
                threshold=threshold,
                start=start,
                period=period,
                frac=frac,
                it=it,
                logit=logit,
                freq=freq,
                offset=offset
            ) for iread in self.readers
        }

    def AonT(self, freq='5min', whs=12, binarize=True, threshold=4):

        return {
            iread.display_name: iread.AonT(
                freq=freq,
                whs=whs,
                binarize=binarize,
                threshold=threshold
            ) for iread in self.readers
        }

    def AoffT(self, freq='5min', whs=12, binarize=True, threshold=4):

        return {
            iread.display_name: iread.AoffT(
                freq=freq,
                whs=whs,
                binarize=binarize,
                threshold=threshold
            ) for iread in self.readers
        }

    def fSoD(
        self,
        freq='5min',
        binarize=True,
        threshold=4,
        whs=12,
        start='12:00:00',
        period='5h',
        algo='Roenneberg',
        *args,
        **kwargs
    ):

        return {
            iread.display_name: iread.fSoD(
                freq=freq,
                binarize=binarize,
                threshold=threshold,
                whs=whs,
                start=start,
                period=period,
                algo=algo,
                *args,
                **kwargs
            ) for iread in self.readers
        }

    def average_daily_activity(
        self,
        freq,
        cyclic=False,
        binarize=True,
        threshold=4,
        time_origin=None,
        whs='1h'
    ):
        return {
            iread.display_name: iread.average_daily_activity(
                freq=freq,
                cyclic=cyclic,
                binarize=binarize,
                threshold=threshold,
                time_origin=time_origin,
                whs=whs
            ) for iread in self.readers
        }

    def average_daily_light(self, freq='5min', cyclic=False):

        return {
            iread.display_name:
            iread.average_daily_light(
                freq=freq,
                cyclic=cyclic
            ) for iread in self.readers
        }

    def Summary(self):

        # dict of dictionnaries
        ldic = {}
        ldic['Start_time'] = self.start_time()
        ldic['Mask_fraction'] = self.mask_fraction()
        ldic['Duration'] = self.duration()
        ldic['ADAT'] = self.ADAT()
        ldic['ADATp'] = self.ADATp()
        ldic['L5'] = self.L5()
        ldic['M10'] = self.M10()
        ldic['RA'] = self.RA()
        ldic['L5p'] = self.L5p()
        ldic['M10p'] = self.M10p()
        ldic['RAp'] = self.RAp()
        ldic['IS'] = self.IS()
        ldic['IV'] = self.IV()
        ldic['ISm'] = self.ISm()
        ldic['IVm'] = self.IVm()
        ldic['ISp'] = self.ISp()
        ldic['IVp'] = self.IVp()
        ldic['kRA(Midnight-5H)'] = self.kRA(start='00:00:00', period='5h')
        ldic['kAR(Noon-5H)'] = self.kAR(start='12:00:00', period='5h')
        ldic['AonT'] = self.AonT()
        ldic['AoffT'] = self.AoffT()
        ldic['fSoD(Noon-5H)'] = self.fSoD()
        if self.reader_type == 'RPX':
            ldic['average_daily_light'] = self.average_daily_light()

        # list keys of dictionnaries whose number of columns is variable:
        var_dic = ['ADATp', 'L5p', 'M10p', 'RAp', 'ISp', 'IVp']

        # list of corresponding dataframes
        dfs = []
        for key, value in ldic.items():
            columns = []
            if key in var_dic:
                # Get max length of value arrays
                max_length = np.max([len(x) for x in list(value.values())])
                for i in range(max_length):
                    columns.append(
                        key+'(duration={0},period={1})'.format('7D', i+1)
                    )
            else:
                columns.append(key)

            df = pd.DataFrame(
                list(value.values()),
                index=value.keys(),
                columns=columns
            )
            dfs.append(df)

        # join the dataframes recursively
        from functools import reduce
        df = reduce((lambda x, y: x.join(y)), dfs)
        return df