# Source code for champpy.utils.time_utils

import pandas as pd
import numpy as np
import logging

logger = logging.getLogger(__name__)


def _parse_datetime(dt) -> pd.Series:
    """Parse a datetime-like input into a Series of timestamps.

    Args:
        dt: A single datetime-like value (string, pd.Timestamp,
            datetime.datetime) or a collection of such values (pd.Series,
            pd.Index / pd.DatetimeIndex, list, tuple, or numpy array with at
            least one dimension).

    Returns:
        pd.Series: The input converted with ``pd.to_datetime``. A scalar
        becomes a one-element Series, a pd.Series is converted as given, and
        any other collection is wrapped in a new Series first.

    Raises:
        ValueError: If the input cannot be converted. The original exception
            is chained as the cause.
    """
    try:
        if isinstance(dt, pd.Series):
            dt_out = dt
        elif isinstance(dt, (pd.Index, list, tuple)) or (
            isinstance(dt, np.ndarray) and dt.ndim > 0
        ):
            # Collections are converted element-wise rather than being wrapped
            # as one (unparseable) object element.
            dt_out = pd.Series(dt)
        else:
            # Scalar input: wrap so callers can always use the ``.dt`` accessor.
            dt_out = pd.Series([dt])
        return pd.to_datetime(dt_out)
    except Exception as e:
        mssg = f"Failed to parse datetime from input {dt}: {e}"
        logger.error(mssg)
        # Keep the underlying error visible in the traceback.
        raise ValueError(mssg) from e


def get_day_index(dt: pd.Timestamp | pd.Series | str, temp_res: float) -> pd.Series:
    """
    Get the index of a time within a day based on temporal resolution.

    The index is computed from the wall-clock hour, minute and second of each
    timestamp; sub-second components are ignored.

    Args:
        dt: datetime-like value (string, pd.Timestamp) or pd.Series of
            timestamps
        temp_res: temporal resolution in hours (must be positive)

    Returns:
        pd.Series: index within the day (0-based). A Series is returned even
        for a single timestamp.

    Raises:
        ValueError: if temp_res is not positive or dt cannot be parsed.

    Example:
        temp_res = 1.0 (hourly)
        10:00 -> index = 10

        temp_res = 0.5 (30 minutes)
        10:00 -> index = 20
        10:30 -> index = 21

        # Single timestamp
        >>> get_day_index(dt = pd.Timestamp('2026-01-21 10:30:00'), temp_res = 0.25)
        0    42
        dtype: int64

        # Vectorized Series
        >>> get_day_index(dt = pd.Series([pd.Timestamp('2026-01-21 10:00:00'), pd.Timestamp('2026-01-21 15:30:00')]), temp_res = 0.25)
        0    40
        1    62
        dtype: int64
    """
    if temp_res <= 0:
        mssg = f"Temporal resolution must be positive, got {temp_res}."
        logger.error(mssg)
        raise ValueError(mssg)

    dt = _parse_datetime(dt)
    seconds_of_day = dt.dt.hour * 3600 + dt.dt.minute * 60 + dt.dt.second
    # Round away floating-point noise before flooring: e.g. 00:18 at
    # temp_res=0.1 evaluates to 2.9999999999999996 and must map to slot 3.
    slots = (seconds_of_day / (temp_res * 3600)).round(9)
    return pd.Series(np.floor(slots).astype(int), index=dt.index)


def get_week_index(dt: pd.Timestamp | pd.Series | str, temp_res: float) -> pd.Series:
    """
    Get the index within a week based on temporal resolution.

    The week starts on Monday 00:00 (index 0). Each day occupies
    ``ceil(24 / temp_res)`` consecutive indices, so indices of different
    weekdays never overlap, even if temp_res does not divide 24 evenly.

    Args:
        dt: datetime-like value (string, pd.Timestamp) or pd.Series of
            timestamps
        temp_res: temporal resolution in hours (must be positive)

    Returns:
        pd.Series: index within the week (0-based). A Series is returned even
        for a single timestamp.

    Raises:
        ValueError: if temp_res is not positive or dt cannot be parsed.

    Example:
        temp_res = 1.0 (hourly)
        Monday 10:00 -> index = 10 (0*24 + 10)
        Tuesday 15:00 -> index = 39 (1*24 + 15)

        temp_res = 0.25 (15 minutes)
        Monday 10:00 -> index = 40 (0*96 + 40)
        Tuesday 15:00 -> index = 156 (1*96 + 60)

        # Single timestamp (2026-01-21 is a Wednesday: 2*96 + 42)
        >>> get_week_index(dt = pd.Timestamp('2026-01-21 10:30:00'), temp_res = 0.25)
        0    234
        dtype: int64

        # Vectorized Series (Tuesday 10:00 -> 1*96 + 40, Wednesday 15:30 -> 2*96 + 62)
        >>> get_week_index(dt = pd.Series([pd.Timestamp('2026-01-20 10:00:00'), pd.Timestamp('2026-01-21 15:30:00')]), temp_res = 0.25)
        0    136
        1    254
        dtype: int64
    """
    if temp_res <= 0:
        mssg = f"Temporal resolution must be positive, got {temp_res}."
        logger.error(mssg)
        raise ValueError(mssg)

    # Parse datetime
    dt = _parse_datetime(dt)

    # Determine weekday index
    weekday = dt.dt.dayofweek  # Monday=0, Sunday=6
    day_idx = get_day_index(dt, temp_res)

    # Number of slots a single day occupies. Rounding removes float noise
    # (e.g. 239.99999999999997), and ceil keeps a partial last slot of the day
    # from colliding with the first slot of the next day.
    indices_per_day = int(np.ceil(np.round(24 / temp_res, 9)))
    return pd.Series(weekday * indices_per_day + day_idx, index=dt.index)


def get_datetime_array(
    start_date: pd.Timestamp,
    end_date: pd.Timestamp,
    temp_res: float,
    number_days_buffer: int = 0,
) -> tuple[pd.DatetimeIndex, np.ndarray]:
    """
    Create a datetime array with buffer days before and after the actual period.
    Returns (dt_array, mask_buffer).

    The actual period runs from midnight of ``start_date`` to the last time
    step of ``end_date`` (both dates are normalized to midnight first).

    Parameters:
        start_date (pd.Timestamp): Start date of the actual period. Anything
            accepted by ``pd.Timestamp`` (e.g. a date string) also works.
        end_date (pd.Timestamp): End date of the actual period (inclusive).
            Anything accepted by ``pd.Timestamp`` also works.
        temp_res (float): Temporal resolution in hours (must be positive).
        number_days_buffer (int): Number of buffer days before and after the period.

    Returns:
        tuple: ``dt_array`` is a pd.DatetimeIndex covering buffer + period +
        buffer at the given resolution; ``mask_buffer`` is a boolean array of
        the same length that is True for time steps lying in a buffer.

    Raises:
        ValueError: if temp_res is not positive.
    """
    if temp_res <= 0:
        mssg = f"Temporal resolution must be positive, got {temp_res}."
        logger.error(mssg)
        raise ValueError(mssg)

    # Use a Timedelta step instead of a formatted frequency string such as
    # f"{temp_res}h": string formatting of floats (e.g. "1e-05h") is not
    # guaranteed to be a valid frequency alias.
    step = pd.Timedelta(hours=temp_res)
    buffer = pd.Timedelta(days=number_days_buffer)

    start_dt_wo_buffer = pd.Timestamp(start_date).normalize()
    # Last time step of the end date, i.e. one step before the next midnight.
    end_dt_wo_buffer = pd.Timestamp(end_date).normalize() + pd.Timedelta(days=1) - step

    dt_array = pd.date_range(
        start=start_dt_wo_buffer - buffer,
        end=end_dt_wo_buffer + buffer,
        freq=step,
    )

    mask_buffer = (dt_array < start_dt_wo_buffer) | (dt_array > end_dt_wo_buffer)
    return dt_array, mask_buffer



# Typeday helpers: group weekdays (0=Monday ... 6=Sunday) into typedays.
class TypeDays:
    """
    Utility class to group weekdays into typedays.

    The class allows to define custom groups of weekdays (e.g., Mon-Fri, Sat-Sun) and provides methods to convert weekday indices to typeday indices.

    Parameters
    ----------
    groups : list[list[int]], optional
        List of weekday groups. Each inner list defines one typeday and contains
        weekday indices (``0=Monday`` ... ``6=Sunday``).

    Attributes
    ----------
    groups : list[list[int]]
        The defined groups of weekdays for each typeday.
    index : list[int]
        The index of each typeday (0-based).
    names : list[str]
        The names of each typeday based on the grouped weekdays (e.g., "Mon-Fri", "Sat-Sun").
    number : int
        The number of typedays defined.

    Examples
    --------
    >>> typedays = TypeDays(groups=[[0, 1, 2, 3, 4], [5, 6]])
    >>> typedays.index
    [0, 1]
    >>> typedays.names
    ['Mon-Fri', 'Sat-Sun']

    >>> TypeDays(groups=[[0, 1, 2, 3, 4], [5], [6]]).names
    ['Mon-Fri', 'Sat', 'Sun']

    >>> TypeDays(groups=[[0, 2, 4], [1, 3, 5, 6]]).names
    ['Mon-Fri', 'Tue-Sun']
    """

    def __init__(self, groups: list[list[int]] = [[0], [1], [2], [3], [4], [5], [6]]):
        """
        See Class docstring for parameters and example.
        """
        # Validate groups
        self._validate_groups(groups)

        self.groups = groups

        # Save index of typedays for quick lookup
        self.index = list(range(0, len(self.groups)))

        # save number of TypeDays
        self.number = len(self.groups)

        # save names of typedays
        weekday_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
        # generate names based on groups: Mon-Fri, Sat-Sun
        self.names = []
        for group in groups:
            if len(group) == 1:
                name = weekday_names[group[0]]
            else:
                name = f"{weekday_names[group[0]]}-{weekday_names[group[-1]]}"
            self.names.append(name)

    def _validate_groups(self, groups: list[list[int]]):
        """Validate typeday groups."""
        # Check validity of groups
        if groups is None or not isinstance(groups, list) or len(groups) == 0:
            mssg = "Typeday groups must be a non-empty list of lists."
            logger.error(mssg)
            raise ValueError(mssg)
        for group in groups:
            if not isinstance(group, list) or len(group) == 0:
                mssg = "Each typeday group must be a non-empty list of integers representing weekdays."
                logger.error(mssg)
                raise ValueError(mssg)
            for day in group:
                if day < 0 or day > 6:
                    mssg = f"Invalid weekday {day} in groups. Must be between 0 (Monday) and 6 (Sunday)."
                    logger.error(mssg)
                    raise ValueError(mssg)
        # validate that all days are covered and no duplicates
        all_days = [day for group in groups for day in group]
        if sorted(all_days) != list(range(7)):
            mssg = "All days (0-6) must be covered exactly once in typeday groups."
            logger.error(mssg)
            raise ValueError(mssg)


[docs]
    def weekday2typeday(self, index_weekday: int | pd.Series | np.ndarray) -> int | np.ndarray:
        """
        Convert weekday (0=Monday,..6=Sunday) to typeday index based on groups.

        Parameters
        ----------
        index_weekday : int | pandas.Series | numpy.ndarray
            Weekday index or array/series of weekday indices.

        Examples
        --------
        >>> typedays = TypeDays(groups=[[0, 1, 2, 3, 4], [5, 6]])
        >>> typedays.weekday2typeday(np.array([0, 1, 2, 3, 4, 5, 6, 4, 6]))
        [0, 0, 0, 0, 0, 1, 1, 0, 1]
        """
        if isinstance(index_weekday, int):
            # Single value
            for idx, group in enumerate(self.groups):
                if index_weekday in group:
                    return idx

        # Check for type of class
        if isinstance(index_weekday, pd.Series) or isinstance(index_weekday, pd.Index):
            # convert to numpy array for faster processing (ensure writable)
            index_weekday_array = index_weekday.to_numpy(copy=True)
        elif isinstance(index_weekday, np.ndarray):
            index_weekday_array = np.array(index_weekday, copy=True)
        else:
            raise TypeError("Input must be int, pd.Series, pd.Index, or np.ndarray.")

        for i, group in enumerate(self.groups):
            mask = np.isin(index_weekday_array, group)
            index_weekday_array[mask] = i  # 1-based
        return index_weekday_array