# Source code for abraxos.utils

"""Utility functions for DataFrame operations."""

from __future__ import annotations

import typing as t

import numpy as np
import pandas as pd

# Names exported by a star-import of this module.
__all__ = ['split', 'clear', 'to_records']


def split(
    df: pd.DataFrame,
    i: int = 2
) -> tuple[pd.DataFrame, ...]:
    """
    Splits a DataFrame into `i` approximately equal parts.

    Rows are kept in their original order. When ``len(df)`` is not evenly
    divisible by `i`, the first ``len(df) % i`` parts receive one extra row.
    If `i` exceeds the number of rows, the trailing parts are empty
    DataFrames that still carry the original columns.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame to be split.
    i : int, optional
        The number of parts to split the DataFrame into (default is 2).

    Returns
    -------
    tuple of pd.DataFrame
        A tuple containing `i` DataFrames, each being a partition of the
        original DataFrame.

    Raises
    ------
    ValueError
        If `i` is not a positive number.

    Examples
    --------
    >>> import pandas as pd
    >>> import abraxos
    >>> df = pd.DataFrame({'A': range(10)})
    >>> parts = abraxos.split(df, 3)
    >>> [len(part) for part in parts]
    [4, 3, 3]
    >>> parts[1]
       A
    4  4
    5  5
    6  6
    """
    sections = int(i)
    if sections <= 0:
        raise ValueError('number sections must be larger than 0.')

    # Slice positionally instead of delegating to ``np.array_split``: NumPy
    # routes DataFrames through ``DataFrame.swapaxes``, which pandas has
    # deprecated, so the old call emitted a FutureWarning on recent versions.
    base, extra = divmod(len(df), sections)
    parts = []
    start = 0
    for k in range(sections):
        # The first `extra` parts absorb the remainder, one row each.
        stop = start + base + (1 if k < extra else 0)
        parts.append(df.iloc[start:stop])
        start = stop
    return tuple(parts)
def clear(df: pd.DataFrame) -> pd.DataFrame:
    """
    Returns a zero-row DataFrame that keeps the schema of the input.

    The result is obtained by taking an empty positional slice of `df`,
    so the column labels and their dtypes carry over unchanged.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame whose structure should be kept.

    Returns
    -------
    pd.DataFrame
        A DataFrame with no rows and the same columns and dtypes as `df`.

    Examples
    --------
    >>> df = pd.DataFrame({'x': [1, 2, 3]})
    >>> clear(df)
    Empty DataFrame
    Columns: [x]
    Index: []
    """
    # An empty row range selects nothing but keeps every column.
    schema_only = df.iloc[0:0]
    return schema_only
def to_records(df: pd.DataFrame) -> list[dict[t.Any, t.Any]]:
    """
    Converts a DataFrame into a list of per-row dictionaries with
    missing values expressed as ``None``.

    Databases commonly expect ``None`` rather than NaN for nulls, which
    makes this output convenient for inserts.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame to convert.

    Returns
    -------
    list of dict
        One dictionary per row, keyed by column label.

    Examples
    --------
    >>> df = pd.DataFrame({'a': [1, None], 'b': ['x', 'y']})
    >>> to_records(df)
    [{'a': 1.0, 'b': 'x'}, {'a': None, 'b': 'y'}]
    """
    # Step 1: fill missing entries with NaN so there is one marker to swap.
    nan_filled = df.fillna(value=np.nan)
    # Step 2: swap that NaN marker for None.
    with_nones = nan_filled.replace(to_replace=np.nan, value=None)
    # Step 3: emit one dict per row.
    return with_nones.to_dict(orient='records')  # type: ignore[no-any-return]