Source code for abraxos.utils
"""Utility functions for DataFrame operations."""
from __future__ import annotations
import typing as t
import numpy as np
import pandas as pd
__all__ = ['split', 'clear', 'to_records']
[docs]
def split(
    df: pd.DataFrame,
    i: int = 2
) -> tuple[pd.DataFrame, ...]:
    """
    Splits a DataFrame into `i` approximately equal, contiguous row partitions.

    Rows are sliced positionally with ``DataFrame.iloc`` rather than via
    ``np.array_split``. ``np.array_split`` on a DataFrame goes through
    ``DataFrame.swapaxes``, which pandas has deprecated; slicing directly
    avoids that dependency and keeps each part's column labels and index.

    The partition sizes follow the ``np.array_split`` convention: when
    ``len(df)`` is not divisible by `i`, the first ``len(df) % i`` parts
    receive one extra row.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame to be split.
    i : int, optional
        The number of parts to split the DataFrame into (default is 2).

    Returns
    -------
    tuple of pd.DataFrame
        A tuple containing `i` DataFrames, each being a partition of the
        original DataFrame. Parts are empty when `i` exceeds ``len(df)``.

    Raises
    ------
    ValueError
        If `i` is not a positive number of sections.

    Examples
    --------
    >>> import pandas as pd
    >>> import abraxos
    >>> df = pd.DataFrame({'A': range(10)})
    >>> parts = abraxos.split(df, 3)
    >>> [len(part) for part in parts]
    [4, 3, 3]
    >>> parts[1]
       A
    4  4
    5  5
    6  6
    """
    sections = int(i)
    if sections <= 0:
        # Same error type and wording NumPy uses for an invalid section count.
        raise ValueError("number sections must be larger than 0.")

    base_size, extras = divmod(len(df), sections)

    parts: list[pd.DataFrame] = []
    start = 0
    for position in range(sections):
        # The first `extras` parts absorb the remainder, one row each.
        stop = start + base_size + (1 if position < extras else 0)
        parts.append(df.iloc[start:stop])
        start = stop
    return tuple(parts)
[docs]
def clear(df: pd.DataFrame) -> pd.DataFrame:
    """
    Returns a zero-row DataFrame that keeps the columns and dtypes of the input.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame whose structure should be kept.

    Returns
    -------
    pd.DataFrame
        The result of positionally slicing `df` down to no rows.

    Examples
    --------
    >>> df = pd.DataFrame({'x': [1, 2, 3]})
    >>> clear(df)
    Empty DataFrame
    Columns: [x]
    Index: []
    """
    # A positional slice that stops at row 0 selects nothing but keeps the schema.
    no_rows = slice(None, 0)
    schema_only = df.iloc[no_rows]
    return schema_only
[docs]
def to_records(df: pd.DataFrame) -> list[dict[t.Any, t.Any]]:
    """
    Converts a DataFrame into a list of per-row dictionaries with NaN turned into None.

    Handy when the rows are headed for a database driver that expects
    `None` for null values.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame to convert. It is not modified; the substitutions
        are applied to new frames returned by pandas.

    Returns
    -------
    list of dict
        One dict per row, keyed by column label.

    Examples
    --------
    >>> df = pd.DataFrame({'a': [1, None], 'b': ['x', 'y']})
    >>> to_records(df)
    [{'a': 1.0, 'b': 'x'}, {'a': None, 'b': 'y'}]
    """
    # First pass: fill missing entries with NaN so a single marker remains.
    nan_marked = df.fillna(np.nan)
    # Second pass: swap that NaN marker for Python's None.
    none_marked = nan_marked.replace(np.nan, None)
    return none_marked.to_dict(orient='records')  # type: ignore[no-any-return]