Source code for abraxos.validate

"""Pydantic model validation for DataFrame rows."""

from __future__ import annotations

import typing as t

import pandas as pd

from abraxos import utils

__all__ = ['PydanticModel', 'ValidateResult', 'validate']


[docs] class PydanticModel(t.Protocol): """ Protocol representing a Pydantic-like model for validation and serialization. """
[docs] def model_validate(self, record: dict[t.Any, t.Any]) -> PydanticModel: """ Validates a dictionary record and returns a validated model instance. """ ...
[docs] def model_dump(self) -> dict: """ Serializes the model into a dictionary. """ ...
class ValidateResult(t.NamedTuple):
    """
    Outcome of validating a DataFrame with a Pydantic-like model.

    The tuple unpacks positionally as ``(errors, errored_df, success_df)``.

    Attributes
    ----------
    errors : list of Exception
        Exceptions encountered during validation.
    errored_df : pd.DataFrame
        Rows that failed validation.
    success_df : pd.DataFrame
        Rows that were validated and serialized successfully.
    """

    # Exceptions collected while validating rows.
    errors: list[Exception]
    # Rows that did not pass validation.
    errored_df: pd.DataFrame
    # Rows that passed validation and were serialized back.
    success_df: pd.DataFrame
def validate(
    df: pd.DataFrame, model: type[PydanticModel] | PydanticModel
) -> ValidateResult:
    """
    Validate every row of a DataFrame against a Pydantic-like model.

    Each record produced by ``utils.to_records(df)`` is handed to
    ``model.model_validate``. A row that validates is rebuilt from the
    model's ``model_dump`` output; a row that raises is kept as-is and
    its exception is collected instead of propagating.

    Parameters
    ----------
    df : pd.DataFrame
        The DataFrame containing records to be validated.
    model : type[PydanticModel] or PydanticModel
        A Pydantic-style model class or instance exposing
        `model_validate` and `model_dump`.

    Returns
    -------
    ValidateResult
        A named tuple with:
        - errors: exceptions raised while processing rows, in row order.
        - errored_df: rows that failed, labelled with their original index.
        - success_df: rows that validated, labelled with their original
          index.

    Notes
    -----
    Both result frames are restricted to ``df.columns`` in the input
    order. Keys that ``model_dump`` adds beyond the input columns are
    therefore dropped, and a dumped row that lacks an input column makes
    the column selection fail (pandas raises ``KeyError`` for missing
    labels). When a result frame is empty, ``utils.clear(df)`` is
    returned in its place (presumably an empty frame shaped like ``df``;
    see ``abraxos.utils``).

    Examples
    --------
    >>> import pandas as pd
    >>> from pydantic import BaseModel
    >>> class Person(BaseModel):
    ...     name: str
    ...     age: int
    >>> df = pd.DataFrame({'name': ['Alice', 'Bob'], 'age': [30, 'invalid']})
    >>> result = validate(df, Person)
    >>> len(result.success_df)
    1
    >>> len(result.errored_df)
    1
    """

    def _match_input_columns(frame: pd.DataFrame) -> pd.DataFrame:
        # An empty result falls back to utils.clear(df); otherwise the
        # columns are selected in the input DataFrame's order.
        if frame.empty:
            return utils.clear(df)
        return frame[df.columns]

    failures: list[Exception] = []
    rejected_rows: list[pd.Series] = []
    accepted_rows: list[pd.Series] = []

    raw_rows: list[dict] = utils.to_records(df)
    for label, raw in zip(df.index, raw_rows):
        try:
            instance: PydanticModel = model.model_validate(raw)  # type: ignore[call-arg, arg-type]
            # Building the Series stays inside the try so that a failing
            # model_dump is recorded as a row error as well.
            accepted_rows.append(pd.Series(instance.model_dump(), name=label))
        except Exception as exc:
            failures.append(exc)
            rejected_rows.append(pd.Series(raw, name=label))

    rejected_frame = pd.DataFrame(rejected_rows)
    accepted_frame = pd.DataFrame(accepted_rows)

    rejected_frame = _match_input_columns(rejected_frame)
    accepted_frame = _match_input_columns(accepted_frame)

    return ValidateResult(failures, rejected_frame, accepted_frame)