Skip to content

DataFrameSchema

laktory.models.dataframe.DataFrameSchema ¤

Bases: BaseModel

DataFrame schema. Typically used to explicitly express a schema when reading files.

Examples:

from laktory import models

schema = models.DataFrameSchema(columns={"a": "string", "x": "double"})
PARAMETER DESCRIPTION
columns

Dict or list of columns

TYPE: dict[str, str | DType | DataFrameColumn] | list[DataFrameColumn] | VariableType

METHOD DESCRIPTION
from_df

Create a DataFrameSchema from a DataFrame

from_narwhals

Create a DataFrameSchema from a Narwhals schema

to_narwhals

Returns a Narwhals schema object

to_polars

Returns a Polars schema object

to_pyspark

Returns a Spark schema object

to_string

Returns a string representation of the schema

from_df(df) classmethod ¤

Create a DataFrameSchema from a DataFrame

Columns are populated from the Narwhals schema for introspection. The native schema is stored directly so that `to_native()` can return it as-is, bypassing column-by-column conversion.

Source code in laktory/models/dataframe/dataframeschema.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
@classmethod
def from_df(cls, df) -> "DataFrameSchema":
    """
    Create a DataFrameSchema from a DataFrame

    Columns are populated from the Narwhals schema for introspection. The
    native schema is stored directly so that `to_native()` can return it as-is,
    bypassing column-by-column conversion.

    Parameters
    ----------
    df:
        A Narwhals `DataFrame` / `LazyFrame`, or a native dataframe that
        Narwhals is able to wrap.

    Returns
    -------
    :
        Schema whose columns are built from `df.collect_schema()` and whose
        `_native_schema` is the `.schema` of the underlying native dataframe.
    """
    # Wrapping is done by the module-level Narwhals function; native frames
    # do not expose a `from_native` method of their own.
    if not isinstance(df, (nw.LazyFrame, nw.DataFrame)):
        df = nw.from_native(df)

    obj = cls.from_narwhals(df.collect_schema())
    obj._native_schema = df.to_native().schema

    return obj

from_narwhals(schema, native_schema=None) classmethod ¤

Create a DataFrameSchema from a Narwhals schema

Source code in laktory/models/dataframe/dataframeschema.py
83
84
85
86
87
88
89
90
91
92
93
94
@classmethod
def from_narwhals(cls, schema: nw.Schema, native_schema=None) -> "DataFrameSchema":
    """
    Create a DataFrameSchema from a Narwhals schema

    Parameters
    ----------
    schema:
        Narwhals schema; each `(name, dtype)` item becomes one
        `DataFrameColumn`.
    native_schema:
        Optional native schema object to keep alongside the columns. When
        provided (i.e. not `None`) it is stored on `_native_schema`.

    Returns
    -------
    :
        Schema with one column per entry of `schema`.
    """
    columns = [
        DataFrameColumn(name=name, dtype=DType.from_narwhals(dtype))
        for name, dtype in schema.items()
    ]
    obj = cls(columns=columns)
    # Compare against None explicitly: schema objects are usually sized
    # containers, so an empty-but-provided schema would be falsy and would
    # otherwise be silently discarded.
    if native_schema is not None:
        obj._native_schema = native_schema

    return obj

to_narwhals() ¤

Returns a Narwhals schema object

Source code in laktory/models/dataframe/dataframeschema.py
100
101
102
103
104
105
def to_narwhals(self) -> nw.Schema:
    """Build a Narwhals schema mapping each column name to its Narwhals dtype."""
    return nw.Schema({col.name: col.dtype.to_narwhals() for col in self.columns})

to_polars() ¤

Returns a Polars schema object

Source code in laktory/models/dataframe/dataframeschema.py
123
124
125
126
127
128
129
130
def to_polars(self):
    """Build a Polars schema mapping each column name to its Polars dtype."""
    # Imported lazily so Polars is only required when this method is called.
    import polars as pl

    return pl.Schema({col.name: col.dtype.to_polars() for col in self.columns})

to_pyspark() ¤

Returns a Spark schema object

Source code in laktory/models/dataframe/dataframeschema.py
133
134
135
136
137
138
139
140
141
142
def to_pyspark(self):
    """Build a Spark `StructType` with one `StructField` per column.

    Each field carries the column's name, its Spark dtype and its
    `nullable` flag.
    """
    # Imported lazily so PySpark is only required when this method is called.
    import pyspark.sql.types as T

    fields = [
        T.StructField(col.name, col.dtype.to_pyspark(), col.nullable)
        for col in self.columns
    ]
    return T.StructType(fields)

to_string(indent=None) ¤

Returns a string representation of the schema

Source code in laktory/models/dataframe/dataframeschema.py
149
150
151
152
153
def to_string(self, indent=None):
    """Serialize the schema as a JSON object of column name -> dtype string.

    `indent` is forwarded unchanged to `json.dumps`.
    """
    dtype_by_name = {}
    for col in self.columns:
        dtype_by_name[col.name] = col.dtype.to_string()
    return json.dumps(dtype_by_name, indent=indent)