Skip to content

DataFrameColumnExpr

laktory.models.dataframe.DataFrameColumnExpr ¤

Bases: BaseModel, PipelineChild

DataFrame column expression defined by the string representation of either a DataFrame API expression (native or Narwhals) or a SQL statement.

Examples:

Define serializable expressions in native DataFrame API.

import polars as pl

import laktory as lk

# Sample data: a single integer column `x`
df = pl.DataFrame(
    {
        "x": [1, 2, 3],
    }
)

# Native Polars expression stored as a string; no `type` is given here
expr1 = lk.models.DataFrameColumnExpr(
    expr="pl.col('x')+pl.lit(1)",
    dataframe_backend="POLARS",
    dataframe_api="NATIVE",
)

# Column expression written as a SQL statement (explicit `type="SQL"`)
expr2 = lk.models.DataFrameColumnExpr(
    expr="2*x + 1",
    type="SQL",
    dataframe_backend="POLARS",
    dataframe_api="NATIVE",
)

# `to_expr()` converts each string into an expression object that is passed
# to `with_columns` to create the new columns `y1` and `y2`
df = df.with_columns(
    y1=expr1.to_expr(),
    y2=expr2.to_expr(),
)

print(df)
# Expected output:
'''
| x | y1 | y2 |
|---|----|----|
| 1 | 2  | 3  |
| 2 | 3  | 5  |
| 3 | 4  | 7  |
'''

Define serializable expressions in Narwhals DataFrame API.

import narwhals as nw
import polars as pl

import laktory as lk

# Sample data: a Polars DataFrame with a single integer column `x`,
# wrapped as a Narwhals DataFrame
df = nw.from_native(
    pl.DataFrame(
        {
            "x": [1, 2, 3],
        }
    )
)

# Narwhals expression stored as a string; no `type` is given here
expr1 = lk.models.DataFrameColumnExpr(
    expr="nw.col('x')+nw.lit(1)",
    dataframe_backend="POLARS",
    dataframe_api="NARWHALS",
)

# Column expression written as a SQL statement (explicit `type="SQL"`)
expr2 = lk.models.DataFrameColumnExpr(
    expr="2*x + 1",
    type="SQL",
    dataframe_backend="POLARS",
    dataframe_api="NARWHALS",
)

# `to_expr()` converts each string into an expression object that is passed
# to `with_columns` to create the new columns `y1` and `y2`
df = df.with_columns(
    y1=expr1.to_expr(),
    y2=expr2.to_expr(),
)

print(df)
# Expected output:
'''
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
| | x | y1 | y2 |  |
| |---|----|----|  |
| | 1 | 2  | 3  |  |
| | 2 | 3  | 5  |  |
| | 3 | 4  | 7  |  |
└──────────────────┘
'''
PARAMETER DESCRIPTION
expr

Expression string representation

TYPE: str | VariableType

type

Expression type: DF or SQL. If None is specified, the type is guessed from the provided expression.

TYPE: Literal['SQL', 'DF'] | VariableType DEFAULT: None

METHOD DESCRIPTION
to_expr

Column expression expressed as DataFrame API object

to_sql_expr

Column expression expressed as a SQL Statement

to_expr() ¤

Column expression expressed as DataFrame API object

Source code in laktory/models/dataframe/dataframecolumnexpr.py
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
def to_expr(self) -> Union[nw.Expr, "pl.Expr", "F.Column"]:
    """
    Column expression expressed as a DataFrame API object

    The expression string is first flattened to a single line (newlines are
    replaced by spaces) and then converted depending on `dataframe_api`,
    `dataframe_backend` and `type`:

    - Narwhals API, SQL type: `laktory.narwhals_ext.functions.sql_expr`
    - Narwhals API, other type: evaluated with `nw`, `col` and `lit` in scope
    - PySpark backend, SQL type: `pyspark.sql.functions.expr`
    - PySpark backend, other type: evaluated with `F`, `T`, `col` and `lit`
      in scope
    - Polars backend, SQL type: `polars.sql_expr`
    - Polars backend, other type: evaluated with `pl`, `F`, `col` and `lit`
      in scope

    Returns
    -------
    :
        Expression object built from the expression string.

    Raises
    ------
    ValueError
        If the native API is selected and `dataframe_backend` is neither
        PySpark nor Polars.
    """

    # Flatten multi-line strings: `eval` only accepts a single expression and
    # rejects one that is broken across lines outside of brackets.
    _value = self.expr.replace("\n", " ")

    # NOTE(security): the non-SQL branches below run `eval` on the expression
    # string, which executes arbitrary Python code. Expression strings must
    # only come from trusted sources.

    if self.dataframe_api == "NARWHALS":
        if self.type == "SQL":
            from laktory.narwhals_ext.functions import sql_expr

            expr = sql_expr(_value)
        else:
            # Imports required to evaluate expressions
            import narwhals as nw  # noqa: F401
            from narwhals import col  # noqa: F401
            from narwhals import lit  # noqa: F401

            expr = eval(_value)
    else:
        if self.dataframe_backend == DataFrameBackends.PYSPARK:
            if self.type == "SQL":
                import pyspark.sql.functions as F

                expr = F.expr(_value)
            else:
                # Imports required to evaluate expressions
                import pyspark.sql.functions as F  # noqa: F401
                import pyspark.sql.types as T  # noqa: F401
                from pyspark.sql.functions import col  # noqa: F401
                from pyspark.sql.functions import lit  # noqa: F401

                expr = eval(_value)

        elif self.dataframe_backend == DataFrameBackends.POLARS:
            if self.type == "SQL":
                import polars as pl

                expr = pl.sql_expr(_value)
            else:
                # Imports required to evaluate expressions
                import polars as pl  # noqa: F401
                import polars.functions as F  # noqa: F401
                from polars import col  # noqa: F401
                from polars import lit  # noqa: F401

                # Evaluate the flattened string (not the raw `self.expr`) so
                # that multi-line expressions behave as in the other branches.
                expr = eval(_value)

        else:
            raise ValueError(
                f"`dataframe_backend` '{self.dataframe_backend}' is not supported."
            )

    return expr

to_sql_expr() ¤

Column expression expressed as a SQL Statement

Source code in laktory/models/dataframe/dataframecolumnexpr.py
159
160
161
162
163
164
165
166
167
def to_sql_expr(self) -> str:
    """
    Column expression expressed as a SQL statement

    Returns
    -------
    :
        The expression string, unchanged, when the expression type is SQL.

    Raises
    ------
    ValueError
        If the expression type is anything other than SQL.
    """
    if self.type != "SQL":
        # TODO: Use SQLFrame?
        raise ValueError("DataFrame expression can't be converted to SQL")

    # Already pure SQL: nothing to convert
    return self.expr