Skip to content

groupby_and_agg

laktory.narwhals_ext.dataframe.groupby_and_agg ¤

FUNCTION DESCRIPTION
groupby_and_agg

Apply a groupby and create aggregation columns.

groupby_and_agg(self, groupby_columns=None, agg_expressions=None) ¤

Apply a groupby and create aggregation columns.

PARAMETER DESCRIPTION
groupby_columns

List of column names to group by

TYPE: list[str] DEFAULT: None

agg_expressions

List of expressions defining the aggregations. String entries are parsed into expressions before aggregating.

TYPE: list[DataFrameColumnExpr | str | Expr] DEFAULT: None

Examples:

import narwhals as nw
import polars as pl

import laktory as lk  # noqa: F401

df0 = nw.from_native(
    pl.DataFrame(
        {
            "x": [0, 0, 1, 1],
            "y": [1, 2, 3, 4],
        }
    )
)

df = df0.laktory.groupby_and_agg(
    groupby_columns=["x"],
    agg_expressions=[
        "nw.col('y').mean().alias('mean_price')",
    ],
)

print(df.sort("x"))
'''
┌──────────────────┐
|Narwhals DataFrame|
|------------------|
|| x | mean_price ||
||---|------------||
|| 0 | 1.5        ||
|| 1 | 3.5        ||
└──────────────────┘
'''
Source code in laktory/narwhals_ext/dataframe/groupby_and_agg.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
def groupby_and_agg(
    self,
    groupby_columns: list[str] | None = None,
    agg_expressions: list[DataFrameColumnExpr | str | nw.Expr] | None = None,
) -> AnyFrame:
    """
    Apply a groupby and create aggregation columns.

    Parameters
    ----------
    groupby_columns:
        List of column names to group by. `None` is treated as an empty list.
    agg_expressions:
        List of expressions defining the aggregations. Each item may be an
        expression string, a dictionary of `DataFrameColumnExpr` keyword
        arguments, a `DataFrameColumnExpr` instance or an already-built
        expression. Strings, dictionaries and `DataFrameColumnExpr`
        instances are converted with `to_expr()`; any other value is
        forwarded to `agg` unchanged.

    Returns
    -------
    :
        Result of `group_by(...).agg(...)` applied to the wrapped dataframe.

    Raises
    ------
    ValueError
        If `agg_expressions` is `None`.

    Examples
    --------
    ```py
    import narwhals as nw
    import polars as pl

    import laktory as lk  # noqa: F401

    df0 = nw.from_native(
        pl.DataFrame(
            {
                "x": [0, 0, 1, 1],
                "y": [1, 2, 3, 4],
            }
        )
    )

    df = df0.laktory.groupby_and_agg(
        groupby_columns=["x"],
        agg_expressions=[
            "nw.col('y').mean().alias('mean_price')",
        ],
    )

    print(df.sort("x"))
    '''
    ┌──────────────────┐
    |Narwhals DataFrame|
    |------------------|
    || x | mean_price ||
    ||---|------------||
    || 0 | 1.5        ||
    || 1 | 3.5        ||
    └──────────────────┘
    '''
    ```
    """
    # Imported at call time rather than at module level, as in the original
    # implementation.
    from laktory.models.dataframe.dataframecolumnexpr import DataFrameColumnExpr

    # Parse inputs
    if agg_expressions is None:
        raise ValueError("`agg_expressions` must be specified")
    if groupby_columns is None:
        groupby_columns = []

    logger.info(f"Executing groupby ({groupby_columns}) with {agg_expressions}")

    # Groupby arguments: hand the backend a fresh list, not the caller's object
    groupby = list(groupby_columns)

    # Agg arguments: normalize every supported input form to an expression
    aggs = []
    for expr in agg_expressions:
        if isinstance(expr, str):
            expr = DataFrameColumnExpr(expr=expr).to_expr()
        elif isinstance(expr, dict):
            expr = DataFrameColumnExpr(**expr).to_expr()
        elif isinstance(expr, DataFrameColumnExpr):
            # The signature accepts model instances; they must be converted
            # too, otherwise the raw model object would be passed to `agg`.
            expr = expr.to_expr()

        aggs.append(expr)

    return self._df.group_by(groupby).agg(*aggs)