How to use built-in or custom functions in groupby aggregations and set the names of the resulting columns.
import pandas as pd
import numpy as np
# Load the iris dataset from the seaborn-data GitHub repository.
# The aggregation below reads its "species" and "petal_length" columns.
df = pd.read_csv(
"https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv"
)
# Per-species summary of petal_length using named aggregation: every keyword
# passed to ``agg`` becomes an output column, computed from the given input
# column with either a built-in aggregation (by name) or a custom callable
# that receives the group's values.
df_grouped = df.groupby("species").agg(
    pl_min=pd.NamedAgg(column="petal_length", aggfunc="min"),
    pl_max=pd.NamedAgg(
        column="petal_length",
        aggfunc=lambda values: max(values),
    ),
    pl_perc95=pd.NamedAgg(
        column="petal_length",
        aggfunc=lambda values: np.percentile(values, 95),
    ),
).reset_index()  # move the "species" group key from the index to a column
df_grouped
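Output:

|   | species    | pl_min | pl_max | pl_perc95 |
|---|------------|--------|--------|-----------|
| 0 | setosa     | 1.0    | 1.9    | 1.700     |
| 1 | versicolor | 3.0    | 5.1    | 4.900     |
| 2 | virginica  | 4.5    | 6.9    | 6.655     |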
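Aggregate all rows: to apply the same named aggregations to the whole DataFrame instead of per group, group by a function that returns the same value for every row, so that all rows fall into a single group.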
import pandas as pd
import numpy as np
# Load the iris dataset from the seaborn-data GitHub repository.
# The aggregation below reads its "petal_length" column.
df = pd.read_csv(
"https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv"
)
# Summary of petal_length over all rows at once. The grouping function is
# called on each index label and always returns True, so every row lands in
# one single group (from https://stackoverflow.com/a/46583472). Each keyword
# passed to ``agg`` names one output column.
df_grouped = df.groupby(lambda _label: True).agg(
    pl_min=pd.NamedAgg(column="petal_length", aggfunc="min"),
    pl_max=pd.NamedAgg(
        column="petal_length",
        aggfunc=lambda values: max(values),
    ),
    pl_perc95=pd.NamedAgg(
        column="petal_length",
        aggfunc=lambda values: np.percentile(values, 95),
    ),
).reset_index()  # move the constant group key out of the index
df_grouped