Basic analytics

Assuming you have a table:

import ibis
import ibis.selectors as s

ibis.options.interactive = True

t = ibis.examples.penguins.fetch()
t.head(3)
1
Ensure you install Ibis first.
2
Use interactive mode for exploratory data analysis (EDA) or demos.
3
Load a dataset from the built-in examples.
4
Display the table.
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃
┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ stringstringfloat64float64int64int64stringint64 │
├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Adelie Torgersen39.118.71813750male  2007 │
│ Adelie Torgersen39.517.41863800female2007 │
│ Adelie Torgersen40.318.01953250female2007 │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘

You can perform basic analytics by selecting, grouping, aggregating, filtering, sorting, mutating, and joining data.

Selecting

Use the .select() method to select columns:

t.select("species", "island", "year")
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━┓
┃ species  island     year  ┃
┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━┩
│ stringstringint64 │
├─────────┼───────────┼───────┤
│ Adelie Torgersen2007 │
│ Adelie Torgersen2007 │
│ Adelie Torgersen2007 │
│ Adelie Torgersen2007 │
│ Adelie Torgersen2007 │
│ Adelie Torgersen2007 │
│ Adelie Torgersen2007 │
│ Adelie Torgersen2007 │
│ Adelie Torgersen2007 │
│ Adelie Torgersen2007 │
│  │
└─────────┴───────────┴───────┘

Filtering

Use the .filter() method to filter rows:

t.filter(t["species"] != "Adelie")
┏━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species  island  bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃
┡━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ stringstringfloat64float64int64int64stringint64 │
├─────────┼────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Gentoo Biscoe46.113.22114500female2007 │
│ Gentoo Biscoe50.016.32305700male  2007 │
│ Gentoo Biscoe48.714.12104450female2007 │
│ Gentoo Biscoe50.015.22185700male  2007 │
│ Gentoo Biscoe47.614.52155400male  2007 │
│ Gentoo Biscoe46.513.52104550female2007 │
│ Gentoo Biscoe45.414.62114800female2007 │
│ Gentoo Biscoe46.715.32195200male  2007 │
│ Gentoo Biscoe43.313.42094400female2007 │
│ Gentoo Biscoe46.815.42155150male  2007 │
│  │
└─────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘

Aggregating

Use the .aggregate() method to aggregate data:

t.aggregate(avg_bill_length=t["bill_length_mm"].mean())
┏━━━━━━━━━━━━━━━━━┓
┃ avg_bill_length ┃
┡━━━━━━━━━━━━━━━━━┩
│ float64         │
├─────────────────┤
│        43.92193 │
└─────────────────┘

Grouping

Use the .group_by() method to group data:

t.group_by(["species", "island"]).aggregate(avg_bill_length=t["bill_length_mm"].mean())
┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ species    island     avg_bill_length ┃
┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ stringstringfloat64         │
├───────────┼───────────┼─────────────────┤
│ Adelie   Torgersen38.950980 │
│ Adelie   Biscoe   38.975000 │
│ Adelie   Dream    38.501786 │
│ Gentoo   Biscoe   47.504878 │
│ ChinstrapDream    48.833824 │
└───────────┴───────────┴─────────────────┘

Ordering

Use the order_by() method to order data:

t.order_by(t["bill_length_mm"].desc())
┏━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species    island  bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃
┡━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ stringstringfloat64float64int64int64stringint64 │
├───────────┼────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Gentoo   Biscoe59.617.02306050male  2007 │
│ ChinstrapDream 58.017.81813700female2007 │
│ Gentoo   Biscoe55.917.02285600male  2009 │
│ ChinstrapDream 55.819.82074000male  2009 │
│ Gentoo   Biscoe55.116.02305850male  2009 │
│ Gentoo   Biscoe54.315.72315650male  2008 │
│ ChinstrapDream 54.220.82014300male  2008 │
│ ChinstrapDream 53.519.92054500male  2008 │
│ Gentoo   Biscoe53.415.82195500male  2009 │
│ ChinstrapDream 52.820.02054550male  2008 │
│  │
└───────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘

Mutating

Use the .mutate() method to create new columns:

t.mutate(bill_length_cm=t["bill_length_mm"] / 10).relocate(
    t.columns[0:2], "bill_length_cm"
)
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species  island     bill_length_cm  bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃
┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ stringstringfloat64float64float64int64int64stringint64 │
├─────────┼───────────┼────────────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Adelie Torgersen3.9139.118.71813750male  2007 │
│ Adelie Torgersen3.9539.517.41863800female2007 │
│ Adelie Torgersen4.0340.318.01953250female2007 │
│ Adelie TorgersenNULLNULLNULLNULLNULLNULL2007 │
│ Adelie Torgersen3.6736.719.31933450female2007 │
│ Adelie Torgersen3.9339.320.61903650male  2007 │
│ Adelie Torgersen3.8938.917.81813625female2007 │
│ Adelie Torgersen3.9239.219.61954675male  2007 │
│ Adelie Torgersen3.4134.118.11933475NULL2007 │
│ Adelie Torgersen4.2042.020.21904250NULL2007 │
│  │
└─────────┴───────────┴────────────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘

Joining

Use the .join() method to join data:

t.join(t, t["species"] == t["species"], how="left_semi")
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species  island     bill_length_mm  bill_depth_mm  flipper_length_mm  body_mass_g  sex     year  ┃
┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ stringstringfloat64float64int64int64stringint64 │
├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Adelie Torgersen39.118.71813750male  2007 │
│ Adelie Torgersen39.517.41863800female2007 │
│ Adelie Torgersen40.318.01953250female2007 │
│ Adelie TorgersenNULLNULLNULLNULLNULL2007 │
│ Adelie Torgersen36.719.31933450female2007 │
│ Adelie Torgersen39.320.61903650male  2007 │
│ Adelie Torgersen38.917.81813625female2007 │
│ Adelie Torgersen39.219.61954675male  2007 │
│ Adelie Torgersen34.118.11933475NULL2007 │
│ Adelie Torgersen42.020.21904250NULL2007 │
│  │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘

Combining it all together

We can use the underscore to chain expressions together.

t.join(t, t["species"] == t["species"], how="left_semi").filter(
    ibis._["species"] != "Adelie"
).group_by(["species", "island"]).aggregate(
    avg_bill_length=ibis._["bill_length_mm"].mean()
).order_by(
    ibis._["avg_bill_length"].desc()
)
┏━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
┃ species    island  avg_bill_length ┃
┡━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
│ stringstringfloat64         │
├───────────┼────────┼─────────────────┤
│ ChinstrapDream 48.833824 │
│ Gentoo   Biscoe47.504878 │
└───────────┴────────┴─────────────────┘

Since we’ve turned on interactive mode here, this executes the query and displays the result.

Back to top