Column selectors

Choose Table columns based on dtype, regex, and other criteria

where

where(predicate)

Select columns that satisfy predicate.

Use this selector when none of the other selectors meets your needs.

Parameters

Name Type Description Default
predicate Callable[[ir.Value], bool] A callable that accepts an ibis value expression and returns a bool required

Examples

>>> import ibis
>>> import ibis.selectors as s
>>> t = ibis.table(dict(a="float32"), name="t")
>>> expr = t.select(s.where(lambda col: col.get_name() == "a"))
>>> expr.columns
('a',)

numeric

numeric()

Return numeric columns.

Examples

>>> import ibis
>>> import ibis.selectors as s
>>> t = ibis.table(dict(a="int", b="string", c="array<string>"), name="t")
>>> t.columns
('a', 'b', 'c')
>>> expr = t.select(s.numeric())  # `a` has integer type, so it's numeric
>>> expr.columns
('a',)

See Also

of_type

of_type

of_type(dtype)

Select columns of type dtype.

Parameters

Name Type Description Default
dtype dt.DataType | str | type[dt.DataType] DataType instance, str or DataType class required

Examples

Select according to a specific DataType instance

>>> import ibis
>>> import ibis.expr.datatypes as dt
>>> import ibis.selectors as s
>>> t = ibis.table(dict(name="string", siblings="array<string>", parents="array<int64>"))
>>> expr = t.select(s.of_type(dt.Array(dt.string)))
>>> expr.columns
('siblings',)

Strings are also accepted

>>> expr = t.select(s.of_type("array<string>"))
>>> expr.columns
('siblings',)

Abstract/unparametrized types may also be specified by their string name (e.g. “integer” for any integer type), or by passing in a DataType class instead. The following options are equivalent.

>>> expr1 = t.select(s.of_type("array"))
>>> expr2 = t.select(s.of_type(dt.Array))
>>> expr1.equals(expr2)
True
>>> expr2.columns
('siblings', 'parents')

See Also

numeric

startswith

startswith(prefixes)

Select columns whose name starts with one of prefixes.

Parameters

Name Type Description Default
prefixes str | tuple[str, ...] Prefixes to compare column names against required

Examples

>>> import ibis
>>> import ibis.selectors as s
>>> t = ibis.table(dict(apples="int", oranges="float", bananas="bool"), name="t")
>>> expr = t.select(s.startswith(("a", "b")))
>>> expr.columns
('apples', 'bananas')

See Also

endswith

endswith

endswith(suffixes)

Select columns whose name ends with one of suffixes.

Parameters

Name Type Description Default
suffixes str | tuple[str, ...] Suffixes to compare column names against required

Examples

>>> import ibis
>>> import ibis.selectors as s
>>> t = ibis.table(dict(model_id="int", model_name="str", user_id="int"), name="t")
>>> expr = t.select(s.endswith("id"))
>>> expr.columns
('model_id', 'user_id')

See Also

startswith

contains

contains(needles, how=builtins.any)

Return columns whose name contains needles.

Parameters

Name Type Description Default
needles str | tuple[str, ...] One or more strings to search for in column names required
how Callable[[Iterable[bool]], bool] A boolean reduction (such as builtins.any or builtins.all) that controls how the per-needle match results are combined. builtins.any

Examples

Select columns that contain either "a" or "b"

>>> import ibis
>>> import ibis.selectors as s
>>> t = ibis.table(
...     dict(a="int64", b="string", c="float", d="array<int16>", ab="struct<x: int>")
... )
>>> expr = t.select(s.contains(("a", "b")))
>>> expr.columns
('a', 'b', 'ab')

Select columns that contain all of "a" and "b", that is, both "a" and "b" must be in each column’s name to match.

>>> expr = t.select(s.contains(("a", "b"), how=all))
>>> expr.columns
('ab',)

See Also

matches

matches

matches(regex)

Return columns whose name matches the regular expression regex.

Parameters

Name Type Description Default
regex str | re.Pattern A string or re.Pattern object required

Examples

>>> import ibis
>>> import ibis.selectors as s
>>> t = ibis.table(dict(ab="string", abd="int", be="array<string>"))
>>> expr = t.select(s.matches(r"ab+"))
>>> expr.columns
('ab', 'abd')

See Also

contains

any_of

any_of(*predicates)

Include columns satisfying any of predicates.

Examples

>>> import ibis
>>> import ibis.selectors as s
>>> t = ibis.table(dict(model_id="int", model_name="str", user_id="int"), name="t")
>>> expr = t.select(s.any_of(s.endswith("id"), s.startswith("m")))
>>> expr.columns
('model_id', 'model_name', 'user_id')

all_of

all_of(*predicates)

Include columns satisfying all of predicates.

Examples

>>> import ibis
>>> import ibis.selectors as s
>>> t = ibis.table(dict(model_id="int", model_name="str", user_id="int"), name="t")
>>> expr = t.select(s.all_of(s.endswith("id"), s.startswith("m")))
>>> expr.columns
('model_id',)

cols

cols(*names)

Select specific column names.

Parameters

Name Type Description Default
names str | ir.Column The column names to select ()

Examples

>>> import ibis
>>> import ibis.selectors as s
>>> t = ibis.table({"a": "int", "b": "int", "c": "int"})
>>> expr = t.select(s.cols("a", "b"))
>>> expr.columns
('a', 'b')

See Also

index

across

across(selector, func, names=None)

Apply data transformations across multiple columns.

Parameters

Name Type Description Default
selector Selector | Iterable[str] | str An expression that selects columns on which the transformation function will be applied, an iterable of str column names or a single str column name. required
func Deferred | Callable[[ir.Value], ir.Value] | Mapping[str | None, Deferred | Callable[[ir.Value], ir.Value]] A function (or dictionary of functions) to use to transform the data. required
names str | Callable[[str, str | None], str] | None A lambda function or a format string to name the columns created by the transformation function. None

Returns

Name Type Description
Across An Across selector object

Examples

>>> import ibis
>>> ibis.options.interactive = True
>>> from ibis import _, selectors as s
>>> t = ibis.examples.penguins.fetch()
>>> t.select(s.startswith("bill")).mutate(
...     s.across(s.numeric(), dict(centered=_ - _.mean()), names="{fn}_{col}")
... )
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ bill_length_mm ┃ bill_depth_mm ┃ centered_bill_length_mm ┃ centered_bill_depth_mm ┃
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━┩
│ float64        │ float64       │ float64                 │ float64                │
├────────────────┼───────────────┼─────────────────────────┼────────────────────────┤
│           39.1 │          18.7 │                -4.82193 │                1.54883 │
│           39.5 │          17.4 │                -4.42193 │                0.24883 │
│           40.3 │          18.0 │                -3.62193 │                0.84883 │
│           NULL │          NULL │                    NULL │                   NULL │
│           36.7 │          19.3 │                -7.22193 │                2.14883 │
│           39.3 │          20.6 │                -4.62193 │                3.44883 │
│           38.9 │          17.8 │                -5.02193 │                0.64883 │
│           39.2 │          19.6 │                -4.72193 │                2.44883 │
│           34.1 │          18.1 │                -9.82193 │                0.94883 │
│           42.0 │          20.2 │                -1.92193 │                3.04883 │
│              … │             … │                       … │                      … │
└────────────────┴───────────────┴─────────────────────────┴────────────────────────┘

if_any

if_any(selector, predicate)

Return the disjunction (logical OR) of predicate applied to every column matched by selector.

Parameters

Name Type Description Default
selector Selector A column selector required
predicate Deferred | Callable A callable or deferred object defining a predicate to apply to each column from selector. required

Examples

>>> import ibis
>>> from ibis import selectors as s, _
>>> ibis.options.interactive = True
>>> penguins = ibis.examples.penguins.fetch()
>>> cols = s.across(s.endswith("_mm"), (_ - _.mean()) / _.std())
>>> expr = penguins.mutate(cols).filter(s.if_any(s.endswith("_mm"), _.abs() > 2))
>>> expr_by_hand = penguins.mutate(cols).filter(
...     (_.bill_length_mm.abs() > 2)
...     | (_.bill_depth_mm.abs() > 2)
...     | (_.flipper_length_mm.abs() > 2)
... )
>>> expr.equals(expr_by_hand)
True
>>> expr
┏━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex    ┃ year  ┃
┡━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ string  │ string │ float64        │ float64       │ float64           │ int64       │ string │ int64 │
├─────────┼────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Adelie  │ Biscoe │      -1.103002 │      0.733662 │         -2.056307 │        3150 │ female │  2007 │
│ Gentoo  │ Biscoe │       1.113285 │     -0.431017 │          2.068368 │        5700 │ male   │  2007 │
│ Gentoo  │ Biscoe │       2.871660 │     -0.076550 │          2.068368 │        6050 │ male   │  2007 │
│ Gentoo  │ Biscoe │       1.900890 │     -0.734846 │          2.139483 │        5650 │ male   │  2008 │
│ Gentoo  │ Biscoe │       1.076652 │     -0.177826 │          2.068368 │        5700 │ male   │  2008 │
│ Gentoo  │ Biscoe │       0.856855 │     -0.582932 │          2.068368 │        5800 │ male   │  2008 │
│ Gentoo  │ Biscoe │       1.497929 │     -0.076550 │          2.068368 │        5550 │ male   │  2009 │
│ Gentoo  │ Biscoe │       1.388031 │     -0.431017 │          2.068368 │        5500 │ male   │  2009 │
│ Gentoo  │ Biscoe │       2.047422 │     -0.582932 │          2.068368 │        5850 │ male   │  2009 │
│ Adelie  │ Dream  │      -2.165354 │     -0.836123 │         -0.918466 │        3050 │ female │  2009 │
│ …       │ …      │              … │             … │                 … │           … │ …      │     … │
└─────────┴────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘

if_all

if_all(selector, predicate)

Return the conjunction (logical AND) of predicate applied to every column matched by selector.

Parameters

Name Type Description Default
selector Selector A column selector required
predicate Deferred | Callable A callable or deferred object defining a predicate to apply to each column from selector. required

Examples

>>> import ibis
>>> from ibis import selectors as s, _
>>> ibis.options.interactive = True
>>> penguins = ibis.examples.penguins.fetch()
>>> cols = s.across(s.endswith("_mm"), (_ - _.mean()) / _.std())
>>> expr = penguins.mutate(cols).filter(s.if_all(s.endswith("_mm"), _.abs() > 1))
>>> expr_by_hand = penguins.mutate(cols).filter(
...     (_.bill_length_mm.abs() > 1)
...     & (_.bill_depth_mm.abs() > 1)
...     & (_.flipper_length_mm.abs() > 1)
... )
>>> expr.equals(expr_by_hand)
True
>>> expr
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species ┃ island    ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex    ┃ year  ┃
┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ string  │ string    │ float64        │ float64       │ float64           │ int64       │ string │ int64 │
├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Adelie  │ Dream     │      -1.157951 │      1.088129 │         -1.416272 │        3300 │ female │  2007 │
│ Adelie  │ Torgersen │      -1.231217 │      1.138768 │         -1.202926 │        3900 │ male   │  2008 │
│ Gentoo  │ Biscoe    │       1.149917 │     -1.443781 │          1.214987 │        5700 │ male   │  2007 │
│ Gentoo  │ Biscoe    │       1.040019 │     -1.089314 │          1.072757 │        4750 │ male   │  2008 │
│ Gentoo  │ Biscoe    │       1.131601 │     -1.089314 │          1.712792 │        5000 │ male   │  2008 │
│ Gentoo  │ Biscoe    │       1.241499 │     -1.089314 │          1.570562 │        5550 │ male   │  2008 │
│ Gentoo  │ Biscoe    │       1.351398 │     -1.494420 │          1.214987 │        5300 │ male   │  2009 │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘

index

index

Select columns by index.

Examples

>>> import ibis
>>> import ibis.selectors as s
>>> t = ibis.table(
...     {"a": "int", "b": "int", "c": "int", "d": "int", "e": "int"}
... )

Select one column by numeric index:

>>> expr = t.select(s.index[0])
>>> expr.columns
('a',)

Select multiple columns by numeric index:

>>> expr = t.select(s.index[[0, 1]])
>>> expr.columns
('a', 'b')

Select a slice of columns by numeric index:

>>> expr = t.select(s.index[1:4])
>>> expr.columns
('b', 'c', 'd')

Select a slice of columns by name:

>>> expr = t.select(s.index["b":"d"])
>>> expr.columns
('b', 'c', 'd')

See Also

cols

first

first()

Return the first column of a table.

Examples

>>> import ibis
>>> import ibis.selectors as s
>>> t = ibis.table(dict(model_id="int", model_name="str", user_id="int"), name="t")
>>> expr = t.select(s.first())
>>> expr.columns
('model_id',)

last

last()

Return the last column of a table.

Examples

>>> import ibis
>>> import ibis.selectors as s
>>> t = ibis.table(dict(model_id="int", model_name="str", user_id="int"), name="t")
>>> expr = t.select(s.last())
>>> expr.columns
('user_id',)

all

all()

Return every column from a table.

Examples

>>> import ibis
>>> import ibis.selectors as s
>>> t = ibis.table(dict(model_id="int", model_name="str", user_id="int"), name="t")
>>> expr = t.select(s.all())
>>> expr.columns
('model_id', 'model_name', 'user_id')

none

none()

Return no columns.

Examples

>>> import ibis
>>> import ibis.selectors as s
>>> ibis.options.interactive = True
>>> t = ibis.memtable(
...     {
...         "id": [1, 2, 3, 4, 5, 6],
...         "color": ["Red", "Green", "Blue", "Blue", "Red", "Blue"],
...     }
... )

s.none() results in an empty expansion.

>>> s.none().expand(t)
[]

This can be useful when you want to pivot a table without identifying unique observations.

>>> t.pivot_wider(
...     id_cols=s.none(),
...     names_from="color",
...     values_from="color",
...     values_agg="count",
...     names_sort=True,
... )
┏━━━━━━━┳━━━━━━━┳━━━━━━━┓
┃ Blue  ┃ Green ┃ Red   ┃
┡━━━━━━━╇━━━━━━━╇━━━━━━━┩
│ int64 │ int64 │ int64 │
├───────┼───────┼───────┤
│     3 │     1 │     2 │
└───────┴───────┴───────┘
Back to top