>>> import ibis
>>> @ibis.udf.scalar.builtin
... def hamming(a: str, b: str) -> int:
...     '''Compute the Hamming distance between two strings.'''
>>> expr = hamming("duck", "luck")
>>> con = ibis.connect("duckdb://")
>>> con.execute(expr)
1
Scalar user-defined function APIs
Scalar user-defined functions.
The `scalar` class itself is not a public API; its methods are.
Name | Description |
---|---|
builtin | Construct a scalar user-defined function that is built-in to the backend. |
pandas | Construct a vectorized scalar user-defined function that accepts pandas Series as inputs. |
pyarrow | Construct a vectorized scalar user-defined function that accepts PyArrow Arrays as input. |
python | Construct a non-vectorized scalar user-defined function that accepts Python scalar values as inputs. |
Construct a scalar user-defined function that is built-in to the backend.
Name | Type | Description | Default |
---|---|---|---|
fn | | The function to wrap. | None |
name | | The name of the UDF in the backend if different from the function name. | None |
database | | The database in which the builtin function resides. | None |
catalog | | The catalog in which the builtin function resides. | None |
signature | | If present, a tuple of the form ((arg0type, arg1type, ...), returntype). For example, a function taking an int and a float and returning a string would be ((int, float), str). If not present, the signature will be derived from the type annotations of the wrapped function. For builtin UDFs, only the return type annotation is required. See the user guide for more information. | None |
kwargs | | Additional backend-specific configuration arguments for the UDF. | {} |
Construct a vectorized scalar user-defined function that accepts pandas Series as inputs.
Name | Type | Description | Default |
---|---|---|---|
fn | | The function to wrap. | None |
name | | The name of the UDF in the backend if different from the function name. | None |
database | | The database in which to create the UDF. | None |
catalog | | The catalog in which to create the UDF. | None |
signature | | If present, a tuple of the form ((arg0type, arg1type, ...), returntype). For example, a function taking an int and a float and returning a string would be ((int, float), str). If not present, the signature will be derived from the type annotations of the wrapped function. | None |
kwargs | | Additional backend-specific configuration arguments for the UDF. | {} |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.memtable(dict(int_col=[1, 2, 3], str_col=["a", "b", "c"]))
>>> t
┏━━━━━━━━━┳━━━━━━━━━┓
┃ int_col ┃ str_col ┃
┡━━━━━━━━━╇━━━━━━━━━┩
│ int64   │ string  │
├─────────┼─────────┤
│       1 │ a       │
│       2 │ b       │
│       3 │ c       │
└─────────┴─────────┘
>>> @ibis.udf.scalar.pandas
... def str_cap(x: str) -> str:
...     # note usage of pandas `str` method
...     return x.str.capitalize()
>>> str_cap(t.str_col)  # doctest: +SKIP
┏━━━━━━━━━━━━━━━━━━━━┓
┃ str_cap_0(str_col) ┃
┡━━━━━━━━━━━━━━━━━━━━┩
│ string             │
├────────────────────┤
│ A                  │
│ B                  │
│ C                  │
└────────────────────┘
Define a UDF that adds one to an integer field that is part of a struct:
>>> import ibis.expr.datatypes as dt
>>> FieldType = dt.Struct({"a": "int"})
>>> @ibis.udf.scalar.pandas
... def add_one_py_struct(x: FieldType) -> int:
...     return x["a"] + 1
>>> t = ibis.memtable({"struct_col": [{"a": 1}, {"a": 2}, {"a": 3}]})
>>> con = ibis.pyspark.connect()
>>> expr = add_one_py_struct(t.struct_col).name("added_one")
>>> con.execute(expr)
0 2
1 3
2 4
Name: added_one, dtype: int64
Similarly, you can operate on maps as well:
>>> FieldType = dt.Map(dt.string, dt.int64)
>>> @ibis.udf.scalar.pandas
... def add_one_py_map(x: FieldType) -> int:
...     return x.map(lambda d: d["a"] + 1)
>>> t = ibis.memtable(
...     {"map_col": [{"a": 1}, {"a": 2}, {"a": 3}]},
...     schema={"map_col": "map<string, int>"},
... )
>>> expr = add_one_py_map(t.map_col).name("added_one")
>>> con.execute(expr)
0 2
1 3
2 4
Name: added_one, dtype: int64
Construct a vectorized scalar user-defined function that accepts PyArrow Arrays as input.
Name | Type | Description | Default |
---|---|---|---|
fn | | The function to wrap. | None |
name | | The name of the UDF in the backend if different from the function name. | None |
database | | The database in which to create the UDF. | None |
catalog | | The catalog in which to create the UDF. | None |
signature | | If present, a tuple of the form ((arg0type, arg1type, ...), returntype). For example, a function taking an int and a float and returning a string would be ((int, float), str). If not present, the signature will be derived from the type annotations of the wrapped function. | None |
kwargs | | Additional backend-specific configuration arguments for the UDF. | {} |
>>> import ibis
>>> import pyarrow.compute as pc
>>> from datetime import date
>>> ibis.options.interactive = True
>>> t = ibis.memtable(
...     dict(start_col=[date(2024, 4, 29)], end_col=[date(2025, 4, 29)]),
... )
>>> @ibis.udf.scalar.pyarrow
... def weeks_between(start: date, end: date) -> int:
...     return pc.weeks_between(start, end)
>>> weeks_between(t.start_col, t.end_col)
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ weeks_between_0(start_col, end_col) ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ int64                               │
├─────────────────────────────────────┤
│                                  52 │
└─────────────────────────────────────┘
Define a UDF that adds one to an integer field that is part of a struct:
>>> import pyarrow.compute as pac
>>> import ibis.expr.datatypes as dt
>>> FieldType = dt.Struct({"a": "int"})
>>> @ibis.udf.scalar.pyarrow
... def add_one_py_struct(x: FieldType) -> int:
...     return pac.add(x.combine_chunks().field("a"), 1)
>>> t = ibis.memtable({"struct_col": [{"a": 1}, {"a": 2}, {"a": 3}]})
>>> add_one_py_struct(t.struct_col).name("added_one")
┏━━━━━━━━━━━┓
┃ added_one ┃
┡━━━━━━━━━━━┩
│ int64     │
├───────────┤
│         2 │
│         3 │
│         4 │
└───────────┘
Similarly, you can operate on maps as well:
>>> FieldType = dt.Map(dt.string, dt.int64)
>>> @ibis.udf.scalar.pyarrow
... def add_one_py_map(x: FieldType) -> int:
...     return pac.add(pac.map_lookup(x, "a", occurrence="first"), 1)
>>> t = ibis.memtable(
...     {"map_col": [{"a": 1}, {"a": 2}, {"a": 3}]},
...     schema={"map_col": "map<string, int>"},
... )
>>> add_one_py_map(t.map_col).name("added_one")
┏━━━━━━━━━━━┓
┃ added_one ┃
┡━━━━━━━━━━━┩
│ int64     │
├───────────┤
│         2 │
│         3 │
│         4 │
└───────────┘
Construct a non-vectorized scalar user-defined function that accepts Python scalar values as inputs.
`python` UDFs are likely to be slow.
Name | Type | Description | Default |
---|---|---|---|
fn | | The function to wrap. | None |
name | | The name of the UDF in the backend if different from the function name. | None |
database | | The database in which to create the UDF. | None |
catalog | | The catalog in which to create the UDF. | None |
signature | | If present, a tuple of the form ((arg0type, arg1type, ...), returntype). For example, a function taking an int and a float and returning a string would be ((int, float), str). If not present, the signature will be derived from the type annotations of the wrapped function. | None |
kwargs | | Additional backend-specific configuration arguments for the UDF. | {} |
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.memtable(dict(int_col=[1, 2, 3], str_col=["a", "b", "c"]))
>>> t
┏━━━━━━━━━┳━━━━━━━━━┓
┃ int_col ┃ str_col ┃
┡━━━━━━━━━╇━━━━━━━━━┩
│ int64   │ string  │
├─────────┼─────────┤
│       1 │ a       │
│       2 │ b       │
│       3 │ c       │
└─────────┴─────────┘
>>> @ibis.udf.scalar.python
... def str_magic(x: str) -> str:
...     return f"{x}_magic"
>>> @ibis.udf.scalar.python
... def add_one_py(x: int) -> int:
...     return x + 1
>>> str_magic(t.str_col)
┏━━━━━━━━━━━━━━━━━━━━━━┓
┃ str_magic_0(str_col) ┃
┡━━━━━━━━━━━━━━━━━━━━━━┩
│ string               │
├──────────────────────┤
│ a_magic              │
│ b_magic              │
│ c_magic              │
└──────────────────────┘
>>> add_one_py(t.int_col)
┏━━━━━━━━━━━━━━━━━━━━━━━┓
┃ add_one_py_0(int_col) ┃
┡━━━━━━━━━━━━━━━━━━━━━━━┩
│ int64                 │
├───────────────────────┤
│                     2 │
│                     3 │
│                     4 │
└───────────────────────┘
Define a UDF that adds one to an integer field that is part of a struct:
>>> import ibis.expr.datatypes as dt
>>> FieldType = dt.Struct({"a": "int"})
>>> @ibis.udf.scalar.python
... def add_one_py_struct(x: FieldType) -> int:
...     return x["a"] + 1
>>> t = ibis.memtable({"struct_col": [{"a": 1}, {"a": 2}, {"a": 3}]})
>>> add_one_py_struct(t.struct_col).name("added_one")
┏━━━━━━━━━━━┓
┃ added_one ┃
┡━━━━━━━━━━━┩
│ int64     │
├───────────┤
│         2 │
│         3 │
│         4 │
└───────────┘
Similarly, you can operate on maps as well:
>>> FieldType = dt.Map(dt.string, dt.int64)
>>> @ibis.udf.scalar.python
... def add_one_py_map(x: FieldType) -> int:
...     return x["a"] + 1
>>> t = ibis.memtable(
...     {"map_col": [{"a": 1}, {"a": 2}, {"a": 3}]},
...     schema={"map_col": "map<string, int>"},
... )
>>> add_one_py_map(t.map_col).name("added_one")
┏━━━━━━━━━━━┓
┃ added_one ┃
┡━━━━━━━━━━━┩
│ int64     │
├───────────┤
│         2 │
│         3 │
│         4 │
└───────────┘