import ibis
import ibis.selectors as s
ibis.options.interactive = True
t = ibis.examples.penguins.fetch()
Basic input/output
If you don’t have your own data, you can load example data from the `ibis.examples` module:
Overview
Ibis is typically used with a backend that already contains tables, but can import and export data in various formats.
Data platforms
You can connect Ibis to any supported backend to read and write data in backend-native tables.
Code
con = ibis.duckdb.connect("penguins.ddb")
t = con.create_table("penguins", t.to_pyarrow(), overwrite=True)
con = ibis.duckdb.connect("penguins.ddb")
t = con.table("penguins")
t.head(3)
1. Connect to a backend.
2. Load a table.
3. Display the table.
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓ ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃ ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩ │ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │ ├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │ │ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │ │ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │ └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
grouped = (
    t.group_by(["species", "island"])
    .aggregate(count=ibis._.count())
    .order_by(ibis.desc("count"))
)
con.create_table("penguins_grouped", grouped.to_pyarrow(), overwrite=True)
1. Create a lazily evaluated Ibis expression.
2. Write to a table.
┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━┓
┃ species   ┃ island    ┃ count ┃
┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━┩
│ string    │ string    │ int64 │
├───────────┼───────────┼───────┤
│ Gentoo    │ Biscoe    │   124 │
│ Chinstrap │ Dream     │    68 │
│ Adelie    │ Dream     │    56 │
│ Adelie    │ Torgersen │    52 │
│ Adelie    │ Biscoe    │    44 │
└───────────┴───────────┴───────┘
File formats
Depending on the backend, you can read and write data in several file formats.
pip install 'ibis-framework[duckdb]'
t.to_csv("penguins.csv")
ibis.read_csv("penguins.csv").head(3)
1. Write the table to a CSV file. Dependent on backend.
2. Read the CSV file into a table. Dependent on backend.
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓ ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃ ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩ │ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │ ├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │ │ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │ │ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │ └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
pip install 'ibis-framework[duckdb,deltalake]'
t.to_delta("penguins.delta", mode="overwrite")
ibis.read_delta("penguins.delta").head(3)
1. Write the table to a Delta Lake table. Dependent on backend.
2. Read the Delta Lake table into a table. Dependent on backend.
--------------------------------------------------------------------------- InvalidInputException Traceback (most recent call last) File /nix/store/nrbzcmvgdg9nya6mnrn339my3dfgxd68-ibis-3.12/lib/python3.12/site-packages/IPython/core/formatters.py:770, in PlainTextFormatter.__call__(self, obj) 763 stream = StringIO() 764 printer = pretty.RepresentationPrinter(stream, self.verbose, 765 self.max_width, self.newline, 766 max_seq_length=self.max_seq_length, 767 singleton_pprinters=self.singleton_printers, 768 type_pprinters=self.type_printers, 769 deferred_pprinters=self.deferred_printers) --> 770 printer.pretty(obj) 771 printer.flush() 772 return stream.getvalue() File /nix/store/nrbzcmvgdg9nya6mnrn339my3dfgxd68-ibis-3.12/lib/python3.12/site-packages/IPython/lib/pretty.py:419, in RepresentationPrinter.pretty(self, obj) 408 return meth(obj, self, cycle) 409 if ( 410 cls is not object 411 # check if cls defines __repr__ (...) 417 and callable(_safe_getattr(cls, "__repr__", None)) 418 ): --> 419 return _repr_pprint(obj, self, cycle) 421 return _default_pprint(obj, self, cycle) 422 finally: File /nix/store/nrbzcmvgdg9nya6mnrn339my3dfgxd68-ibis-3.12/lib/python3.12/site-packages/IPython/lib/pretty.py:794, in _repr_pprint(obj, p, cycle) 792 """A pprint that just redirects to the normal repr function.""" 793 # Find newlines and replace them with p.break_() --> 794 output = repr(obj) 795 lines = output.splitlines() 796 with p.group(): File ~/work/ibis/ibis/ibis/expr/types/core.py:83, in Expr.__repr__(self) 81 def __repr__(self) -> str: 82 if ibis.options.interactive: ---> 83 return _capture_rich_renderable(self) 84 else: 85 return self._noninteractive_repr() File ~/work/ibis/ibis/ibis/expr/types/core.py:63, in _capture_rich_renderable(renderable) 61 console = Console(force_terminal=False) 62 with console.capture() as capture: ---> 63 console.print(renderable) 64 return capture.get().rstrip() File 
/nix/store/nrbzcmvgdg9nya6mnrn339my3dfgxd68-ibis-3.12/lib/python3.12/site-packages/rich/console.py:1705, in Console.print(self, sep, end, style, justify, overflow, no_wrap, emoji, markup, highlight, width, height, crop, soft_wrap, new_line_start, *objects) 1703 if style is None: 1704 for renderable in renderables: -> 1705 extend(render(renderable, render_options)) 1706 else: 1707 for renderable in renderables: File /nix/store/nrbzcmvgdg9nya6mnrn339my3dfgxd68-ibis-3.12/lib/python3.12/site-packages/rich/console.py:1306, in Console.render(self, renderable, options) 1304 renderable = rich_cast(renderable) 1305 if hasattr(renderable, "__rich_console__") and not isclass(renderable): -> 1306 render_iterable = renderable.__rich_console__(self, _options) 1307 elif isinstance(renderable, str): 1308 text_renderable = self.render_str( 1309 renderable, highlight=_options.highlight, markup=_options.markup 1310 ) File ~/work/ibis/ibis/ibis/expr/types/core.py:106, in Expr.__rich_console__(self, console, options) 103 if opts.interactive: 104 from ibis.expr.types.pretty import to_rich --> 106 rich_object = to_rich(self, console_width=console_width) 107 else: 108 rich_object = Text(self._noninteractive_repr()) File ~/work/ibis/ibis/ibis/expr/types/pretty.py:279, in to_rich(expr, max_rows, max_columns, max_length, max_string, max_depth, console_width) 275 return _to_rich_scalar( 276 expr, max_length=max_length, max_string=max_string, max_depth=max_depth 277 ) 278 else: --> 279 return _to_rich_table( 280 expr, 281 max_rows=max_rows, 282 max_columns=max_columns, 283 max_length=max_length, 284 max_string=max_string, 285 max_depth=max_depth, 286 console_width=console_width, 287 ) File ~/work/ibis/ibis/ibis/expr/types/pretty.py:352, in _to_rich_table(tablish, max_rows, max_columns, max_length, max_string, max_depth, console_width) 349 if orig_ncols > len(computed_cols): 350 table = table.select(*computed_cols) --> 352 result = table.limit(max_rows + 1).to_pyarrow() 353 # Now format the 
columns in order, stopping if the console width would 354 # be exceeded. 355 col_info = [] File ~/work/ibis/ibis/ibis/expr/types/core.py:577, in Expr.to_pyarrow(self, params, limit, **kwargs) 549 @experimental 550 def to_pyarrow( 551 self, (...) 555 **kwargs: Any, 556 ) -> pa.Table: 557 """Execute expression and return results in as a pyarrow table. 558 559 This method is eager and will execute the associated expression (...) 575 A pyarrow table holding the results of the executed expression. 576 """ --> 577 return self._find_backend(use_default=True).to_pyarrow( 578 self, params=params, limit=limit, **kwargs 579 ) File ~/work/ibis/ibis/ibis/backends/duckdb/__init__.py:1304, in Backend.to_pyarrow(self, expr, params, limit, **_) 1296 def to_pyarrow( 1297 self, 1298 expr: ir.Expr, (...) 1302 **_: Any, 1303 ) -> pa.Table: -> 1304 table = self._to_duckdb_relation(expr, params=params, limit=limit).arrow() 1305 return expr.__pyarrow_result__(table) InvalidInputException: Invalid Input Error: arrow_scan: get_next failed(): IOError: Repetition level histogram size mismatch
pip install 'ibis-framework[duckdb]'
t.to_parquet("penguins.parquet")
ibis.read_parquet("penguins.parquet").head(3)
1. Write the table to a Parquet file. Dependent on backend.
2. Read the Parquet file into a table. Dependent on backend.
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓ ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃ ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩ │ string │ string │ float64 │ float64 │ int64 │ int64 │ string │ int64 │ ├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ 3750 │ male │ 2007 │ │ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ 3800 │ female │ 2007 │ │ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ 3250 │ female │ 2007 │ └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
With other Python libraries
Ibis uses Apache Arrow for efficient data transfer to and from other libraries. Ibis tables implement the `__dataframe__` and `__array__` protocols, so you can pass them to any library that supports these protocols.
You can convert Ibis tables to pandas dataframes.
pip install pandas
df = t.to_pandas()
df.head(3)
1. Returns a pandas dataframe.
| species | island | bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | sex | year |
---|---|---|---|---|---|---|---|---|
0 | Adelie | Torgersen | 39.1 | 18.7 | 181.0 | 3750.0 | male | 2007 |
1 | Adelie | Torgersen | 39.5 | 17.4 | 186.0 | 3800.0 | female | 2007 |
2 | Adelie | Torgersen | 40.3 | 18.0 | 195.0 | 3250.0 | female | 2007 |
Or you can convert pandas dataframes to Ibis tables.
t = ibis.memtable(df)
t.head(3)
1. Returns an Ibis table.
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓ ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃ ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩ │ string │ string │ float64 │ float64 │ float64 │ float64 │ string │ int64 │ ├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181.0 │ 3750.0 │ male │ 2007 │ │ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186.0 │ 3800.0 │ female │ 2007 │ │ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195.0 │ 3250.0 │ female │ 2007 │ └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
You can convert Ibis tables to Polars dataframes.
pip install polars
import polars as pl
df = pl.from_arrow(t.to_pyarrow())
df.head(3)
species | island | bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | sex | year |
---|---|---|---|---|---|---|---|
str | str | f64 | f64 | f64 | f64 | str | i64 |
"Adelie" | "Torgersen" | 39.1 | 18.7 | 181.0 | 3750.0 | "male" | 2007 |
"Adelie" | "Torgersen" | 39.5 | 17.4 | 186.0 | 3800.0 | "female" | 2007 |
"Adelie" | "Torgersen" | 40.3 | 18.0 | 195.0 | 3250.0 | "female" | 2007 |
Or Polars dataframes to Ibis tables.
t = ibis.memtable(df)
t.head(3)
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓ ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃ ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩ │ string │ string │ float64 │ float64 │ float64 │ float64 │ string │ int64 │ ├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181.0 │ 3750.0 │ male │ 2007 │ │ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186.0 │ 3800.0 │ female │ 2007 │ │ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195.0 │ 3250.0 │ female │ 2007 │ └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
You can convert Ibis tables to PyArrow tables.
pip install pyarrow
t.to_pyarrow()
pyarrow.Table
species: string
island: string
bill_length_mm: double
bill_depth_mm: double
flipper_length_mm: double
body_mass_g: double
sex: string
year: int64
----
species: [["Adelie","Adelie","Adelie","Adelie","Adelie",...,"Chinstrap","Chinstrap","Chinstrap","Chinstrap","Chinstrap"]]
island: [["Torgersen","Torgersen","Torgersen","Torgersen","Torgersen",...,"Dream","Dream","Dream","Dream","Dream"]]
bill_length_mm: [[39.1,39.5,40.3,null,36.7,...,55.8,43.5,49.6,50.8,50.2]]
bill_depth_mm: [[18.7,17.4,18,null,19.3,...,19.8,18.1,18.2,19,18.7]]
flipper_length_mm: [[181,186,195,null,193,...,207,202,193,210,198]]
body_mass_g: [[3750,3800,3250,null,3450,...,4000,3400,3775,4100,3775]]
sex: [["male","female","female",null,"female",...,"male","female","male","male","female"]]
year: [[2007,2007,2007,2007,2007,...,2009,2009,2009,2009,2009]]
Or PyArrow batches:
t.to_pyarrow_batches()
<pyarrow.lib.RecordBatchReader at 0x7fff849c3d50>
And you can convert PyArrow tables to Ibis tables.
ibis.memtable(t.to_pyarrow()).head(3)
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓ ┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex ┃ year ┃ ┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩ │ string │ string │ float64 │ float64 │ float64 │ float64 │ string │ int64 │ ├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤ │ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181.0 │ 3750.0 │ male │ 2007 │ │ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186.0 │ 3800.0 │ female │ 2007 │ │ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195.0 │ 3250.0 │ female │ 2007 │ └─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
You can convert Ibis tables to torch tensors.
pip install torch
t.select(s.numeric()).limit(3).to_torch()
{'col2': tensor([39.1000, 39.5000, 40.3000], dtype=torch.float64),
'col3': tensor([18.7000, 17.4000, 18.0000], dtype=torch.float64),
'col4': tensor([181., 186., 195.], dtype=torch.float64),
'col5': tensor([3750., 3800., 3250.], dtype=torch.float64),
'col7': tensor([2007, 2007, 2007], dtype=torch.int16)}
You can directly call the `__dataframe__` protocol on Ibis tables, though this is typically handled by the library you’re using.
t.__dataframe__()
<ibis.expr.types.dataframe_interchange.IbisDataFrame at 0x7fff84536e70>
You can directly call the `__array__` protocol on Ibis tables, though this is typically handled by the library you’re using.
t.__array__()
array([['Adelie', 'Torgersen', 39.1, ..., 3750.0, 'male', 2007],
['Adelie', 'Torgersen', 39.5, ..., 3800.0, 'female', 2007],
['Adelie', 'Torgersen', 40.3, ..., 3250.0, 'female', 2007],
...,
['Chinstrap', 'Dream', 49.6, ..., 3775.0, 'male', 2009],
['Chinstrap', 'Dream', 50.8, ..., 4100.0, 'male', 2009],
['Chinstrap', 'Dream', 50.2, ..., 3775.0, 'female', 2009]],
shape=(344, 8), dtype=object)