Basic input/output

If you don’t have your own data, you can load example data from the ibis.examples module:

import ibis
import ibis.selectors as s

ibis.options.interactive = True

t = ibis.examples.penguins.fetch()

Overview

Ibis is typically used with a backend that already contains tables, but can import and export data in various formats.

Data platforms

You can connect Ibis to any supported backend to read and write data in backend-native tables.

Code
con = ibis.duckdb.connect("penguins.ddb")
t = con.create_table("penguins", t.to_pyarrow(), overwrite=True)
con = ibis.duckdb.connect("penguins.ddb")
t = con.table("penguins")
t.head(3)
1. Connect to a backend.
2. Load a table.
3. Display the table.
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species ┃ island    ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex    ┃ year  ┃
┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ string  │ string    │ float64        │ float64       │ int64             │ int64       │ string │ int64 │
├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Adelie  │ Torgersen │           39.1 │          18.7 │               181 │        3750 │ male   │  2007 │
│ Adelie  │ Torgersen │           39.5 │          17.4 │               186 │        3800 │ female │  2007 │
│ Adelie  │ Torgersen │           40.3 │          18.0 │               195 │        3250 │ female │  2007 │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
grouped = (
    t.group_by(["species", "island"])
    .aggregate(count=ibis._.count())
    .order_by(ibis.desc("count"))
)
con.create_table("penguins_grouped", grouped.to_pyarrow(), overwrite=True)
1. Create a lazily evaluated Ibis expression.
2. Write to a table.
┏━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━┓
┃ species   ┃ island    ┃ count ┃
┡━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━┩
│ string    │ string    │ int64 │
├───────────┼───────────┼───────┤
│ Gentoo    │ Biscoe    │   124 │
│ Chinstrap │ Dream     │    68 │
│ Adelie    │ Dream     │    56 │
│ Adelie    │ Torgersen │    52 │
│ Adelie    │ Biscoe    │    44 │
└───────────┴───────────┴───────┘

File formats

Depending on the backend, you can read and write data in several file formats.

pip install 'ibis-framework[duckdb]'
t.to_csv("penguins.csv")
ibis.read_csv("penguins.csv").head(3)
1. Write the table to a CSV file (availability depends on the backend).
2. Read the CSV file into a table (availability depends on the backend).
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species ┃ island    ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex    ┃ year  ┃
┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ string  │ string    │ float64        │ float64       │ int64             │ int64       │ string │ int64 │
├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Adelie  │ Torgersen │           39.1 │          18.7 │               181 │        3750 │ male   │  2007 │
│ Adelie  │ Torgersen │           39.5 │          17.4 │               186 │        3800 │ female │  2007 │
│ Adelie  │ Torgersen │           40.3 │          18.0 │               195 │        3250 │ female │  2007 │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘
pip install 'ibis-framework[duckdb,deltalake]'
t.to_delta("penguins.delta", mode="overwrite")
ibis.read_delta("penguins.delta").head(3)
1. Write the table to a Delta Lake table (availability depends on the backend).
2. Read the Delta Lake table into a table (availability depends on the backend).

---------------------------------------------------------------------------
InvalidInputException                     Traceback (most recent call last)
File /nix/store/nrbzcmvgdg9nya6mnrn339my3dfgxd68-ibis-3.12/lib/python3.12/site-packages/IPython/core/formatters.py:770, in PlainTextFormatter.__call__(self, obj)
    763 stream = StringIO()
    764 printer = pretty.RepresentationPrinter(stream, self.verbose,
    765     self.max_width, self.newline,
    766     max_seq_length=self.max_seq_length,
    767     singleton_pprinters=self.singleton_printers,
    768     type_pprinters=self.type_printers,
    769     deferred_pprinters=self.deferred_printers)
--> 770 printer.pretty(obj)
    771 printer.flush()
    772 return stream.getvalue()

File /nix/store/nrbzcmvgdg9nya6mnrn339my3dfgxd68-ibis-3.12/lib/python3.12/site-packages/IPython/lib/pretty.py:419, in RepresentationPrinter.pretty(self, obj)
    408                         return meth(obj, self, cycle)
    409                 if (
    410                     cls is not object
    411                     # check if cls defines __repr__
   (...)
    417                     and callable(_safe_getattr(cls, "__repr__", None))
    418                 ):
--> 419                     return _repr_pprint(obj, self, cycle)
    421     return _default_pprint(obj, self, cycle)
    422 finally:

File /nix/store/nrbzcmvgdg9nya6mnrn339my3dfgxd68-ibis-3.12/lib/python3.12/site-packages/IPython/lib/pretty.py:794, in _repr_pprint(obj, p, cycle)
    792 """A pprint that just redirects to the normal repr function."""
    793 # Find newlines and replace them with p.break_()
--> 794 output = repr(obj)
    795 lines = output.splitlines()
    796 with p.group():

File ~/work/ibis/ibis/ibis/expr/types/core.py:83, in Expr.__repr__(self)
     81 def __repr__(self) -> str:
     82     if ibis.options.interactive:
---> 83         return _capture_rich_renderable(self)
     84     else:
     85         return self._noninteractive_repr()

File ~/work/ibis/ibis/ibis/expr/types/core.py:63, in _capture_rich_renderable(renderable)
     61 console = Console(force_terminal=False)
     62 with console.capture() as capture:
---> 63     console.print(renderable)
     64 return capture.get().rstrip()

File /nix/store/nrbzcmvgdg9nya6mnrn339my3dfgxd68-ibis-3.12/lib/python3.12/site-packages/rich/console.py:1705, in Console.print(self, sep, end, style, justify, overflow, no_wrap, emoji, markup, highlight, width, height, crop, soft_wrap, new_line_start, *objects)
   1703 if style is None:
   1704     for renderable in renderables:
-> 1705         extend(render(renderable, render_options))
   1706 else:
   1707     for renderable in renderables:

File /nix/store/nrbzcmvgdg9nya6mnrn339my3dfgxd68-ibis-3.12/lib/python3.12/site-packages/rich/console.py:1306, in Console.render(self, renderable, options)
   1304 renderable = rich_cast(renderable)
   1305 if hasattr(renderable, "__rich_console__") and not isclass(renderable):
-> 1306     render_iterable = renderable.__rich_console__(self, _options)
   1307 elif isinstance(renderable, str):
   1308     text_renderable = self.render_str(
   1309         renderable, highlight=_options.highlight, markup=_options.markup
   1310     )

File ~/work/ibis/ibis/ibis/expr/types/core.py:106, in Expr.__rich_console__(self, console, options)
    103 if opts.interactive:
    104     from ibis.expr.types.pretty import to_rich
--> 106     rich_object = to_rich(self, console_width=console_width)
    107 else:
    108     rich_object = Text(self._noninteractive_repr())

File ~/work/ibis/ibis/ibis/expr/types/pretty.py:279, in to_rich(expr, max_rows, max_columns, max_length, max_string, max_depth, console_width)
    275     return _to_rich_scalar(
    276         expr, max_length=max_length, max_string=max_string, max_depth=max_depth
    277     )
    278 else:
--> 279     return _to_rich_table(
    280         expr,
    281         max_rows=max_rows,
    282         max_columns=max_columns,
    283         max_length=max_length,
    284         max_string=max_string,
    285         max_depth=max_depth,
    286         console_width=console_width,
    287     )

File ~/work/ibis/ibis/ibis/expr/types/pretty.py:352, in _to_rich_table(tablish, max_rows, max_columns, max_length, max_string, max_depth, console_width)
    349     if orig_ncols > len(computed_cols):
    350         table = table.select(*computed_cols)
--> 352 result = table.limit(max_rows + 1).to_pyarrow()
    353 # Now format the columns in order, stopping if the console width would
    354 # be exceeded.
    355 col_info = []

File ~/work/ibis/ibis/ibis/expr/types/core.py:577, in Expr.to_pyarrow(self, params, limit, **kwargs)
    549 @experimental
    550 def to_pyarrow(
    551     self,
   (...)
    555     **kwargs: Any,
    556 ) -> pa.Table:
    557     """Execute expression and return results in as a pyarrow table.
    558 
    559     This method is eager and will execute the associated expression
   (...)
    575         A pyarrow table holding the results of the executed expression.
    576     """
--> 577     return self._find_backend(use_default=True).to_pyarrow(
    578         self, params=params, limit=limit, **kwargs
    579     )

File ~/work/ibis/ibis/ibis/backends/duckdb/__init__.py:1304, in Backend.to_pyarrow(self, expr, params, limit, **_)
   1296 def to_pyarrow(
   1297     self,
   1298     expr: ir.Expr,
   (...)
   1302     **_: Any,
   1303 ) -> pa.Table:
-> 1304     table = self._to_duckdb_relation(expr, params=params, limit=limit).arrow()
   1305     return expr.__pyarrow_result__(table)

InvalidInputException: Invalid Input Error: arrow_scan: get_next failed(): IOError: Repetition level histogram size mismatch
pip install 'ibis-framework[duckdb]'
t.to_parquet("penguins.parquet")
ibis.read_parquet("penguins.parquet").head(3)
1. Write the table to a Parquet file (availability depends on the backend).
2. Read the Parquet file into a table (availability depends on the backend).
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species ┃ island    ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex    ┃ year  ┃
┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ string  │ string    │ float64        │ float64       │ int64             │ int64       │ string │ int64 │
├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Adelie  │ Torgersen │           39.1 │          18.7 │               181 │        3750 │ male   │  2007 │
│ Adelie  │ Torgersen │           39.5 │          17.4 │               186 │        3800 │ female │  2007 │
│ Adelie  │ Torgersen │           40.3 │          18.0 │               195 │        3250 │ female │  2007 │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘

With other Python libraries

Ibis uses Apache Arrow for efficient data transfer to and from other libraries. Ibis tables implement the __dataframe__ and __array__ protocols, so you can pass them to any library that supports these protocols.

You can convert Ibis tables to pandas dataframes.

pip install pandas
df = t.to_pandas()
df.head(3)
1. Returns a pandas dataframe.
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex year
0 Adelie Torgersen 39.1 18.7 181.0 3750.0 male 2007
1 Adelie Torgersen 39.5 17.4 186.0 3800.0 female 2007
2 Adelie Torgersen 40.3 18.0 195.0 3250.0 female 2007

Or you can convert pandas dataframes to Ibis tables.

t = ibis.memtable(df)
t.head(3)
1. Returns an Ibis table.
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species ┃ island    ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex    ┃ year  ┃
┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ string  │ string    │ float64        │ float64       │ float64           │ float64     │ string │ int64 │
├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Adelie  │ Torgersen │           39.1 │          18.7 │             181.0 │      3750.0 │ male   │  2007 │
│ Adelie  │ Torgersen │           39.5 │          17.4 │             186.0 │      3800.0 │ female │  2007 │
│ Adelie  │ Torgersen │           40.3 │          18.0 │             195.0 │      3250.0 │ female │  2007 │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘

You can convert Ibis tables to Polars dataframes.

pip install polars
import polars as pl

df = pl.from_arrow(t.to_pyarrow())
df.head(3)
shape: (3, 8)
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex year
str str f64 f64 f64 f64 str i64
"Adelie" "Torgersen" 39.1 18.7 181.0 3750.0 "male" 2007
"Adelie" "Torgersen" 39.5 17.4 186.0 3800.0 "female" 2007
"Adelie" "Torgersen" 40.3 18.0 195.0 3250.0 "female" 2007

Or you can convert Polars dataframes to Ibis tables.

t = ibis.memtable(df)
t.head(3)
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species ┃ island    ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex    ┃ year  ┃
┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ string  │ string    │ float64        │ float64       │ float64           │ float64     │ string │ int64 │
├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Adelie  │ Torgersen │           39.1 │          18.7 │             181.0 │      3750.0 │ male   │  2007 │
│ Adelie  │ Torgersen │           39.5 │          17.4 │             186.0 │      3800.0 │ female │  2007 │
│ Adelie  │ Torgersen │           40.3 │          18.0 │             195.0 │      3250.0 │ female │  2007 │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘

You can convert Ibis tables to PyArrow tables.

pip install pyarrow
t.to_pyarrow()
pyarrow.Table
species: string
island: string
bill_length_mm: double
bill_depth_mm: double
flipper_length_mm: double
body_mass_g: double
sex: string
year: int64
----
species: [["Adelie","Adelie","Adelie","Adelie","Adelie",...,"Chinstrap","Chinstrap","Chinstrap","Chinstrap","Chinstrap"]]
island: [["Torgersen","Torgersen","Torgersen","Torgersen","Torgersen",...,"Dream","Dream","Dream","Dream","Dream"]]
bill_length_mm: [[39.1,39.5,40.3,null,36.7,...,55.8,43.5,49.6,50.8,50.2]]
bill_depth_mm: [[18.7,17.4,18,null,19.3,...,19.8,18.1,18.2,19,18.7]]
flipper_length_mm: [[181,186,195,null,193,...,207,202,193,210,198]]
body_mass_g: [[3750,3800,3250,null,3450,...,4000,3400,3775,4100,3775]]
sex: [["male","female","female",null,"female",...,"male","female","male","male","female"]]
year: [[2007,2007,2007,2007,2007,...,2009,2009,2009,2009,2009]]

Or PyArrow batches:

t.to_pyarrow_batches()
<pyarrow.lib.RecordBatchReader at 0x7fff849c3d50>

And you can convert PyArrow tables to Ibis tables.

ibis.memtable(t.to_pyarrow()).head(3)
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┓
┃ species ┃ island    ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ body_mass_g ┃ sex    ┃ year  ┃
┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━┩
│ string  │ string    │ float64        │ float64       │ float64           │ float64     │ string │ int64 │
├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼─────────────┼────────┼───────┤
│ Adelie  │ Torgersen │           39.1 │          18.7 │             181.0 │      3750.0 │ male   │  2007 │
│ Adelie  │ Torgersen │           39.5 │          17.4 │             186.0 │      3800.0 │ female │  2007 │
│ Adelie  │ Torgersen │           40.3 │          18.0 │             195.0 │      3250.0 │ female │  2007 │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴─────────────┴────────┴───────┘

You can convert Ibis tables to torch tensors.

pip install torch
t.select(s.numeric()).limit(3).to_torch()
{'col2': tensor([39.1000, 39.5000, 40.3000], dtype=torch.float64),
 'col3': tensor([18.7000, 17.4000, 18.0000], dtype=torch.float64),
 'col4': tensor([181., 186., 195.], dtype=torch.float64),
 'col5': tensor([3750., 3800., 3250.], dtype=torch.float64),
 'col7': tensor([2007, 2007, 2007], dtype=torch.int16)}

You can directly call the __dataframe__ protocol on Ibis tables, though this is typically handled by the library you’re using.

t.__dataframe__()
<ibis.expr.types.dataframe_interchange.IbisDataFrame at 0x7fff84536e70>

You can directly call the __array__ protocol on Ibis tables, though this is typically handled by the library you’re using.

t.__array__()
array([['Adelie', 'Torgersen', 39.1, ..., 3750.0, 'male', 2007],
       ['Adelie', 'Torgersen', 39.5, ..., 3800.0, 'female', 2007],
       ['Adelie', 'Torgersen', 40.3, ..., 3250.0, 'female', 2007],
       ...,
       ['Chinstrap', 'Dream', 49.6, ..., 3775.0, 'male', 2009],
       ['Chinstrap', 'Dream', 50.8, ..., 4100.0, 'male', 2009],
       ['Chinstrap', 'Dream', 50.2, ..., 3775.0, 'female', 2009]],
      shape=(344, 8), dtype=object)
Back to top