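Quickstart
==========

.. code:: ipython3

    import pandas as pd
    from parquetranger import TableRepo

.. code:: ipython3

    df = pd.DataFrame(
        {
            "A": [1, 2, 3, 4, 5, 6],
            "B": ["x", "y", "z", "x1", "x2", "x3"],
            "C": [1, 2, 1, 1, 1, 2],
            "C2": ["a", "a", "b", "a", "c", "c"],
        },
        index=["a1", "a2", "a3", "a4", "a5", "a6"],
    )

.. code:: ipython3

    df

.. raw:: html

    <table border="1" class="dataframe">
      <thead>
        <tr style="text-align: right;"><th></th><th>A</th><th>B</th><th>C</th><th>C2</th></tr>
      </thead>
      <tbody>
        <tr><th>a1</th><td>1</td><td>x</td><td>1</td><td>a</td></tr>
        <tr><th>a2</th><td>2</td><td>y</td><td>2</td><td>a</td></tr>
        <tr><th>a3</th><td>3</td><td>z</td><td>1</td><td>b</td></tr>
        <tr><th>a4</th><td>4</td><td>x1</td><td>1</td><td>a</td></tr>
        <tr><th>a5</th><td>5</td><td>x2</td><td>1</td><td>c</td></tr>
        <tr><th>a6</th><td>6</td><td>x3</td><td>2</td><td>c</td></tr>
      </tbody>
    </table>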

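.. code:: ipython3

    trepo = TableRepo("some_tmp_path", group_cols="C2") # this creates the directory

.. code:: ipython3

    trepo.extend(df)

.. code:: ipython3

    trepo.get_full_df()

.. raw:: html

    <table border="1" class="dataframe">
      <thead>
        <tr style="text-align: right;"><th></th><th>A</th><th>B</th><th>C</th><th>C2</th></tr>
      </thead>
      <tbody>
        <tr><th>a1</th><td>1</td><td>x</td><td>1</td><td>a</td></tr>
        <tr><th>a2</th><td>2</td><td>y</td><td>2</td><td>a</td></tr>
        <tr><th>a4</th><td>4</td><td>x1</td><td>1</td><td>a</td></tr>
        <tr><th>a3</th><td>3</td><td>z</td><td>1</td><td>b</td></tr>
        <tr><th>a5</th><td>5</td><td>x2</td><td>1</td><td>c</td></tr>
        <tr><th>a6</th><td>6</td><td>x3</td><td>2</td><td>c</td></tr>
      </tbody>
    </table>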

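.. code:: ipython3

    df2 = pd.DataFrame(
        {
            "A": [21, 22, 23],
            "B": ["X", "Y", "Z"],
            "C": [10,20,1],
            "C2": ["a", "b", "a"],
        },
        index=["a1", "a4", "a7"]
    )

.. code:: ipython3

    trepo.replace_records(df2) # replaces based on index

.. code:: ipython3

    trepo.get_full_df()

.. raw:: html

    <table border="1" class="dataframe">
      <thead>
        <tr style="text-align: right;"><th></th><th>A</th><th>B</th><th>C</th><th>C2</th></tr>
      </thead>
      <tbody>
        <tr><th>a2</th><td>2</td><td>y</td><td>2</td><td>a</td></tr>
        <tr><th>a1</th><td>21</td><td>X</td><td>10</td><td>a</td></tr>
        <tr><th>a7</th><td>23</td><td>Z</td><td>1</td><td>a</td></tr>
        <tr><th>a3</th><td>3</td><td>z</td><td>1</td><td>b</td></tr>
        <tr><th>a4</th><td>22</td><td>Y</td><td>20</td><td>b</td></tr>
        <tr><th>a5</th><td>5</td><td>x2</td><td>1</td><td>c</td></tr>
        <tr><th>a6</th><td>6</td><td>x3</td><td>2</td><td>c</td></tr>
      </tbody>
    </table>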

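.. code:: ipython3

    trepo.replace_groups(df2)

.. code:: ipython3

    trepo.get_full_df() # replaced the whole groups where C2==a and C2==b with the records that were present in df2

.. raw:: html

    <table border="1" class="dataframe">
      <thead>
        <tr style="text-align: right;"><th></th><th>A</th><th>B</th><th>C</th><th>C2</th></tr>
      </thead>
      <tbody>
        <tr><th>a1</th><td>21</td><td>X</td><td>10</td><td>a</td></tr>
        <tr><th>a7</th><td>23</td><td>Z</td><td>1</td><td>a</td></tr>
        <tr><th>a4</th><td>22</td><td>Y</td><td>20</td><td>b</td></tr>
        <tr><th>a5</th><td>5</td><td>x2</td><td>1</td><td>c</td></tr>
        <tr><th>a6</th><td>6</td><td>x3</td><td>2</td><td>c</td></tr>
      </tbody>
    </table>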

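.. code:: ipython3

    trepo.replace_all(df2) # erases everything and puts df2 in. all traces of df are lost

.. code:: ipython3

    trepo.get_full_df()

.. raw:: html

    <table border="1" class="dataframe">
      <thead>
        <tr style="text-align: right;"><th></th><th>A</th><th>B</th><th>C</th><th>C2</th></tr>
      </thead>
      <tbody>
        <tr><th>a1</th><td>21</td><td>X</td><td>10</td><td>a</td></tr>
        <tr><th>a7</th><td>23</td><td>Z</td><td>1</td><td>a</td></tr>
        <tr><th>a4</th><td>22</td><td>Y</td><td>20</td><td>b</td></tr>
      </tbody>
    </table>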

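.. code:: ipython3

    trepo.replace_records(df, by_groups=True)
    # replaces records based on index, but only looks for indices within groups, so this way duplicate a4 index is possible
    # as they are in different groups, with different values in C2

.. code:: ipython3

    trepo.get_full_df()

.. raw:: html

    <table border="1" class="dataframe">
      <thead>
        <tr style="text-align: right;"><th></th><th>A</th><th>B</th><th>C</th><th>C2</th></tr>
      </thead>
      <tbody>
        <tr><th>a7</th><td>23</td><td>Z</td><td>1</td><td>a</td></tr>
        <tr><th>a1</th><td>1</td><td>x</td><td>1</td><td>a</td></tr>
        <tr><th>a2</th><td>2</td><td>y</td><td>2</td><td>a</td></tr>
        <tr><th>a4</th><td>4</td><td>x1</td><td>1</td><td>a</td></tr>
        <tr><th>a4</th><td>22</td><td>Y</td><td>20</td><td>b</td></tr>
        <tr><th>a3</th><td>3</td><td>z</td><td>1</td><td>b</td></tr>
        <tr><th>a5</th><td>5</td><td>x2</td><td>1</td><td>c</td></tr>
        <tr><th>a6</th><td>6</td><td>x3</td><td>2</td><td>c</td></tr>
      </tbody>
    </table>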

.. code:: ipython3

    trepo.purge() # deletes everything