Use PyArrow to read the Parquet file and PostgreSQL's COPY command (via psycopg2) to bulk-load it,
which is fast.
- Install the required libraries (pandas is needed for `Table.to_pandas()`):
pip install pyarrow pandas psycopg2-binary
- Ingest the Parquet file
import io

import psycopg2
import pyarrow.parquet as pq
# Load the whole Parquet file into memory as an Arrow table.
table = pq.read_table("path/to/data.parquet")

# COPY ... FROM STDIN reads from a file-like object, not from a string, so
# serialize the rows as header-less CSV into an in-memory text buffer and
# rewind it before handing it to psycopg2. Missing values are written as
# empty fields, which COPY's CSV format loads as NULL.
# NOTE: the Parquet column order is assumed to match my_pg_table's columns.
csv_buffer = io.StringIO()
table.to_pandas().to_csv(csv_buffer, index=False, header=False)
csv_buffer.seek(0)

# Connect to PostgreSQL (placeholder credentials; adjust for your setup).
conn = psycopg2.connect("dbname=my_database user=postgres password=mysecret")

# `with conn` commits the transaction on success and rolls it back on error
# (it does not close the connection). `conn.cursor()` used as a context
# manager creates the cursor and closes it afterwards, so no separate
# `cur = conn.cursor()` statement is needed.
with conn:
    with conn.cursor() as cur:
        # The payload is CSV text, so the COPY format must be CSV, not BINARY.
        cur.copy_expert("COPY my_pg_table FROM STDIN WITH (FORMAT CSV)", csv_buffer)