Source code for sqlalchemy_jdbcapi.jdbc.dataframe

"""
DataFrame integration for pandas, polars, and Apache Arrow.

This module provides utilities to convert JDBC query results directly
into DataFrames for data science and ML workflows.
"""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    from .cursor import Cursor

logger = logging.getLogger(__name__)


def cursor_to_pandas(cursor: Cursor) -> Any:
    """
    Build a pandas DataFrame from the rows returned by ``cursor.fetchall()``.

    Column labels are taken from the first element of each entry in
    ``cursor.description``.

    Args:
        cursor: Cursor with executed query

    Returns:
        pandas.DataFrame

    Raises:
        ImportError: If pandas is not installed
        ValueError: If ``cursor.description`` is None (no result set)

    Example:
        >>> cursor.execute("SELECT * FROM users")
        >>> df = cursor_to_pandas(cursor)
        >>> print(df.head())
    """
    # Imported inside the function so this module can be imported
    # without pandas being installed.
    try:
        import pandas as pd
    except ImportError as e:
        raise ImportError(
            "pandas is not installed. Install with: pip install pandas"
        ) from e

    description = cursor.description
    if description is None:
        raise ValueError("Cursor has no result set")

    # Collect the column labels before fetching any rows.
    column_names = []
    for entry in description:
        column_names.append(entry[0])

    frame = pd.DataFrame(cursor.fetchall(), columns=column_names)

    logger.debug(f"Created pandas DataFrame with shape {frame.shape}")
    return frame
def cursor_to_polars(cursor: Cursor) -> Any:
    """
    Convert cursor results to polars DataFrame.

    Column names come from the first element of each entry in
    ``cursor.description``; the rows come from ``cursor.fetchall()`` and are
    transposed into one list per column.

    Args:
        cursor: Cursor with executed query

    Returns:
        polars.DataFrame

    Raises:
        ImportError: If polars is not installed
        ValueError: If cursor has no results, or if the result set contains
            duplicate column names

    Example:
        >>> cursor.execute("SELECT * FROM users")
        >>> df = cursor_to_polars(cursor)
        >>> print(df.head())
    """
    # Imported inside the function so this module can be imported
    # without polars being installed.
    try:
        import polars as pl
    except ImportError as e:
        raise ImportError(
            "polars is not installed. Install with: pip install polars"
        ) from e

    if cursor.description is None:
        raise ValueError("Cursor has no result set")

    # Get column names
    columns = [desc[0] for desc in cursor.description]

    # The frame is built from a dict keyed by column name, so repeated names
    # (e.g. "SELECT a.id, b.id ...") would overwrite one another and silently
    # drop columns. Detect that before fetching and fail with a clear message.
    seen = set()
    duplicates = []
    for name in columns:
        if name in seen and name not in duplicates:
            duplicates.append(name)
        seen.add(name)
    if duplicates:
        raise ValueError(
            "Duplicate column names in result set: "
            f"{', '.join(str(name) for name in duplicates)}. "
            "Alias the columns in the query so that each name is unique."
        )

    # Fetch all rows
    rows = cursor.fetchall()

    # Create DataFrame from dict of lists (one list per column)
    data = {col: [row[i] for row in rows] for i, col in enumerate(columns)}
    df = pl.DataFrame(data)

    logger.debug(f"Created polars DataFrame with shape {df.shape}")
    return df
def cursor_to_arrow(cursor: Cursor) -> Any:
    """
    Build an Apache Arrow Table from the rows returned by ``cursor.fetchall()``.

    Column names are taken from the first element of each entry in
    ``cursor.description``. Rows are transposed into one ``pyarrow`` array
    per column; when there are no rows every column is ``pa.array([])``.

    Args:
        cursor: Cursor with executed query

    Returns:
        pyarrow.Table

    Raises:
        ImportError: If pyarrow is not installed
        ValueError: If ``cursor.description`` is None (no result set)

    Example:
        >>> cursor.execute("SELECT * FROM users")
        >>> table = cursor_to_arrow(cursor)
        >>> print(table.schema)
    """
    # Imported inside the function so this module can be imported
    # without pyarrow being installed.
    try:
        import pyarrow as pa
    except ImportError as e:
        raise ImportError(
            "pyarrow is not installed. Install with: pip install pyarrow"
        ) from e

    description = cursor.description
    if description is None:
        raise ValueError("Cursor has no result set")

    names = [entry[0] for entry in description]
    records = cursor.fetchall()

    if records:
        # Transpose row-major records into one value list per column.
        per_column = [
            [record[position] for record in records]
            for position, _ in enumerate(names)
        ]
        arrays = [pa.array(values) for values in per_column]
    else:
        # Empty result: one empty array per column.
        arrays = [pa.array([]) for _ in names]

    table = pa.Table.from_arrays(arrays, names=names)

    logger.debug(f"Created Arrow Table with {table.num_rows} rows")
    return table
def cursor_to_dict(cursor: Cursor) -> list[dict[str, Any]]:
    """
    Return the rows from ``cursor.fetchall()`` as a list of dictionaries.

    Each row is zipped with the column names, which are the first element
    of each entry in ``cursor.description``.

    Args:
        cursor: Cursor with executed query

    Returns:
        List of row dictionaries

    Raises:
        ValueError: If ``cursor.description`` is None (no result set)

    Example:
        >>> cursor.execute("SELECT * FROM users")
        >>> rows = cursor_to_dict(cursor)
        >>> print(rows[0])
        {'id': 1, 'name': 'Alice', 'email': 'alice@example.com'}
    """
    description = cursor.description
    if description is None:
        raise ValueError("Cursor has no result set")

    names = [entry[0] for entry in description]

    records = []
    for values in cursor.fetchall():
        records.append(dict(zip(names, values)))
    return records
# Add convenience methods to Cursor class
def _add_dataframe_methods() -> None:
    """
    Add DataFrame methods to Cursor class.

    After this runs, ``cursor.to_pandas()``, ``cursor.to_polars()``,
    ``cursor.to_arrow()`` and ``cursor.to_dict()`` call the matching
    ``cursor_to_*`` function with the cursor as its only argument.
    """
    # Imported at call time; the module-level import of Cursor is only
    # performed under TYPE_CHECKING.
    from .cursor import Cursor

    # Bind the functions themselves rather than ``lambda self: f(self)``
    # wrappers. As class attributes they receive the instance as ``cursor``,
    # and the resulting methods keep their real names and docstrings.
    Cursor.to_pandas = cursor_to_pandas  # type: ignore
    Cursor.to_polars = cursor_to_polars  # type: ignore
    Cursor.to_arrow = cursor_to_arrow  # type: ignore
    Cursor.to_dict = cursor_to_dict  # type: ignore

    logger.debug("Added DataFrame methods to Cursor class")


# Auto-register methods on import
try:
    _add_dataframe_methods()
except Exception as e:
    # Best-effort: a failed registration is only logged, so importing this
    # module still succeeds and the cursor_to_* functions remain usable.
    logger.debug(f"Could not add DataFrame methods: {e}")