Source code for sqlalchemy_jdbcapi.jdbc.driver_manager

"""
JDBC driver auto-download and management.

This module handles automatic downloading of JDBC drivers from Maven Central
and provides fallback to manual driver configuration via CLASSPATH.
"""

from __future__ import annotations

import logging
import os
import shutil
import urllib.request
from pathlib import Path
from typing import NamedTuple

logger = logging.getLogger(__name__)

# Default driver cache directory
DEFAULT_DRIVER_CACHE = Path.home() / ".sqlalchemy-jdbcapi" / "drivers"


[docs] class JDBCDriver(NamedTuple): """JDBC driver metadata for automatic download.""" group_id: str artifact_id: str version: str classifier: str | None = None @property def filename(self) -> str: """Get the JAR filename.""" if self.classifier: return f"{self.artifact_id}-{self.version}-{self.classifier}.jar" return f"{self.artifact_id}-{self.version}.jar" @property def maven_url(self) -> str: """Get the Maven Central download URL.""" base_url = "https://repo1.maven.org/maven2" group_path = self.group_id.replace(".", "/") return ( f"{base_url}/{group_path}/{self.artifact_id}/{self.version}/{self.filename}" )
# Recommended JDBC drivers for auto-download from Maven Central # These versions are tested and known to work well # TODO: Consider checking for newer versions periodically RECOMMENDED_JDBC_DRIVERS = { "postgresql": JDBCDriver( group_id="org.postgresql", artifact_id="postgresql", version="42.7.1", # Latest stable as of 2024 ), "mysql": JDBCDriver( group_id="com.mysql", artifact_id="mysql-connector-j", version="8.3.0", # Note: Oracle renamed this from mysql-connector-java ), "mariadb": JDBCDriver( group_id="org.mariadb.jdbc", artifact_id="mariadb-java-client", version="3.3.2", ), "mssql": JDBCDriver( group_id="com.microsoft.sqlserver", artifact_id="mssql-jdbc", version="12.6.0.jre11", # JRE11 version for Java 11+ compatibility ), "oracle": JDBCDriver( group_id="com.oracle.database.jdbc", artifact_id="ojdbc11", version="23.3.0.23.09", ), "db2": JDBCDriver( group_id="com.ibm.db2", artifact_id="jcc", version="11.5.9.0", ), "sqlite": JDBCDriver( group_id="org.xerial", artifact_id="sqlite-jdbc", version="3.45.0.0", ), "oceanbase": JDBCDriver( group_id="com.oceanbase", artifact_id="oceanbase-client", version="2.4.9", ), # New drivers "gbase": JDBCDriver( group_id="com.gbasedbt", artifact_id="gbasedbt-jdbc", version="3.5.1", # GBase 8s JDBC driver ), "iseries": JDBCDriver( group_id="net.sf.jt400", artifact_id="jt400", version="20.0.7", # IBM Toolbox for Java (JT400) ), "access": JDBCDriver( group_id="net.sf.ucanaccess", artifact_id="ucanaccess", version="5.0.1", # UCanAccess JDBC driver ), "avatica": JDBCDriver( group_id="org.apache.calcite.avatica", artifact_id="avatica-core", version="1.25.0", # Apache Avatica ), "phoenix": JDBCDriver( group_id="org.apache.phoenix", artifact_id="phoenix-client-hbase-2.5", version="5.1.3", # Apache Phoenix ), # Connection pooling "hikari": JDBCDriver( group_id="com.zaxxer", artifact_id="HikariCP", version="5.1.0", # HikariCP connection pool ), }
[docs] def get_driver_cache_dir() -> Path: """ Get the driver cache directory. Returns: Path to the driver cache directory. """ cache_dir = os.environ.get("SQLALCHEMY_JDBCAPI_DRIVER_CACHE") if cache_dir: return Path(cache_dir) return DEFAULT_DRIVER_CACHE
[docs] def download_driver( driver: JDBCDriver, cache_dir: Path | None = None, force: bool = False, ) -> Path: """ Download a JDBC driver from Maven Central. Args: driver: JDBC driver metadata. cache_dir: Directory to cache downloaded drivers. If None, uses default. force: Force re-download even if driver exists. Returns: Path to the downloaded driver JAR file. Raises: RuntimeError: If download fails. """ if cache_dir is None: cache_dir = get_driver_cache_dir() # Create cache directory if it doesn't exist cache_dir.mkdir(parents=True, exist_ok=True) # Target file path target_path = cache_dir / driver.filename # Check if driver already exists if target_path.exists() and not force: logger.debug(f"Driver already cached: {target_path}") return target_path # Download driver logger.info(f"Downloading JDBC driver: {driver.filename}") logger.debug(f"URL: {driver.maven_url}") try: with urllib.request.urlopen(driver.maven_url) as response: # Download to temporary file first temp_path = target_path.with_suffix(".tmp") with open(temp_path, "wb") as f: shutil.copyfileobj(response, f) # Move to final location temp_path.replace(target_path) logger.info(f"Driver downloaded successfully: {target_path}") return target_path except Exception as e: error_msg = f"Failed to download driver from {driver.maven_url}: {e}" logger.error(error_msg) raise RuntimeError(error_msg) from e
[docs] def get_driver_path( database: str, driver: JDBCDriver | None = None, auto_download: bool = True, cache_dir: Path | None = None, ) -> Path: """ Get the path to a JDBC driver, downloading if necessary. Args: database: Database name (e.g., 'postgresql', 'mysql'). driver: Custom driver metadata. If None, uses recommended driver. auto_download: Whether to auto-download driver if not found. cache_dir: Directory to cache downloaded drivers. Returns: Path to the JDBC driver JAR file. Raises: RuntimeError: If driver not found and auto-download disabled. """ if driver is None: driver = RECOMMENDED_JDBC_DRIVERS.get(database.lower()) if driver is None: raise ValueError(f"No recommended driver for database: {database}") if cache_dir is None: cache_dir = get_driver_cache_dir() target_path = cache_dir / driver.filename # Check if driver exists in cache if target_path.exists(): return target_path # Try to auto-download if auto_download: return download_driver(driver, cache_dir) raise RuntimeError( f"JDBC driver not found: {target_path}. " f"Enable auto_download or set CLASSPATH environment variable." )
[docs] def get_all_driver_paths( databases: list[str] | None = None, auto_download: bool = True, cache_dir: Path | None = None, ) -> list[Path]: """ Get paths to multiple JDBC drivers. Args: databases: List of database names. If None, downloads all recommended drivers. auto_download: Whether to auto-download drivers if not found. cache_dir: Directory to cache downloaded drivers. Returns: List of paths to JDBC driver JAR files. """ if databases is None: databases = list(RECOMMENDED_JDBC_DRIVERS.keys()) paths = [] for database in databases: try: path = get_driver_path( database, auto_download=auto_download, cache_dir=cache_dir ) paths.append(path) except Exception as e: logger.warning(f"Failed to get driver for {database}: {e}") return paths
[docs] def get_classpath_with_drivers( databases: list[str] | None = None, auto_download: bool = True, manual_classpath: list[Path] | None = None, ) -> list[Path]: """ Get comprehensive classpath including auto-downloaded and manual drivers. Args: databases: List of database names for auto-download. If None, downloads all recommended. auto_download: Whether to auto-download drivers. manual_classpath: Additional manual classpath entries. Returns: List of all classpath entries. """ classpath = [] # Add manual classpath entries first (higher priority) if manual_classpath: classpath.extend(manual_classpath) # Add auto-downloaded drivers if auto_download: try: auto_paths = get_all_driver_paths(databases, auto_download=True) classpath.extend(auto_paths) except Exception as e: logger.warning(f"Failed to auto-download some drivers: {e}") # Remove duplicates while preserving order seen = set() unique_classpath = [] for path in classpath: if path not in seen: seen.add(path) unique_classpath.append(path) return unique_classpath
[docs] def verify_driver(driver_path: Path) -> bool: """ Verify that a JDBC driver JAR file is valid. Args: driver_path: Path to the driver JAR file. Returns: True if driver appears valid, False otherwise. """ if not driver_path.exists(): return False if not driver_path.is_file(): return False if not driver_path.suffix == ".jar": return False # Check if file is not empty if driver_path.stat().st_size == 0: return False # Could add more validation (e.g., ZIP file structure) return True
[docs] def list_cached_drivers(cache_dir: Path | None = None) -> list[Path]: """ List all cached JDBC drivers. Args: cache_dir: Directory to check. If None, uses default. Returns: List of paths to cached driver JAR files. """ if cache_dir is None: cache_dir = get_driver_cache_dir() if not cache_dir.exists(): return [] return [path for path in cache_dir.glob("*.jar") if verify_driver(path)]
[docs] def clear_driver_cache(cache_dir: Path | None = None) -> int: """ Clear the driver cache directory. Args: cache_dir: Directory to clear. If None, uses default. Returns: Number of files deleted. """ if cache_dir is None: cache_dir = get_driver_cache_dir() if not cache_dir.exists(): return 0 count = 0 for path in cache_dir.glob("*.jar"): try: path.unlink() count += 1 logger.debug(f"Deleted cached driver: {path}") except Exception as e: logger.warning(f"Failed to delete {path}: {e}") return count