# Source code for terracotta.drivers.sqlite_remote_meta_store

"""SQLite-backed metadata driver. Metadata is stored in an SQLite database."""

import contextlib
import logging
import os
import shutil
import tempfile
import time
import urllib.parse as urlparse
from pathlib import Path
from typing import Iterator, Union

from terracotta import exceptions, get_settings
from terracotta.drivers.sqlite_meta_store import SQLiteMetaStore
from terracotta.profile import trace

logger = logging.getLogger(__name__)

@contextlib.contextmanager
def convert_exceptions(msg: str) -> Iterator[None]:
    """Convert internal boto exceptions to our InvalidDatabaseError.

    The returned object can be used as a ``with`` block or, because
    :func:`contextlib.contextmanager` managers are also decorators, applied
    directly to a function.

    Arguments:
        msg: Message of the :class:`~terracotta.exceptions.InvalidDatabaseError`
            that replaces the boto error.

    Raises:
        terracotta.exceptions.InvalidDatabaseError: If the wrapped code raises
            :class:`botocore.exceptions.ClientError`; the original error is
            chained as ``__cause__``.
    """
    # function-scope import: botocore is not loaded when this module is imported
    import botocore.exceptions

    try:
        yield
    except botocore.exceptions.ClientError as exc:
        raise exceptions.InvalidDatabaseError(msg) from exc

def _update_from_s3(remote_path: str, local_path: str) -> None:
    """Download the S3 object at ``remote_path`` and write it to ``local_path``.

    Arguments:
        remote_path: URL of the form ``s3://bucket/key``.
        local_path: File to write the object to; existing content is
            overwritten.

    Raises:
        ValueError: If ``remote_path`` does not use the ``s3`` scheme.
    """
    parsed_remote_path = urlparse.urlparse(remote_path)

    # validate first, so bad input fails with a clear error before boto3 is
    # imported or any request is made
    if parsed_remote_path.scheme != "s3":
        raise ValueError("Expected s3:// URL")

    import boto3

    bucket_name = parsed_remote_path.netloc
    key = parsed_remote_path.path.strip("/")

    s3 = boto3.resource("s3")
    obj_bytes = s3.Object(bucket_name, key).get()["Body"]

    # copy over existing database; this is somewhat safe since it is read-only.
    # closing() makes sure the response body is closed even if the copy fails.
    with contextlib.closing(obj_bytes), open(local_path, "wb") as f:
        shutil.copyfileobj(obj_bytes, f)

class RemoteSQLiteMetaStore(SQLiteMetaStore):
    """An SQLite-backed metadata driver, where the database file is stored remotely on S3.

    Stores metadata and paths to raster files in SQLite.

    See also:
        :class:`~terracotta.drivers.sqlite_meta_store.SQLiteMetaStore` for the
        local version of this driver.

    The SQLite database is simply a file that can be stored e.g. together with
    the actual raster files on S3. Before handling the first request, this driver
    will download a temporary copy of the remote database file. It is thus not
    feasible for large databases.

    The local database copy will be updated in regular intervals defined by
    :attr:`~terracotta.config.TerracottaSettings.REMOTE_DB_CACHE_TTL`.

    Warning:
        This driver is read-only. Any attempts to use the create, insert, or delete
        methods will throw a DatabaseNotWritableError.

    """

    _WRITABLE: bool = False

    def __init__(self, remote_path: Union[str, Path]) -> None:
        """Initialize the RemoteSQLiteMetaStore.

        This should not be called directly, use :func:`~terracotta.get_driver` instead.

        Arguments:
            remote_path: S3 URL in the form ``s3://bucket/key`` to remote SQLite
                database (has to exist).

        Raises:
            terracotta.exceptions.InvalidDatabaseError: If the initial download
                fails with a boto ``ClientError``.
        """
        settings = get_settings()

        self.__rm = os.remove  # keep reference to use in __del__

        # reserve a uniquely named file in the cache directory; delete=False
        # keeps it on disk after close() so it can be written to by path
        os.makedirs(settings.REMOTE_DB_CACHE_DIR, exist_ok=True)
        local_db_file = tempfile.NamedTemporaryFile(
            dir=settings.REMOTE_DB_CACHE_DIR,
            prefix="tc_s3_db_",
            suffix=".sqlite",
            delete=False,
        )
        local_db_file.close()

        # the local copy and the remote URL are two distinct paths
        self._local_path = local_db_file.name
        self._remote_path = str(remote_path)

        # download database
        with convert_exceptions("Could not retrieve database from S3"):
            _update_from_s3(self._remote_path, self._local_path)
        self._last_updated = time.time()

        # NOTE(review): presumably the parent driver expects the path of the
        # SQLite file to open, i.e. the local copy -- confirm against
        # SQLiteMetaStore.__init__
        super().__init__(self._local_path)

    @classmethod
    def _normalize_path(cls, path: str) -> str:
        """Return ``path`` rewritten as ``scheme://hostname:port/path``.

        A path without a parseable hostname is re-parsed with an ``https://``
        prefix. A missing port becomes 443 for ``https`` and 80 for any other
        scheme. Trailing slashes are removed.
        """
        parts = urlparse.urlparse(path)

        if not parts.hostname:
            parts = urlparse.urlparse(f"https://{path}")

        port = parts.port
        if port is None:
            port = 443 if parts.scheme == "https" else 80

        path = f"{parts.scheme}://{parts.hostname}:{port}{parts.path}"
        path = path.rstrip("/")
        return path

    @convert_exceptions("Could not retrieve database from S3")
    @trace("download_db_from_s3")
    def _update_db(self, remote_path: str, local_path: str) -> None:
        """Re-download the database if the local copy is older than the cache TTL.

        Arguments:
            remote_path: S3 URL of the remote database.
            local_path: Path of the local copy to overwrite.
        """
        settings = get_settings()

        # REMOTE_DB_CACHE_TTL is subtracted from time.time(), i.e. treated as seconds
        if self._last_updated < time.time() - settings.REMOTE_DB_CACHE_TTL:
            logger.debug("Remote database cache expired, re-downloading")
            _update_from_s3(remote_path, local_path)
            self._last_updated = time.time()

    def _connection_callback(self) -> None:
        """Refresh the local database copy if needed, then run the parent callback."""
        self._update_db(self._remote_path, self._local_path)
        super()._connection_callback()

    def __del__(self) -> None:
        """Clean up temporary database upon exit"""
        try:
            self.__rm(self._local_path)
        except (OSError, AttributeError):  # pragma: no cover
            # OSError: file is already gone or cannot be removed.
            # AttributeError: __init__ failed before the attributes were set.
            pass