-- otivm/data/create_otivm_db.sql
-- (listing metadata: 272 lines, 11 KiB, SQL)

-- Connection/database settings applied before any schema is created.
-- journal_mode = WAL switches the database to write-ahead logging.
PRAGMA journal_mode = WAL;
-- Foreign-key enforcement is a per-connection setting in SQLite, so the
-- REFERENCES clauses below are only checked on connections that set this.
PRAGMA foreign_keys = ON;
-- ---------------------------------------------------------------
-- 1. Lookup tables (written once, never modified)
-- ---------------------------------------------------------------
-- Lifecycle state lookup. Status columns elsewhere in this schema store
-- these ids as integer foreign keys (tessera_cells.status defaults to 1),
-- so the id/name pairs below must stay stable.
CREATE TABLE lifecycle_states (
    id   INTEGER PRIMARY KEY,
    name TEXT NOT NULL UNIQUE
);

-- Explicit column list: the seed rows no longer depend on the physical
-- column order of the table.
INSERT INTO lifecycle_states (id, name) VALUES
    (1, 'draft'),
    (2, 'current'),
    (3, 'superseded'),
    (4, 'retired');
-- Confidence grade lookup. Lower id = stronger evidence (1 measured ...
-- 4 no_data), as described by each row's description text. The ids are
-- stored as integer foreign keys by the *_conf columns and by
-- paleo_epochs.confidence.
CREATE TABLE confidence_grades (
    id          INTEGER PRIMARY KEY,
    name        TEXT NOT NULL UNIQUE,
    description TEXT NOT NULL
);

-- Explicit column list: the seed rows no longer depend on the physical
-- column order of the table.
INSERT INTO confidence_grades (id, name, description) VALUES
    (1, 'measured', 'Directly observed or instrumentally measured. Published dataset with explicit methodology.'),
    (2, 'indicated', 'Recorded in registry or survey without direct measurement. Classification may be broad.'),
    (3, 'inferred', 'Derived from landscape position, proximity to measured cells, or modelled from adjacent data.'),
    (4, 'no_data', 'Source dataset has no coverage for this cell. Field value is a known placeholder.');
-- Registry of upstream datasets. The *_src columns of tessera_cells point
-- at source_registry.id.
CREATE TABLE source_registry (
    id            INTEGER PRIMARY KEY AUTOINCREMENT,
    source_key    TEXT NOT NULL UNIQUE,   -- stable short key, e.g. 'GEBCO_2025'
    source_name   TEXT NOT NULL,
    source_url    TEXT,                   -- nullable: internal sources have no URL
    version       TEXT NOT NULL,
    license       TEXT,
    citation      TEXT,
    registered_at TEXT NOT NULL           -- ISO 8601 UTC
);

-- Seed sources. No id is supplied, so ids are assigned by SQLite in row
-- order (1..6 in a freshly created database).
INSERT INTO source_registry (
    source_key,
    source_name,
    source_url,
    version,
    license,
    citation,
    registered_at
) VALUES
    -- Bathymetry / elevation grid
    ('GEBCO_2025',
     'GEBCO 2025 Grid',
     'https://www.gebco.net/data_and_products/gridded_bathymetry_data/',
     '2025',
     'CC-BY 4.0',
     'GEBCO Compilation Group (2025) GEBCO 2025 Grid (doi:10.5285/a29c5465-b138-234d-e053-6c86abc0dc7f)',
     '2026-04-26T00:00:00Z'),
    -- Land cover
    ('ESA_WORLDCOVER_V200',
     'ESA WorldCover v200',
     'https://esa-worldcover.org/',
     'v2.0.0',
     'CC-BY 4.0',
     'Zanaga et al. (2022) ESA WorldCover 10m 2021 v200 (doi:10.5281/zenodo.7254221)',
     '2026-04-26T00:00:00Z'),
    -- Hydrography
    ('HYDROSHEDS_V11',
     'HydroSHEDS v1.1',
     'https://www.hydrosheds.org/',
     '1.1',
     'CC-BY 4.0',
     'Lehner et al. (2022) HydroSHEDS v1.1 Technical Documentation. WWF US, Washington DC.',
     '2026-04-26T00:00:00Z'),
    -- Mineral resources
    ('USGS_MRDS',
     'USGS Mineral Resources Data System',
     'https://mrdata.usgs.gov/mrds/',
     '2022-08-23',
     'public domain',
     'USGS (2022) Mineral Resources Data System (MRDS). U.S. Geological Survey Data Release.',
     '2026-04-26T00:00:00Z'),
    -- Geological map
    -- NOTE(review): license string 'Geonutz 2013' presumably refers to the
    -- German GeoNutzV (2013) terms; confirm the intended spelling.
    ('BGR_IGME5000',
     'BGR IGME5000',
     'https://www.bgr.bund.de/igme5000/',
     '2007',
     'Geonutz 2013',
     'Asch K. (2005) The 1:5 Million International Geological Map of Europe and Adjacent Areas. BGR, Hannover.',
     '2026-04-26T00:00:00Z'),
    -- Internal seed extraction (no public URL)
    ('TESSERA3_SEED',
     'TESSERA 3.0 seed extraction',
     NULL,
     '2026-04-26',
     'internal',
     'TheRON/tesserav3 pipeline, tessera.db SpatiaLite export, 2026-04-26.',
     '2026-04-26T00:00:00Z');
-- ---------------------------------------------------------------
-- 2. Pipeline tracking
-- ---------------------------------------------------------------
-- One row per pipeline execution. tessera_cells.run_id and
-- h5_coverage.run_id reference this table.
CREATE TABLE pipeline_runs (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    run_key         TEXT NOT NULL UNIQUE,   -- e.g. 'tessera3-seed-2026-04-26'

    -- Timing: completed_at stays NULL while the run is in progress.
    started_at      TEXT NOT NULL,          -- ISO 8601 UTC
    completed_at    TEXT,

    status          INTEGER NOT NULL REFERENCES lifecycle_states(id),

    -- JSON payloads are stored as TEXT; the schema does not validate them.
    h5_cells        TEXT NOT NULL,          -- JSON array of H3 res-5 integer IDs
    fields_updated  TEXT NOT NULL,          -- JSON array of field names
    source_versions TEXT NOT NULL,          -- JSON object: {source_key: version}

    row_count       INTEGER,                -- NULL while running
    notes           TEXT
);
-- ---------------------------------------------------------------
-- 3. Core cell table
-- ---------------------------------------------------------------
-- Core per-cell table.
-- H3 indexes (h9/h7/h5) are held as 64-bit INTEGER values rather than TEXT;
-- in Python convert with h3.str_to_int() / h3.int_to_str().
CREATE TABLE tessera_cells (
    -- ---- Identity ------------------------------------------------------
    id            INTEGER PRIMARY KEY AUTOINCREMENT,
    h9            INTEGER NOT NULL,   -- H3 res-9 index (64-bit)
    h7            INTEGER NOT NULL,   -- H3 res-7 parent index
    h5            INTEGER NOT NULL,   -- H3 res-5 grandparent index (waypoint)
    lat           REAL    NOT NULL,   -- H9 centroid latitude
    lon           REAL    NOT NULL,   -- H9 centroid longitude

    -- ---- Physical fields (RFC-TESSERA-2.0-001 byte layout preserved) ----
    -- All nullable; ranges noted here are not enforced by the schema.
    elev_cm       INTEGER,            -- Elevation in cm, signed 24-bit range
    terrain       INTEGER,            -- Appendix A terrain code
    hydro         INTEGER,            -- Hydrology code
    geo_dep       INTEGER,            -- Geology deposit code
    geo_flag      INTEGER,            -- Geology flag code
    occ_flag      INTEGER,            -- RFC-TESSERA-3.0-OCC-001 Section 2 code

    -- ---- Provenance: one (source, confidence) pair per physical field ----
    elev_src      INTEGER REFERENCES source_registry(id),
    elev_conf     INTEGER REFERENCES confidence_grades(id),
    terr_src      INTEGER REFERENCES source_registry(id),
    terr_conf     INTEGER REFERENCES confidence_grades(id),
    hydro_src     INTEGER REFERENCES source_registry(id),
    hydro_conf    INTEGER REFERENCES confidence_grades(id),
    gdep_src      INTEGER REFERENCES source_registry(id),
    gdep_conf     INTEGER REFERENCES confidence_grades(id),
    gflag_src     INTEGER REFERENCES source_registry(id),
    gflag_conf    INTEGER REFERENCES confidence_grades(id),
    occ_src       INTEGER REFERENCES source_registry(id),
    occ_conf      INTEGER REFERENCES confidence_grades(id),

    -- ---- Lifecycle -------------------------------------------------------
    -- New rows default to lifecycle state 1 ('draft' in lifecycle_states).
    status        INTEGER NOT NULL DEFAULT 1 REFERENCES lifecycle_states(id),
    run_id        INTEGER NOT NULL REFERENCES pipeline_runs(id),
    created_at    TEXT    NOT NULL,   -- ISO 8601 UTC
    superseded_by INTEGER REFERENCES tessera_cells(id),  -- self-reference to another row
    retired_reason TEXT
);

-- Lookup indexes: each H3 resolution paired with status, plus run membership.
-- None of these is UNIQUE, so several rows may share the same h9.
CREATE INDEX idx_cells_h5_status ON tessera_cells (h5, status);
CREATE INDEX idx_cells_h7_status ON tessera_cells (h7, status);
CREATE INDEX idx_cells_h9_status ON tessera_cells (h9, status);
CREATE INDEX idx_cells_run       ON tessera_cells (run_id);
-- ---------------------------------------------------------------
-- 4. H5 coverage completeness tracking
-- ---------------------------------------------------------------
-- Completeness tracking: one row per H3 res-5 cell.
CREATE TABLE h5_coverage (
    h5           INTEGER PRIMARY KEY,   -- H3 res-5 index
    -- Lifecycle state of the H5 cell as a whole:
    --   1 = draft (in progress), 2 = current (complete), 4 = retired
    status       INTEGER NOT NULL REFERENCES lifecycle_states(id),
    h9_total     INTEGER NOT NULL,              -- Expected H9 count (typically 2401)
    h9_current   INTEGER NOT NULL DEFAULT 0,    -- starts at 0 when not supplied
    last_updated TEXT    NOT NULL,              -- ISO 8601 UTC
    run_id       INTEGER NOT NULL REFERENCES pipeline_runs(id),
    notes        TEXT
);
-- ---------------------------------------------------------------
-- 5. Palaeoenvironmental sea level epochs
-- Per RFC-TESSERA-3.0-PALEO-001 Section 5
-- Offsets verified against Lambeck et al. 2014 (PNAS 111:43),
-- Lambeck et al. 2004 (Science 292:5517),
-- Clark et al. 2009 (Science 325:5941),
-- Dutton & Lambeck 2012 (Science 337:6091).
-- All offsets are eustatic only — no isostatic correction.
-- Negative = sea was lower = more land than today.
-- approx_date_bce: positive = BCE, negative = CE.
-- ---------------------------------------------------------------
-- Sea-level epoch lookup, keyed by a short text identifier.
CREATE TABLE paleo_epochs (
    epoch_key        TEXT PRIMARY KEY,
    label            TEXT    NOT NULL,
    approx_date_bce  INTEGER NOT NULL,   -- positive = BCE, negative = CE
    sl_offset_cm     INTEGER NOT NULL,   -- offset from modern MSL
    confidence       INTEGER NOT NULL REFERENCES confidence_grades(id),
    primary_citation TEXT    NOT NULL,
    notes            TEXT
);

-- Seed epochs, listed from the present backwards in time.
INSERT INTO paleo_epochs (
    epoch_key,
    label,
    approx_date_bce,
    sl_offset_cm,
    confidence,
    primary_citation,
    notes
) VALUES
    -- Baseline row: date is stored as -2025, i.e. 2025 CE under the sign convention.
    ('present',
     'Present day',
     -2025,
     0,
     1,
     'GEBCO Compilation Group (2025) GEBCO 2025 Grid (doi:10.5285/a29c5465-b138-234d-e053-6c86abc0dc7f)',
     'Baseline. EGM2008 geoid, modern MSL = 0. No correction applied.'),

    -- NOTE(review): the stored offset is -10 cm although the row's notes say
    -- to treat it as 0 for rendering; consumers must apply that themselves.
    -- NOTE(review): this title with Science 292(5517) appears to be
    -- Lambeck & Chappell (2001), not "Lambeck et al. (2004)"; confirm which
    -- paper is intended before correcting the citation text.
    ('roman_14bce',
     'Roman period, 14 BCE',
     14,
     -10,
     2,
     'Lambeck et al. (2004) Sea-level change through the last glacial cycle. Science 292(5517).',
     'Mediterranean RSL within measurement uncertainty of present. Treat as 0 for rendering.'),

    -- NOTE(review): key is spelled 'subboral' while the label reads
    -- 'Sub-Boreal'. Left as-is because the key is the primary key and may
    -- already be referenced by consumers.
    ('subboral_3000bce',
     'Sub-Boreal, 3000 BCE',
     3000,
     -200,
     2,
     'Lambeck et al. (2014) Sea level and global ice volumes from the Last Glacial Maximum to the Holocene. PNAS 111(43).',
     NULL),

    ('atlantic_6000bce',
     'Atlantic optimum, 6000 BCE',
     6000,
     -500,
     2,
     'Lambeck et al. (2014) Sea level and global ice volumes from the Last Glacial Maximum to the Holocene. PNAS 111(43).',
     NULL),

    ('mesolithic_8000bce',
     'Mesolithic, 8000 BCE',
     8000,
     -2500,
     2,
     'Lambeck et al. (2014) Sea level and global ice volumes from the Last Glacial Maximum to the Holocene. PNAS 111(43).',
     'Primary epoch for CIVICVS simulation. Adriatic, Gulf of Gabes, Nile delta visibly affected. Mediterranean-specific RSL closer to -1500 to -2000cm; global eustatic value used per RFC-TESSERA-3.0-PALEO-001 Section 1.5.'),

    ('preboreal_10000bce',
     'Pre-Boreal, 10000 BCE',
     10000,
     -5000,
     2,
     'Lambeck et al. (2014) Sea level and global ice volumes from the Last Glacial Maximum to the Holocene. PNAS 111(43).',
     NULL),

    -- Confidence 3 ('inferred'): the row's notes mark the offset as approximate.
    ('younger_dryas_11000bce',
     'Younger Dryas, 11000 BCE',
     11000,
     -7000,
     3,
     'Lambeck et al. (2014) Sea level and global ice volumes from the Last Glacial Maximum to the Holocene. PNAS 111(43).',
     'Rapid sea level fall during Younger Dryas stadial. Offset is approximate.'),

    ('lgm_20000bce',
     'Last Glacial Maximum, 20000 BCE',
     20000,
     -12000,
     2,
     'Clark et al. (2009) The Last Glacial Maximum. Science 325(5941).',
     'Sicilian Channel partially exposed. Cyprus enlarged. Reserved for future use.'),

    -- Only row with a positive offset (sea higher than today).
    ('eem_125000bce',
     'Eemian interglacial, 125000 BCE',
     125000,
     600,
     3,
     'Dutton & Lambeck (2012) Ice volume and sea level during the last interglacial. Science 337(6091).',
     'Positive offset: sea was higher than today. Reserved for future use.');