-- SQLite schema and seed data: lookup tables, pipeline tracking,
-- the core tessera_cells table, and H5 coverage tracking.
-- Connection settings applied before any DDL or seed data below.

-- Use write-ahead logging as the journal mode.
PRAGMA journal_mode = WAL;

-- Enforce the REFERENCES clauses declared on the tables in this file.
PRAGMA foreign_keys = ON;
|
|
|
|
-- ---------------------------------------------------------------
-- 1. Lookup tables (written once, never modified)
-- ---------------------------------------------------------------
|
|
|
|
-- Lookup of row lifecycle states. The `status` columns of
-- pipeline_runs, tessera_cells and h5_coverage reference `id`.
CREATE TABLE lifecycle_states (
    id   INTEGER PRIMARY KEY,
    name TEXT    NOT NULL,

    -- Each state name may appear only once.
    UNIQUE (name)
);
|
|
|
|
-- Seed the lifecycle lookup. Target columns are listed explicitly so
-- each value is bound by column name rather than by column position.
INSERT INTO lifecycle_states (id, name) VALUES
    (1, 'draft'),
    (2, 'current'),
    (3, 'superseded'),
    (4, 'retired');
|
|
|
|
-- Lookup of confidence grades. The per-field `*_conf` columns of
-- tessera_cells reference `id`.
CREATE TABLE confidence_grades (
    id          INTEGER PRIMARY KEY,
    name        TEXT    NOT NULL,
    description TEXT    NOT NULL,

    -- Each grade name may appear only once.
    UNIQUE (name)
);
|
|
|
|
-- Seed the confidence-grade lookup. Target columns are listed explicitly
-- so each value is bound by column name rather than by column position.
INSERT INTO confidence_grades (id, name, description) VALUES
    (1, 'measured',  'Directly observed or instrumentally measured. Published dataset with explicit methodology.'),
    (2, 'indicated', 'Recorded in registry or survey without direct measurement. Classification may be broad.'),
    (3, 'inferred',  'Derived from landscape position, proximity to measured cells, or modelled from adjacent data.'),
    (4, 'no_data',   'Source dataset has no coverage for this cell. Field value is a known placeholder.');
|
|
|
|
-- Registry of upstream data sources. The per-field `*_src` columns of
-- tessera_cells reference `id`.
CREATE TABLE source_registry (
    id            INTEGER PRIMARY KEY AUTOINCREMENT,
    source_key    TEXT    NOT NULL,   -- short key, e.g. 'GEBCO_2025'
    source_name   TEXT    NOT NULL,
    source_url    TEXT,
    version       TEXT    NOT NULL,
    license       TEXT,
    citation      TEXT,
    registered_at TEXT    NOT NULL,   -- ISO 8601 UTC

    -- A source key identifies exactly one registry row.
    UNIQUE (source_key)
);
|
|
|
|
-- Seed the source registry: one tuple per upstream dataset, given in the
-- order of the column list. `id` is left to AUTOINCREMENT.
INSERT INTO source_registry
    (source_key, source_name, source_url, version, license, citation, registered_at)
VALUES
    -- GEBCO 2025 grid
    (
        'GEBCO_2025',
        'GEBCO 2025 Grid',
        'https://www.gebco.net/data_and_products/gridded_bathymetry_data/',
        '2025',
        'CC-BY 4.0',
        'GEBCO Compilation Group (2025) GEBCO 2025 Grid (doi:10.5285/a29c5465-b138-234d-e053-6c86abc0dc7f)',
        '2026-04-26T00:00:00Z'
    ),
    -- ESA WorldCover v200
    (
        'ESA_WORLDCOVER_V200',
        'ESA WorldCover v200',
        'https://esa-worldcover.org/',
        'v2.0.0',
        'CC-BY 4.0',
        'Zanaga et al. (2022) ESA WorldCover 10m 2021 v200 (doi:10.5281/zenodo.7254221)',
        '2026-04-26T00:00:00Z'
    ),
    -- HydroSHEDS v1.1
    (
        'HYDROSHEDS_V11',
        'HydroSHEDS v1.1',
        'https://www.hydrosheds.org/',
        '1.1',
        'CC-BY 4.0',
        'Lehner et al. (2022) HydroSHEDS v1.1 Technical Documentation. WWF US, Washington DC.',
        '2026-04-26T00:00:00Z'
    ),
    -- USGS Mineral Resources Data System
    (
        'USGS_MRDS',
        'USGS Mineral Resources Data System',
        'https://mrdata.usgs.gov/mrds/',
        '2022-08-23',
        'public domain',
        'USGS (2022) Mineral Resources Data System (MRDS). U.S. Geological Survey Data Release.',
        '2026-04-26T00:00:00Z'
    ),
    -- BGR IGME5000
    -- NOTE(review): the license string may be intended as 'GeoNutzV 2013';
    -- confirm the spelling before changing the stored value.
    (
        'BGR_IGME5000',
        'BGR IGME5000',
        'https://www.bgr.bund.de/igme5000/',
        '2007',
        'Geonutz 2013',
        'Asch K. (2005) The 1:5 Million International Geological Map of Europe and Adjacent Areas. BGR, Hannover.',
        '2026-04-26T00:00:00Z'
    ),
    -- TESSERA 3.0 seed extraction (no source URL recorded)
    (
        'TESSERA3_SEED',
        'TESSERA 3.0 seed extraction',
        NULL,
        '2026-04-26',
        'internal',
        'TheRON/tesserav3 pipeline, tessera.db SpatiaLite export, 2026-04-26.',
        '2026-04-26T00:00:00Z'
    );
|
|
|
|
-- ---------------------------------------------------------------
-- 2. Pipeline tracking
-- ---------------------------------------------------------------
|
|
|
|
-- One row per pipeline run. tessera_cells.run_id and h5_coverage.run_id
-- reference `id`.
CREATE TABLE pipeline_runs (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    run_key         TEXT    NOT NULL,   -- e.g. 'tessera3-seed-2026-04-26'
    started_at      TEXT    NOT NULL,   -- ISO 8601 UTC
    completed_at    TEXT,               -- NULL while running
    status          INTEGER NOT NULL,   -- lifecycle state of the run
    h5_cells        TEXT    NOT NULL,   -- JSON array of H3 res-5 integer IDs
    fields_updated  TEXT    NOT NULL,   -- JSON array of field names
    source_versions TEXT    NOT NULL,   -- JSON object: {source_key: version}
    row_count       INTEGER,            -- NULL while running
    notes           TEXT,

    -- A run key identifies exactly one run.
    UNIQUE (run_key),
    FOREIGN KEY (status) REFERENCES lifecycle_states (id)
);
|
|
|
|
-- ---------------------------------------------------------------
-- 3. Core cell table
-- ---------------------------------------------------------------
|
|
|
|
-- H3 cell IDs are stored as INTEGER (64-bit H3 index), not TEXT.
-- Use h3.str_to_int() / h3.int_to_str() in Python for conversion.
|
|
|
|
-- Core cell table: one row per version of an H3 res-9 cell. Each row
-- holds the cell's physical fields, a (source, confidence) pair for
-- every field, and lifecycle bookkeeping.
CREATE TABLE tessera_cells (
    -- Identity
    id             INTEGER PRIMARY KEY AUTOINCREMENT,
    h9             INTEGER NOT NULL,   -- H3 res-9 index (64-bit)
    h7             INTEGER NOT NULL,   -- H3 res-7 parent index
    h5             INTEGER NOT NULL,   -- H3 res-5 grandparent index (waypoint)
    lat            REAL    NOT NULL,   -- H9 centroid latitude
    lon            REAL    NOT NULL,   -- H9 centroid longitude

    -- Physical fields (RFC-TESSERA-2.0-001 byte layout preserved)
    elev_cm        INTEGER,            -- Elevation in cm, signed 24-bit range
    terrain        INTEGER,            -- Appendix A terrain code
    hydro          INTEGER,            -- Hydrology code
    geo_dep        INTEGER,            -- Geology deposit code
    geo_flag       INTEGER,            -- Geology flag code
    occ_flag       INTEGER,            -- RFC-TESSERA-3.0-OCC-001 Section 2 code

    -- Provenance per field: *_src is a source FK, *_conf a confidence FK
    elev_src       INTEGER REFERENCES source_registry (id),
    elev_conf      INTEGER REFERENCES confidence_grades (id),
    terr_src       INTEGER REFERENCES source_registry (id),
    terr_conf      INTEGER REFERENCES confidence_grades (id),
    hydro_src      INTEGER REFERENCES source_registry (id),
    hydro_conf     INTEGER REFERENCES confidence_grades (id),
    gdep_src       INTEGER REFERENCES source_registry (id),
    gdep_conf      INTEGER REFERENCES confidence_grades (id),
    gflag_src      INTEGER REFERENCES source_registry (id),
    gflag_conf     INTEGER REFERENCES confidence_grades (id),
    occ_src        INTEGER REFERENCES source_registry (id),
    occ_conf       INTEGER REFERENCES confidence_grades (id),

    -- Lifecycle (status defaults to 1, seeded as 'draft')
    status         INTEGER NOT NULL DEFAULT 1 REFERENCES lifecycle_states (id),
    run_id         INTEGER NOT NULL REFERENCES pipeline_runs (id),
    created_at     TEXT    NOT NULL,   -- ISO 8601 UTC
    superseded_by  INTEGER REFERENCES tessera_cells (id),   -- self-reference
    retired_reason TEXT
);
|
|
|
|
-- Secondary indexes on tessera_cells: one (cell index, status) index per
-- H3 resolution column, plus one on the owning pipeline run.
CREATE INDEX idx_cells_h9_status
    ON tessera_cells (h9, status);

CREATE INDEX idx_cells_h5_status
    ON tessera_cells (h5, status);

CREATE INDEX idx_cells_h7_status
    ON tessera_cells (h7, status);

CREATE INDEX idx_cells_run
    ON tessera_cells (run_id);
|
|
|
|
-- ---------------------------------------------------------------
-- 4. H5 coverage completeness tracking
-- ---------------------------------------------------------------
|
|
|
|
-- Coverage bookkeeping: one row per H3 res-5 cell, recording how many
-- H9 cells are expected and how many are current.
CREATE TABLE h5_coverage (
    h5           INTEGER PRIMARY KEY,          -- H3 res-5 index
    -- status: 1=draft (in progress), 2=current (complete), 4=retired
    status       INTEGER NOT NULL,
    h9_total     INTEGER NOT NULL,             -- Expected H9 count (typically 2401)
    h9_current   INTEGER NOT NULL DEFAULT 0,
    last_updated TEXT    NOT NULL,             -- ISO 8601 UTC
    run_id       INTEGER NOT NULL,
    notes        TEXT,

    FOREIGN KEY (status) REFERENCES lifecycle_states (id),
    FOREIGN KEY (run_id) REFERENCES pipeline_runs (id)
);
|