Add TESSERA 4.0 schema script and ignore sqlite3 artifacts

This commit is contained in:
otivm
2026-04-26 06:49:17 +00:00
parent 865c9b0637
commit 89e977f167
2 changed files with 177 additions and 0 deletions

173
data/create_otivm_db.sql Normal file
View File

@@ -0,0 +1,173 @@
-- Connection setup, run before any DDL.
-- foreign_keys is a per-connection switch in SQLite: it is not stored in the
-- database file, so every client that writes to this database must enable it
-- again after connecting. WAL journaling, by contrast, persists in the file.
PRAGMA foreign_keys = ON;
PRAGMA journal_mode = WAL;
-- ---------------------------------------------------------------
-- 1. Lookup tables (written once, never modified)
-- ---------------------------------------------------------------
-- Lifecycle states referenced by pipeline_runs.status, tessera_cells.status
-- and h5_coverage.status. The ids are fixed values: referencing tables store
-- them as plain integers (tessera_cells.status defaults to 1 = draft).
CREATE TABLE lifecycle_states (
    id   INTEGER PRIMARY KEY,
    name TEXT    NOT NULL UNIQUE
);
-- The column list is spelled out so the seed rows keep landing in the right
-- columns even if the table later gains or reorders columns.
INSERT INTO lifecycle_states (id, name) VALUES
    (1, 'draft'),
    (2, 'current'),
    (3, 'superseded'),
    (4, 'retired');
-- Confidence grades referenced by the per-field *_conf columns of
-- tessera_cells. Lower id = stronger evidence (1 = measured ... 4 = no_data).
CREATE TABLE confidence_grades (
    id          INTEGER PRIMARY KEY,
    name        TEXT    NOT NULL UNIQUE,
    description TEXT    NOT NULL
);
-- The column list is spelled out so the seed rows keep landing in the right
-- columns even if the table later gains or reorders columns.
INSERT INTO confidence_grades (id, name, description) VALUES
    (1, 'measured', 'Directly observed or instrumentally measured. Published dataset with explicit methodology.'),
    (2, 'indicated', 'Recorded in registry or survey without direct measurement. Classification may be broad.'),
    (3, 'inferred', 'Derived from landscape position, proximity to measured cells, or modelled from adjacent data.'),
    (4, 'no_data', 'Source dataset has no coverage for this cell. Field value is a known placeholder.');
-- Registry of upstream datasets. The per-field *_src columns of tessera_cells
-- reference source_registry.id.
CREATE TABLE source_registry (
    id            INTEGER PRIMARY KEY AUTOINCREMENT,
    -- Short lookup key, e.g. 'GEBCO_2025'
    source_key    TEXT NOT NULL UNIQUE,
    source_name   TEXT NOT NULL,
    source_url    TEXT,
    version       TEXT NOT NULL,
    license       TEXT,
    citation      TEXT,
    -- ISO 8601 UTC
    registered_at TEXT NOT NULL
);

-- Seed sources. id is left to AUTOINCREMENT, so on a freshly created table the
-- rows receive ids 1-6 in the order listed below.
INSERT INTO source_registry
    (source_key, source_name, source_url, version, license, citation, registered_at)
VALUES
    (
        'GEBCO_2025',
        'GEBCO 2025 Grid',
        'https://www.gebco.net/data_and_products/gridded_bathymetry_data/',
        '2025',
        'CC-BY 4.0',
        -- NOTE(review): this DOI looks like the one published for the GEBCO 2020
        -- Grid; confirm against the GEBCO 2025 landing page.
        'GEBCO Compilation Group (2025) GEBCO 2025 Grid (doi:10.5285/a29c5465-b138-234d-e053-6c86abc0dc7f)',
        '2026-04-26T00:00:00Z'
    ),
    (
        'ESA_WORLDCOVER_V200',
        'ESA WorldCover v200',
        'https://esa-worldcover.org/',
        'v2.0.0',
        'CC-BY 4.0',
        'Zanaga et al. (2022) ESA WorldCover 10m 2021 v200 (doi:10.5281/zenodo.7254221)',
        '2026-04-26T00:00:00Z'
    ),
    (
        'HYDROSHEDS_V11',
        'HydroSHEDS v1.1',
        'https://www.hydrosheds.org/',
        '1.1',
        'CC-BY 4.0',
        'Lehner et al. (2022) HydroSHEDS v1.1 Technical Documentation. WWF US, Washington DC.',
        '2026-04-26T00:00:00Z'
    ),
    (
        'USGS_MRDS',
        'USGS Mineral Resources Data System',
        'https://mrdata.usgs.gov/mrds/',
        '2022-08-23',
        'public domain',
        'USGS (2022) Mineral Resources Data System (MRDS). U.S. Geological Survey Data Release.',
        '2026-04-26T00:00:00Z'
    ),
    (
        'BGR_IGME5000',
        'BGR IGME5000',
        'https://www.bgr.bund.de/igme5000/',
        '2007',
        'Geonutz 2013',
        'Asch K. (2005) The 1:5 Million International Geological Map of Europe and Adjacent Areas. BGR, Hannover.',
        '2026-04-26T00:00:00Z'
    ),
    (
        -- Internal seed extraction: no public URL.
        'TESSERA3_SEED',
        'TESSERA 3.0 seed extraction',
        NULL,
        '2026-04-26',
        'internal',
        'TheRON/tesserav3 pipeline, tessera.db SpatiaLite export, 2026-04-26.',
        '2026-04-26T00:00:00Z'
    );
-- ---------------------------------------------------------------
-- 2. Pipeline tracking
-- ---------------------------------------------------------------
-- One row per pipeline execution. tessera_cells.run_id and h5_coverage.run_id
-- reference pipeline_runs.id.
CREATE TABLE pipeline_runs (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    -- Unique run label, e.g. 'tessera3-seed-2026-04-26'
    run_key         TEXT    NOT NULL UNIQUE,
    -- ISO 8601 UTC
    started_at      TEXT    NOT NULL,
    -- ISO 8601 UTC; stays NULL while the run is in progress
    completed_at    TEXT,
    status          INTEGER NOT NULL REFERENCES lifecycle_states(id),
    -- JSON array of H3 res-5 integer IDs
    h5_cells        TEXT    NOT NULL,
    -- JSON array of field names
    fields_updated  TEXT    NOT NULL,
    -- JSON object: {source_key: version}
    source_versions TEXT    NOT NULL,
    -- Stays NULL while the run is in progress
    row_count       INTEGER,
    notes           TEXT
);
-- ---------------------------------------------------------------
-- 3. Core cell table
-- ---------------------------------------------------------------
-- H3 cell IDs are stored as INTEGER (64-bit H3 index), not TEXT.
-- Use h3.str_to_int() / h3.int_to_str() in Python for conversion.
-- Versioned per-cell records. Each row is one version of one H3 res-9 cell;
-- lifecycle columns (status, superseded_by, retired_reason) track which version
-- is live. Every physical field carries its own source and confidence FK.
CREATE TABLE tessera_cells (
    -- Identity
    id            INTEGER PRIMARY KEY AUTOINCREMENT,
    h9            INTEGER NOT NULL,  -- H3 res-9 index (64-bit)
    h7            INTEGER NOT NULL,  -- H3 res-7 parent index
    h5            INTEGER NOT NULL,  -- H3 res-5 grandparent index (waypoint)
    -- H9 centroid latitude. The range check rejects values that cannot be a
    -- latitude (assumes decimal degrees, as returned by the Python h3 package).
    lat           REAL    NOT NULL
                  CONSTRAINT tessera_cells_lat_check
                  CHECK (lat BETWEEN -90.0 AND 90.0),
    -- H9 centroid longitude; same assumption as lat.
    lon           REAL    NOT NULL
                  CONSTRAINT tessera_cells_lon_check
                  CHECK (lon BETWEEN -180.0 AND 180.0),

    -- Physical fields (RFC-TESSERA-2.0-001 byte layout preserved)
    -- Elevation in cm. The documented signed 24-bit range is enforced here;
    -- NULL still passes, so "no value" rows are unaffected.
    elev_cm       INTEGER
                  CONSTRAINT tessera_cells_elev_cm_check
                  CHECK (elev_cm BETWEEN -8388608 AND 8388607),
    terrain       INTEGER,           -- Appendix A terrain code
    hydro         INTEGER,           -- Hydrology code
    geo_dep       INTEGER,           -- Geology deposit code
    geo_flag      INTEGER,           -- Geology flag code
    occ_flag      INTEGER,           -- RFC-TESSERA-3.0-OCC-001 Section 2 code

    -- Provenance per field (source FK + confidence FK)
    elev_src      INTEGER REFERENCES source_registry(id),
    elev_conf     INTEGER REFERENCES confidence_grades(id),
    terr_src      INTEGER REFERENCES source_registry(id),
    terr_conf     INTEGER REFERENCES confidence_grades(id),
    hydro_src     INTEGER REFERENCES source_registry(id),
    hydro_conf    INTEGER REFERENCES confidence_grades(id),
    gdep_src      INTEGER REFERENCES source_registry(id),
    gdep_conf     INTEGER REFERENCES confidence_grades(id),
    gflag_src     INTEGER REFERENCES source_registry(id),
    gflag_conf    INTEGER REFERENCES confidence_grades(id),
    occ_src       INTEGER REFERENCES source_registry(id),
    occ_conf      INTEGER REFERENCES confidence_grades(id),

    -- Lifecycle
    status        INTEGER NOT NULL DEFAULT 1  -- 1 = draft in lifecycle_states
                  REFERENCES lifecycle_states(id),
    run_id        INTEGER NOT NULL REFERENCES pipeline_runs(id),
    created_at    TEXT    NOT NULL,  -- ISO 8601 UTC
    superseded_by INTEGER REFERENCES tessera_cells(id),
    retired_reason TEXT
);

-- Lookups by cell id at each resolution, filtered by lifecycle status.
CREATE INDEX idx_cells_h9_status ON tessera_cells(h9, status);
CREATE INDEX idx_cells_h5_status ON tessera_cells(h5, status);
CREATE INDEX idx_cells_h7_status ON tessera_cells(h7, status);
-- All rows written by one pipeline run.
CREATE INDEX idx_cells_run ON tessera_cells(run_id);
-- ---------------------------------------------------------------
-- 4. H5 coverage completeness tracking
-- ---------------------------------------------------------------
-- One row per H3 res-5 cell, recording how many of its H9 children have been
-- loaded.
CREATE TABLE h5_coverage (
    h5           INTEGER PRIMARY KEY,  -- H3 res-5 index
    -- 1=draft (in progress), 2=current (complete), 4=retired.
    -- 3 (superseded) is not a coverage state, so it is rejected here rather
    -- than left to each writer to avoid.
    status       INTEGER NOT NULL REFERENCES lifecycle_states(id)
                 CONSTRAINT h5_coverage_status_check
                 CHECK (status IN (1, 2, 4)),
    -- Expected H9 count (typically 2401)
    h9_total     INTEGER NOT NULL
                 CONSTRAINT h5_coverage_h9_total_check
                 CHECK (h9_total >= 0),
    -- Counter starts at 0 for a new coverage row; a count can never be negative.
    h9_current   INTEGER NOT NULL DEFAULT 0
                 CONSTRAINT h5_coverage_h9_current_check
                 CHECK (h9_current >= 0),
    last_updated TEXT    NOT NULL,     -- ISO 8601 UTC
    run_id       INTEGER NOT NULL REFERENCES pipeline_runs(id),
    notes        TEXT
);