-- data/create_otivm_db.sql
-- Add TESSERA 4.0 schema script and ignore sqlite3 artifacts.

-- Connection-level settings applied before any DDL runs.
-- Foreign-key enforcement is a per-connection switch in SQLite (off by
-- default), so every later connection must enable it again for the
-- REFERENCES clauses in this schema to be checked.
PRAGMA foreign_keys = ON;
-- Write-ahead logging; unlike foreign_keys, this mode is recorded in the
-- database file and stays in effect for later connections.
PRAGMA journal_mode = WAL;

-- ---------------------------------------------------------------
-- 1. Lookup tables (written once, never modified)
-- ---------------------------------------------------------------

-- Lookup table of lifecycle states. The status columns of pipeline_runs,
-- tessera_cells and h5_coverage reference lifecycle_states(id).
CREATE TABLE lifecycle_states (
    id   INTEGER PRIMARY KEY,       -- fixed numeric code, assigned explicitly below
    name TEXT    NOT NULL UNIQUE    -- state label
);

-- Seed rows. The column list is spelled out so the statement does not
-- silently depend on the table's column order or column count.
-- Code 1 ('draft') is also the DEFAULT of tessera_cells.status.
INSERT INTO lifecycle_states (id, name) VALUES
    (1, 'draft'),
    (2, 'current'),
    (3, 'superseded'),
    (4, 'retired');

-- Lookup table of confidence grades. The *_conf provenance columns of
-- tessera_cells reference confidence_grades(id).
CREATE TABLE confidence_grades (
    id          INTEGER PRIMARY KEY,      -- fixed numeric code, assigned explicitly below
    name        TEXT    NOT NULL UNIQUE,  -- short grade label
    description TEXT    NOT NULL          -- human-readable meaning of the grade
);

-- Seed rows. The column list is spelled out so the statement does not
-- silently depend on the table's column order or column count.
INSERT INTO confidence_grades (id, name, description) VALUES
    (1, 'measured',  'Directly observed or instrumentally measured. Published dataset with explicit methodology.'),
    (2, 'indicated', 'Recorded in registry or survey without direct measurement. Classification may be broad.'),
    (3, 'inferred',  'Derived from landscape position, proximity to measured cells, or modelled from adjacent data.'),
    (4, 'no_data',   'Source dataset has no coverage for this cell. Field value is a known placeholder.');

-- Registry of upstream datasets. The *_src provenance columns of
-- tessera_cells reference source_registry(id).
CREATE TABLE source_registry (
    id            INTEGER PRIMARY KEY AUTOINCREMENT,
    source_key    TEXT NOT NULL,   -- short identifier, e.g. 'GEBCO_2025'
    source_name   TEXT NOT NULL,
    source_url    TEXT,
    version       TEXT NOT NULL,
    license       TEXT,
    citation      TEXT,
    registered_at TEXT NOT NULL,   -- ISO 8601 UTC
    UNIQUE (source_key)
);

-- Seed sources. id is omitted, so on a freshly created table the rows
-- receive ids in the order listed here.
-- NOTE(review): the BGR_IGME5000 license string 'Geonutz 2013' may be meant
-- as 'GeoNutzV 2013' - confirm against the publisher before relying on it.
INSERT INTO source_registry (
    source_key,
    source_name,
    source_url,
    version,
    license,
    citation,
    registered_at
) VALUES
    (
        'GEBCO_2025',
        'GEBCO 2025 Grid',
        'https://www.gebco.net/data_and_products/gridded_bathymetry_data/',
        '2025',
        'CC-BY 4.0',
        'GEBCO Compilation Group (2025) GEBCO 2025 Grid (doi:10.5285/a29c5465-b138-234d-e053-6c86abc0dc7f)',
        '2026-04-26T00:00:00Z'
    ),
    (
        'ESA_WORLDCOVER_V200',
        'ESA WorldCover v200',
        'https://esa-worldcover.org/',
        'v2.0.0',
        'CC-BY 4.0',
        'Zanaga et al. (2022) ESA WorldCover 10m 2021 v200 (doi:10.5281/zenodo.7254221)',
        '2026-04-26T00:00:00Z'
    ),
    (
        'HYDROSHEDS_V11',
        'HydroSHEDS v1.1',
        'https://www.hydrosheds.org/',
        '1.1',
        'CC-BY 4.0',
        'Lehner et al. (2022) HydroSHEDS v1.1 Technical Documentation. WWF US, Washington DC.',
        '2026-04-26T00:00:00Z'
    ),
    (
        'USGS_MRDS',
        'USGS Mineral Resources Data System',
        'https://mrdata.usgs.gov/mrds/',
        '2022-08-23',
        'public domain',
        'USGS (2022) Mineral Resources Data System (MRDS). U.S. Geological Survey Data Release.',
        '2026-04-26T00:00:00Z'
    ),
    (
        'BGR_IGME5000',
        'BGR IGME5000',
        'https://www.bgr.bund.de/igme5000/',
        '2007',
        'Geonutz 2013',
        'Asch K. (2005) The 1:5 Million International Geological Map of Europe and Adjacent Areas. BGR, Hannover.',
        '2026-04-26T00:00:00Z'
    ),
    (
        'TESSERA3_SEED',
        'TESSERA 3.0 seed extraction',
        NULL,
        '2026-04-26',
        'internal',
        'TheRON/tesserav3 pipeline, tessera.db SpatiaLite export, 2026-04-26.',
        '2026-04-26T00:00:00Z'
    );

-- ---------------------------------------------------------------
-- 2. Pipeline tracking
-- ---------------------------------------------------------------

-- One row per pipeline run. tessera_cells.run_id and h5_coverage.run_id
-- reference pipeline_runs(id).
CREATE TABLE pipeline_runs (
    id              INTEGER PRIMARY KEY AUTOINCREMENT,
    -- Caller-chosen run label, e.g. 'tessera3-seed-2026-04-26'.
    run_key         TEXT    NOT NULL,
    -- Timestamps are ISO 8601 UTC text; completed_at is NULL while running.
    started_at      TEXT    NOT NULL,
    completed_at    TEXT,
    -- Code from lifecycle_states.
    status          INTEGER NOT NULL,
    -- JSON payloads stored as text:
    --   h5_cells        array of H3 res-5 integer IDs
    --   fields_updated  array of field names
    --   source_versions object of the form {source_key: version}
    h5_cells        TEXT    NOT NULL,
    fields_updated  TEXT    NOT NULL,
    source_versions TEXT    NOT NULL,
    -- NULL while running.
    row_count       INTEGER,
    notes           TEXT,
    UNIQUE (run_key),
    FOREIGN KEY (status) REFERENCES lifecycle_states (id)
);

-- ---------------------------------------------------------------
-- 3. Core cell table
-- ---------------------------------------------------------------

-- H3 cell IDs are stored as INTEGER (64-bit H3 index), not TEXT.
-- Use h3.str_to_int() / h3.int_to_str() in Python for conversion.

-- Core cell table. Each row holds the physical fields of one H3 res-9 cell
-- together with per-field provenance and a lifecycle status; a row may point
-- at a replacement row through superseded_by.
CREATE TABLE tessera_cells (
    -- Identity: surrogate key, H3 index hierarchy, and the H9 centroid.
    id  INTEGER PRIMARY KEY AUTOINCREMENT,
    h9  INTEGER NOT NULL,    -- 64-bit H3 index at resolution 9
    h7  INTEGER NOT NULL,    -- H3 res-7 parent index
    h5  INTEGER NOT NULL,    -- H3 res-5 grandparent index (waypoint)
    lat REAL    NOT NULL,    -- latitude of the H9 centroid
    lon REAL    NOT NULL,    -- longitude of the H9 centroid

    -- Physical fields; the RFC-TESSERA-2.0-001 byte layout is preserved.
    elev_cm  INTEGER,        -- elevation in centimetres, signed 24-bit range
    terrain  INTEGER,        -- terrain code (Appendix A)
    hydro    INTEGER,        -- hydrology code
    geo_dep  INTEGER,        -- geology deposit code
    geo_flag INTEGER,        -- geology flag code
    occ_flag INTEGER,        -- code per RFC-TESSERA-3.0-OCC-001 Section 2

    -- Provenance: every physical field has a source FK and a confidence FK.
    elev_src   INTEGER REFERENCES source_registry(id),
    elev_conf  INTEGER REFERENCES confidence_grades(id),
    terr_src   INTEGER REFERENCES source_registry(id),
    terr_conf  INTEGER REFERENCES confidence_grades(id),
    hydro_src  INTEGER REFERENCES source_registry(id),
    hydro_conf INTEGER REFERENCES confidence_grades(id),
    gdep_src   INTEGER REFERENCES source_registry(id),
    gdep_conf  INTEGER REFERENCES confidence_grades(id),
    gflag_src  INTEGER REFERENCES source_registry(id),
    gflag_conf INTEGER REFERENCES confidence_grades(id),
    occ_src    INTEGER REFERENCES source_registry(id),
    occ_conf   INTEGER REFERENCES confidence_grades(id),

    -- Lifecycle: status defaults to lifecycle_states code 1.
    status         INTEGER NOT NULL DEFAULT 1 REFERENCES lifecycle_states(id),
    run_id         INTEGER NOT NULL REFERENCES pipeline_runs(id),
    created_at     TEXT    NOT NULL,    -- ISO 8601 UTC
    superseded_by  INTEGER REFERENCES tessera_cells(id),
    retired_reason TEXT
);

-- Secondary indexes on tessera_cells: each H3 level paired with status,
-- plus one on run_id. None of them is UNIQUE, so several rows may share
-- the same (h9, status) pair.
CREATE INDEX idx_cells_h9_status
    ON tessera_cells (h9, status);
CREATE INDEX idx_cells_h5_status
    ON tessera_cells (h5, status);
CREATE INDEX idx_cells_h7_status
    ON tessera_cells (h7, status);
CREATE INDEX idx_cells_run
    ON tessera_cells (run_id);

-- ---------------------------------------------------------------
-- 4. H5 coverage completeness tracking
-- ---------------------------------------------------------------

-- Completeness tracking, one row per H3 res-5 cell.
CREATE TABLE h5_coverage (
    h5           INTEGER PRIMARY KEY,         -- H3 res-5 index
    -- Code from lifecycle_states; the values used here are
    -- 1=draft (in progress), 2=current (complete), 4=retired.
    -- NOTE(review): only the foreign key is enforced, not this subset;
    -- consider a CHECK (status IN (1, 2, 4)) if the subset is mandatory.
    status       INTEGER NOT NULL,
    h9_total     INTEGER NOT NULL,            -- expected H9 count (typically 2401)
    h9_current   INTEGER NOT NULL DEFAULT 0,
    last_updated TEXT    NOT NULL,            -- ISO 8601 UTC
    run_id       INTEGER NOT NULL,
    notes        TEXT,
    FOREIGN KEY (status) REFERENCES lifecycle_states (id),
    FOREIGN KEY (run_id) REFERENCES pipeline_runs (id)
);
Reference in New Issue
Block a user