From 4018f006cb8cbc9c7a13275ead4e04c3cef58b9c Mon Sep 17 00:00:00 2001
From: otivm
Date: Sun, 3 May 2026 14:31:21 +0000
Subject: [PATCH] Switching to BS5

---
 benchmark_tessera_world_usb.sh       |  303 ++
 data/otivm.sqlite3-shm               |  Bin 0 -> 32768 bytes
 data/otivm.sqlite3-wal               |    0
 data/tessera_usb_inventory.txt       |  581 ++++
 data/tessera_world_usb_inventory.txt | 4762 ++++++++++++++++++++++++++
 src/App.css                          | 1015 +++---
 6 files changed, 6141 insertions(+), 520 deletions(-)
 create mode 100755 benchmark_tessera_world_usb.sh
 create mode 100644 data/otivm.sqlite3-shm
 create mode 100644 data/otivm.sqlite3-wal
 create mode 100644 data/tessera_usb_inventory.txt
 create mode 100644 data/tessera_world_usb_inventory.txt

diff --git a/benchmark_tessera_world_usb.sh b/benchmark_tessera_world_usb.sh
new file mode 100755
index 0000000..f64e784
--- /dev/null
+++ b/benchmark_tessera_world_usb.sh
@@ -0,0 +1,303 @@
+#!/usr/bin/env bash
+#
+# Inventory and benchmark the TESSERA WORLD USB drive mounted at $MOUNT.
+#
+# The report is printed to stdout and saved to $OUTFILE through `tee`.
+# `set -e` is deliberately not used: a failing step must not hide the output
+# of the steps after it. Only an unlistable mount aborts the report.
+set -u
+set -o pipefail
+
+MOUNT="/opt/data/TESSERA_WORLD"
+OUTDIR="/home/otivm/OTIVM/data"
+OUTFILE="$OUTDIR/tessera_world_usb_inventory.txt"
+PYTHON="/home/otivm/pipeline-venv/bin/python3"
+
+# Exported so the embedded Python snippets scan the same directory as the
+# shell steps instead of carrying their own copy of the path.
+export MOUNT
+
+mkdir -p "$OUTDIR"
+
+# Print "<size> <path>" for each regular file under $MOUNT that matches the
+# find(1) tests given as arguments. The eight `ls -l` columns in front of
+# the name are stripped one by one instead of printing $9, so a path that
+# contains spaces is kept whole.
+list_sized() {
+    find "$MOUNT" -type f "$@" -print0 \
+        | xargs -0 -r ls -lh 2>/dev/null \
+        | awk '{
+            size = $5
+            path = $0
+            for (i = 0; i < 8; i++) sub(/^[^ ]+ +/, "", path)
+            print size, path
+        }'
+}
+
+{
+echo "TESSERA WORLD USB Drive Inventory and Benchmark"
+echo "Generated: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+echo "Mount: $MOUNT"
+echo ""
+
+echo "=== PHASE 0: MOUNT CHECK ==="
+echo "--- Step 1: Mount contents ---"
+# This group feeds a pipeline, so it runs in a subshell and `exit` only ends
+# the report. The failure is passed on after the `tee` at the bottom.
+ls "$MOUNT" || exit 1
+echo ""
+
+echo "--- Step 2: Mount details ---"
+df -h "$MOUNT"
+mount | grep TESSERA_WORLD || true
+echo ""
+
+echo "--- Step 3: Read-only status ---"
+# Probe with a real write, but do not leave the probe file behind on a
+# writable drive, and do not delete a file that was already there.
+PROBE="$MOUNT/test_write_attempt"
+PROBE_EXISTED=no
+[ -e "$PROBE" ] && PROBE_EXISTED=yes
+if touch "$PROBE" 2>&1; then
+    echo "WARNING: MOUNT IS WRITABLE"
+    [ "$PROBE_EXISTED" = yes ] || rm -f "$PROBE"
+else
+    echo "CONFIRMED READ-ONLY"
+fi
+echo ""
+
+echo "=== PHASE 1: TOP-LEVEL STRUCTURE ==="
+echo "--- Step 4: Top-level directory listing ---"
+ls -lah "$MOUNT/"
+echo ""
+
+echo "--- Step 5: Recursive directory tree, directories only ---"
+find "$MOUNT" -type d | sort
+echo ""
+
+echo "--- Step 6: File count by directory ---"
+find "$MOUNT" -type f | sed 's|/[^/]*$||' | sort | uniq -c | sort -rn
+echo ""
+
+echo "=== PHASE 2: FILE TYPE CENSUS ==="
+echo "--- Step 7: File extensions and counts ---"
+# Work on the file name only: a dot in a directory name is not an extension,
+# and names without any dot are counted together.
+find "$MOUNT" -type f -printf '%f\n' \
+    | awk -F. 'NF > 1 { print $NF; next } { print "(none)" }' \
+    | sort | uniq -c | sort -rn
+echo ""
+
+echo "--- Step 8: Total size by top-level subdirectory ---"
+du -sh "$MOUNT"/*/ 2>/dev/null | sort -rh
+echo ""
+
+echo "--- Step 9: Total size of mount ---"
+du -sh "$MOUNT/"
+echo ""
+
+echo "=== PHASE 3: PER-DATASET INVENTORY ==="
+echo "--- Step 10: GeoTIFF files ---"
+list_sized \( -name "*.tif" -o -name "*.tiff" \) | sort -rh
+echo ""
+
+echo "--- Step 11: HDF5 files ---"
+list_sized \( -name "*.h5" -o -name "*.hdf5" \)
+echo ""
+
+echo "--- Step 12: NetCDF files ---"
+list_sized -name "*.nc"
+echo ""
+
+echo "--- Step 13: CSV and TSV files ---"
+list_sized \( -name "*.csv" -o -name "*.tsv" \)
+echo ""
+
+echo "--- Step 14: SQLite files ---"
+list_sized \( -name "*.sqlite" -o -name "*.sqlite3" -o -name "*.db" \)
+echo ""
+
+echo "--- Step 15: Shapefile components ---"
+list_sized \( -name "*.shp" -o -name "*.dbf" -o -name "*.shx" -o -name "*.prj" \) | sort
+echo ""
+
+echo "--- Step 16: Other file types ---"
+list_sized \
+    ! -name "*.tif" ! -name "*.tiff" ! -name "*.h5" ! -name "*.hdf5" \
+    ! -name "*.nc" ! -name "*.csv" ! -name "*.tsv" \
+    ! -name "*.sqlite" ! -name "*.sqlite3" ! -name "*.db" \
+    ! -name "*.shp" ! -name "*.dbf" ! -name "*.shx" ! -name "*.prj" \
+    | sort
+echo ""
+
+echo "=== PHASE 4: GEOTIFF METADATA ==="
+"$PYTHON" - <<'PYEOF'
+"""Print rasterio metadata for every GeoTIFF below $MOUNT."""
+import glob
+import os
+
+# A failed import is reported and the step skipped (exit status 0) so the
+# rest of the report still runs.
+try:
+    import rasterio
+except Exception as e:
+    print(f"ERROR: rasterio unavailable: {e}")
+    raise SystemExit(0)
+
+# escape() keeps any glob metacharacters in the mount path literal.
+root = glob.escape(os.environ["MOUNT"])
+files = sorted(
+    path
+    for ext in ("tif", "tiff")
+    for path in glob.glob(f"{root}/**/*.{ext}", recursive=True)
+)
+
+if not files:
+    print("No GeoTIFF files found.")
+else:
+    for f in files:
+        try:
+            with rasterio.open(f) as ds:
+                print("---")
+                print(f"FILE: {f}")
+                print(f"SIZE: {os.path.getsize(f) / 1e9:.2f} GB")
+                print(f"CRS: {ds.crs}")
+                print(f"RES: {ds.res}")
+                print(f"BOUNDS: {ds.bounds}")
+                print(f"SHAPE: {ds.width} x {ds.height} px")
+                print(f"BANDS: {ds.count}")
+                print(f"DTYPE: {ds.dtypes}")
+                print(f"NODATA: {ds.nodata}")
+        except Exception as e:
+            # One unreadable file must not stop the inventory of the rest.
+            print(f"ERROR reading {f}: {e}")
+PYEOF
+echo ""
+
+echo "=== PHASE 5: READ SPEED BENCHMARKS ==="
+echo "--- Step 18: Raw sequential read speed of largest GeoTIFF ---"
+# Size and path are tab-separated and the path is everything after the first
+# tab, so a path containing spaces is not truncated.
+LARGEST=$(find "$MOUNT" -type f \( -name "*.tif" -o -name "*.tiff" \) -printf '%s\t%p\n' \
+    | sort -rn | head -1 | cut -f2-)
+if [ -n "$LARGEST" ]; then
+    echo "Benchmarking: $LARGEST"
+    dd if="$LARGEST" of=/dev/null bs=1M status=progress 2>&1 | tail -3
+else
+    echo "No GeoTIFF found for sequential benchmark."
+fi
+echo ""
+
+echo "--- Step 19: Random point sample speed, 2401 points from first GeoTIFF ---"
+"$PYTHON" - <<'PYEOF'
+"""Time 2401 random point samples from the first GeoTIFF below $MOUNT."""
+import glob
+import os
+import time
+
+try:
+    import rasterio
+    import numpy as np
+except Exception as e:
+    print(f"ERROR: rasterio/numpy unavailable: {e}")
+    raise SystemExit(0)
+
+N_POINTS = 2401
+
+root = glob.escape(os.environ["MOUNT"])
+files = sorted(
+    path
+    for ext in ("tif", "tiff")
+    for path in glob.glob(f"{root}/**/*.{ext}", recursive=True)
+)
+
+if not files:
+    print("No GeoTIFF found for benchmark.")
+else:
+    f = files[0]
+    print(f"Benchmarking random point reads from: {f}")
+    with rasterio.open(f) as ds:
+        bounds = ds.bounds
+        # Fixed seed so every run samples the same points.
+        rng = np.random.default_rng(42)
+        lons = rng.uniform(bounds.left, bounds.right, N_POINTS)
+        lats = rng.uniform(bounds.bottom, bounds.top, N_POINTS)
+        coords = list(zip(lons, lats))
+
+        t0 = time.perf_counter()
+        # list() drains the sampler so the reads are inside the timing.
+        list(ds.sample(coords))
+        t1 = time.perf_counter()
+
+    elapsed = t1 - t0
+    print(f"{N_POINTS} point samples: {elapsed:.3f}s")
+    print(f"Per-point: {elapsed / N_POINTS * 1000:.3f}ms")
+    print(f"Projected H5 time: {elapsed:.1f}s per H5 hex")
+    print(f"Five H5s: {elapsed * 5:.1f}s total")
+PYEOF
+echo ""
+
+echo "--- Step 20: SQLite write speed baseline on local disk ---"
+"$PYTHON" - <<'PYEOF'
+"""Time a 2401-row INSERT + COMMIT into a scratch SQLite database."""
+import os
+import sqlite3
+import tempfile
+import time
+
+N_ROWS = 2401
+
+rows = [
+    (i, i*10, i*100, i*1000,
+     40.0 + i*0.001, 12.0 + i*0.001,
+     100, 1, 0, 255, 0, 0,
+     1, 1, "2026-04-26T00:00:00Z")
+    for i in range(N_ROWS)
+]
+
+# A private scratch directory replaces tempfile.mktemp(): the name cannot be
+# claimed by another process first, and the database and its WAL side files
+# are removed even if the benchmark fails.
+with tempfile.TemporaryDirectory() as scratch:
+    con = sqlite3.connect(os.path.join(scratch, "bench.sqlite3"))
+    try:
+        con.execute("PRAGMA journal_mode=WAL")
+        con.execute("PRAGMA synchronous=NORMAL")
+        con.execute("""
+            CREATE TABLE bench (
+                id INTEGER PRIMARY KEY,
+                h9 INTEGER, h7 INTEGER, h5 INTEGER,
+                lat REAL, lon REAL,
+                elev_cm INTEGER, terrain INTEGER, hydro INTEGER,
+                geo_dep INTEGER, geo_flag INTEGER, occ_flag INTEGER,
+                status INTEGER, run_id INTEGER, created_at TEXT
+            )
+        """)
+
+        t0 = time.perf_counter()
+        con.executemany("""
+            INSERT INTO bench
+            (id, h9, h7, h5, lat, lon, elev_cm, terrain, hydro,
+             geo_dep, geo_flag, occ_flag, status, run_id, created_at)
+            VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
+        """, rows)
+        con.commit()
+        t1 = time.perf_counter()
+    finally:
+        con.close()
+
+elapsed = t1 - t0
+print(f"{N_ROWS} row INSERT+COMMIT: {elapsed:.3f}s")
+print(f"Per-row: {elapsed / N_ROWS * 1000:.3f}ms")
+PYEOF
+echo ""
+
+echo "=== PHASE 6: REPORT LOCATION ==="
+echo "Inventory written to $OUTFILE"
+} | tee "$OUTFILE"
+# With pipefail this is non-zero when the report group or tee failed.
+STATUS=$?
+
+echo ""
+wc -l "$OUTFILE"
+exit "$STATUS"
diff --git a/data/otivm.sqlite3-shm b/data/otivm.sqlite3-shm
new file mode 100644
index 0000000000000000000000000000000000000000..fe9ac2845eca6fe6da8a63cd096d9cf9e24ece10
GIT binary patch
literal 32768
zcmeIuAr62r3