From 62daa0c34fc159df6da118568e23a9435d1c17bf Mon Sep 17 00:00:00 2001 From: otivm Date: Sun, 3 May 2026 14:32:50 +0000 Subject: [PATCH] fix: remove accidentally committed files, update gitignore --- benchmark_tessera_world_usb.sh | 244 -- data/otivm.sqlite3-shm | Bin 32768 -> 0 bytes data/otivm.sqlite3-wal | 0 data/tessera_usb_inventory.txt | 581 ---- data/tessera_world_usb_inventory.txt | 4762 -------------------------- 5 files changed, 5587 deletions(-) delete mode 100755 benchmark_tessera_world_usb.sh delete mode 100644 data/otivm.sqlite3-shm delete mode 100644 data/otivm.sqlite3-wal delete mode 100644 data/tessera_usb_inventory.txt delete mode 100644 data/tessera_world_usb_inventory.txt diff --git a/benchmark_tessera_world_usb.sh b/benchmark_tessera_world_usb.sh deleted file mode 100755 index f64e784..0000000 --- a/benchmark_tessera_world_usb.sh +++ /dev/null @@ -1,244 +0,0 @@ -#!/usr/bin/env bash -set -u -set -o pipefail - -MOUNT="/opt/data/TESSERA_WORLD" -OUTDIR="/home/otivm/OTIVM/data" -OUTFILE="$OUTDIR/tessera_world_usb_inventory.txt" -PYTHON="/home/otivm/pipeline-venv/bin/python3" - -mkdir -p "$OUTDIR" - -{ -echo "TESSERA WORLD USB Drive Inventory and Benchmark" -echo "Generated: $(date -u +%Y-%m-%dT%H:%M:%SZ)" -echo "Mount: $MOUNT" -echo "" - -echo "=== PHASE 0: MOUNT CHECK ===" -echo "--- Step 1: Mount contents ---" -ls "$MOUNT" || exit 1 -echo "" - -echo "--- Step 2: Mount details ---" -df -h "$MOUNT" -mount | grep TESSERA_WORLD || true -echo "" - -echo "--- Step 3: Read-only status ---" -touch "$MOUNT/test_write_attempt" 2>&1 || echo "CONFIRMED READ-ONLY" -echo "" - -echo "=== PHASE 1: TOP-LEVEL STRUCTURE ===" -echo "--- Step 4: Top-level directory listing ---" -ls -lah "$MOUNT/" -echo "" - -echo "--- Step 5: Recursive directory tree, directories only ---" -find "$MOUNT" -type d | sort -echo "" - -echo "--- Step 6: File count by directory ---" -find "$MOUNT" -type f | sed 's|/[^/]*$||' | sort | uniq -c | sort -rn -echo "" - -echo "=== PHASE 2: FILE TYPE CENSUS ===" -echo "--- Step 7: File extensions and counts ---" -find "$MOUNT" -type f | sed 's|.*\.||' | sort | uniq -c | sort -rn -echo "" - -echo "--- Step 8: Total size by top-level subdirectory ---" -du -sh "$MOUNT"/*/ 2>/dev/null | sort -rh -echo "" - -echo "--- Step 9: Total size of mount ---" -du -sh "$MOUNT/" -echo "" - -echo "=== PHASE 3: PER-DATASET INVENTORY ===" -echo "--- Step 10: GeoTIFF files ---" -find "$MOUNT" -type f \( -name "*.tif" -o -name "*.tiff" \) -print0 \ - | xargs -0 -r ls -lh 2>/dev/null | awk '{print $5, $9}' | sort -rh -echo "" - -echo "--- Step 11: HDF5 files ---" -find "$MOUNT" -type f \( -name "*.h5" -o -name "*.hdf5" \) -print0 \ - | xargs -0 -r ls -lh 2>/dev/null | awk '{print $5, $9}' -echo "" - -echo "--- Step 12: NetCDF files ---" -find "$MOUNT" -type f -name "*.nc" -print0 \ - | xargs -0 -r ls -lh 2>/dev/null | awk '{print $5, $9}' -echo "" - -echo "--- Step 13: CSV and TSV files ---" -find "$MOUNT" -type f \( -name "*.csv" -o -name "*.tsv" \) -print0 \ - | xargs -0 -r ls -lh 2>/dev/null | awk '{print $5, $9}' -echo "" - -echo "--- Step 14: SQLite files ---" -find "$MOUNT" -type f \( -name "*.sqlite" -o -name "*.sqlite3" -o -name "*.db" \) -print0 \ - | xargs -0 -r ls -lh 2>/dev/null | awk '{print $5, $9}' -echo "" - -echo "--- Step 15: Shapefile components ---" -find "$MOUNT" -type f \( -name "*.shp" -o -name "*.dbf" -o -name "*.shx" -o -name "*.prj" \) -print0 \ - | xargs -0 -r ls -lh 2>/dev/null | awk '{print $5, $9}' | sort -echo "" - -echo "--- Step 16: Other file types ---" -find "$MOUNT" -type f \ - ! -name "*.tif" ! -name "*.tiff" ! -name "*.h5" ! -name "*.hdf5" \ - ! -name "*.nc" ! -name "*.csv" ! -name "*.tsv" \ - ! -name "*.sqlite" ! -name "*.sqlite3" ! -name "*.db" \ - ! -name "*.shp" ! -name "*.dbf" ! -name "*.shx" ! -name "*.prj" \ - -print0 | xargs -0 -r ls -lh 2>/dev/null | awk '{print $5, $9}' | sort -echo "" - -echo "=== PHASE 4: GEOTIFF METADATA ===" -"$PYTHON" - <<'PYEOF' -import glob -import os - -try: - import rasterio -except Exception as e: - print(f"ERROR: rasterio unavailable: {e}") - raise SystemExit(0) - -patterns = [ - "/opt/data/TESSERA_WORLD/**/*.tif", - "/opt/data/TESSERA_WORLD/**/*.tiff", -] - -files = [] -for p in patterns: - files.extend(glob.glob(p, recursive=True)) -files.sort() - -if not files: - print("No GeoTIFF files found.") -else: - for f in files: - try: - with rasterio.open(f) as ds: - print("---") - print(f"FILE: {f}") - print(f"SIZE: {os.path.getsize(f) / 1e9:.2f} GB") - print(f"CRS: {ds.crs}") - print(f"RES: {ds.res}") - print(f"BOUNDS: {ds.bounds}") - print(f"SHAPE: {ds.width} x {ds.height} px") - print(f"BANDS: {ds.count}") - print(f"DTYPE: {ds.dtypes}") - print(f"NODATA: {ds.nodata}") - except Exception as e: - print(f"ERROR reading {f}: {e}") -PYEOF -echo "" - -echo "=== PHASE 5: READ SPEED BENCHMARKS ===" -echo "--- Step 18: Raw sequential read speed of largest GeoTIFF ---" -LARGEST=$(find "$MOUNT" -type f \( -name "*.tif" -o -name "*.tiff" \) -print0 \ - | xargs -0 -r ls -s 2>/dev/null | sort -rn | head -1 | awk '{print $2}') -echo "Benchmarking: $LARGEST" -if [ -n "${LARGEST:-}" ]; then - dd if="$LARGEST" of=/dev/null bs=1M status=progress 2>&1 | tail -3 -else - echo "No GeoTIFF found for sequential benchmark." -fi -echo "" - -echo "--- Step 19: Random point sample speed, 2401 points from first GeoTIFF ---" -"$PYTHON" - <<'PYEOF' -import glob -import time - -try: - import rasterio - import numpy as np -except Exception as e: - print(f"ERROR: rasterio/numpy unavailable: {e}") - raise SystemExit(0) - -files = glob.glob("/opt/data/TESSERA_WORLD/**/*.tif", recursive=True) -files += glob.glob("/opt/data/TESSERA_WORLD/**/*.tiff", recursive=True) - -if not files: - print("No GeoTIFF found for benchmark.") -else: - f = sorted(files)[0] - print(f"Benchmarking random point reads from: {f}") - with rasterio.open(f) as ds: - bounds = ds.bounds - rng = np.random.default_rng(42) - lons = rng.uniform(bounds.left, bounds.right, 2401) - lats = rng.uniform(bounds.bottom, bounds.top, 2401) - coords = list(zip(lons, lats)) - - t0 = time.perf_counter() - results = list(ds.sample(coords)) - t1 = time.perf_counter() - - elapsed = t1 - t0 - print(f"2401 point samples: {elapsed:.3f}s") - print(f"Per-point: {elapsed/2401*1000:.3f}ms") - print(f"Projected H5 time: {elapsed:.1f}s per H5 hex") - print(f"Five H5s: {elapsed*5:.1f}s total") -PYEOF -echo "" - -echo "--- Step 20: SQLite write speed baseline on local disk ---" -"$PYTHON" - <<'PYEOF' -import sqlite3 -import time -import tempfile -import os - -tmp = tempfile.mktemp(suffix=".sqlite3") -con = sqlite3.connect(tmp) -con.execute("PRAGMA journal_mode=WAL") -con.execute("PRAGMA synchronous=NORMAL") -con.execute(""" - CREATE TABLE bench ( - id INTEGER PRIMARY KEY, - h9 INTEGER, h7 INTEGER, h5 INTEGER, - lat REAL, lon REAL, - elev_cm INTEGER, terrain INTEGER, hydro INTEGER, - geo_dep INTEGER, geo_flag INTEGER, occ_flag INTEGER, - status INTEGER, run_id INTEGER, created_at TEXT - ) -""") - -rows = [ - (i, i*10, i*100, i*1000, - 40.0 + i*0.001, 12.0 + i*0.001, - 100, 1, 0, 255, 0, 0, - 1, 1, "2026-04-26T00:00:00Z") - for i in range(2401) -] - -t0 = time.perf_counter() -con.executemany(""" - INSERT INTO bench - (id, h9, h7, h5, lat, lon, elev_cm, terrain, hydro, - geo_dep, geo_flag, occ_flag, status, run_id, created_at) - VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?) -""", rows) -con.commit() -t1 = time.perf_counter() - -elapsed = t1 - t0 -print(f"2401 row INSERT+COMMIT: {elapsed:.3f}s") -print(f"Per-row: {elapsed/2401*1000:.3f}ms") -con.close() -os.unlink(tmp) -PYEOF -echo "" - -echo "=== PHASE 6: REPORT LOCATION ===" -echo "Inventory written to $OUTFILE" -} | tee "$OUTFILE" - -echo "" -wc -l "$OUTFILE" diff --git a/data/otivm.sqlite3-shm b/data/otivm.sqlite3-shm deleted file mode 100644 index fe9ac2845eca6fe6da8a63cd096d9cf9e24ece10..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 32768 zcmeIuAr62r3