Switching to BS5
This commit is contained in:
244
benchmark_tessera_world_usb.sh
Executable file
244
benchmark_tessera_world_usb.sh
Executable file
@@ -0,0 +1,244 @@
|
||||
#!/usr/bin/env bash
#
# Inventory and benchmark of the TESSERA WORLD USB drive.
#
# Walks the drive mounted at $MOUNT, prints a multi-phase report to stdout and
# saves a copy of that report to $OUTFILE. The raster and SQLite phases are run
# with the Python interpreter at $PYTHON.

# -u:       expanding an unset variable is an error.
# pipefail: a pipeline reports failure if any of its stages fails.
# errexit (-e) is not enabled, so a failing step does not abort the run.
set -u
set -o pipefail

readonly MOUNT="/opt/data/TESSERA_WORLD"
readonly OUTDIR="/home/otivm/OTIVM/data"
readonly OUTFILE="$OUTDIR/tessera_world_usb_inventory.txt"
readonly PYTHON="/home/otivm/pipeline-venv/bin/python3"

# Fail fast: without the output directory the report cannot be saved, and
# there is no point running the benchmarks first.
if ! mkdir -p "$OUTDIR"; then
  echo "ERROR: cannot create output directory: $OUTDIR" >&2
  exit 1
fi
|
||||
|
||||
{
|
||||
echo "TESSERA WORLD USB Drive Inventory and Benchmark"
|
||||
echo "Generated: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
|
||||
echo "Mount: $MOUNT"
|
||||
echo ""
|
||||
|
||||
echo "=== PHASE 0: MOUNT CHECK ==="
echo "--- Step 1: Mount contents ---"
# Every later step reads from $MOUNT, so stop here if it cannot be listed.
# This code runs inside the `{ ... } | tee` group, so the exit ends that group.
ls "$MOUNT" || exit 1
echo ""

echo "--- Step 2: Mount details ---"
df -h "$MOUNT"
# grep returns non-zero when no line matches; that is not an error here.
mount | grep TESSERA_WORLD || true
echo ""

echo "--- Step 3: Read-only status ---"
# Probe writability by trying to create/touch a marker file. touch's own error
# message is sent to stdout so it appears in the report next to the verdict.
write_probe="$MOUNT/test_write_attempt"
probe_preexisting=0
if [ -e "$write_probe" ]; then
  probe_preexisting=1
fi
if touch "$write_probe" 2>&1; then
  # The mount accepted a write. Remove the marker again, but only if this
  # run created it, so the drive is left as it was found.
  if [ "$probe_preexisting" -eq 0 ]; then
    rm -f -- "$write_probe"
  fi
  echo "WARNING: mount is WRITABLE (write probe succeeded)"
else
  echo "CONFIRMED READ-ONLY"
fi
echo ""
|
||||
|
||||
# Phase 1: what the drive looks like from the top down.
printf '%s\n' \
  "=== PHASE 1: TOP-LEVEL STRUCTURE ===" \
  "--- Step 4: Top-level directory listing ---"
ls -lah "$MOUNT/"
printf '\n'

printf '%s\n' "--- Step 5: Recursive directory tree, directories only ---"
find "$MOUNT" -type d | sort
printf '\n'

printf '%s\n' "--- Step 6: File count by directory ---"
# Drop the last path component so each file is reduced to its parent
# directory, then tally identical directories and rank them by file count.
find "$MOUNT" -type f \
  | sed 's|/[^/]*$||' \
  | sort \
  | uniq -c \
  | sort -rn
printf '\n'
|
||||
|
||||
echo "=== PHASE 2: FILE TYPE CENSUS ==="
echo "--- Step 7: File extensions and counts ---"
# Work on the base name only (-printf '%f'), so dots in directory names are
# never mistaken for an extension separator. The extension is the text after
# the last dot that is not the first character of the name; files without one
# (including plain dotfiles such as ".hidden") are grouped under "(none)".
find "$MOUNT" -type f -printf '%f\n' \
  | awk '{
      dot = 0
      for (i = length($0); i > 1; i--) {
        if (substr($0, i, 1) == ".") {
          dot = i
          break
        }
      }
      ext = ""
      if (dot > 0) {
        ext = substr($0, dot + 1)
      }
      if (ext == "") {
        ext = "(none)"
      }
      print ext
    }' \
  | sort \
  | uniq -c \
  | sort -rn
echo ""

echo "--- Step 8: Total size by top-level subdirectory ---"
# du errors are discarded; the trailing slash in the glob restricts the
# expansion to directories.
du -sh "$MOUNT"/*/ 2>/dev/null | sort -rh
echo ""

echo "--- Step 9: Total size of mount ---"
du -sh "$MOUNT/"
echo ""
|
||||
|
||||
echo "=== PHASE 3: PER-DATASET INVENTORY ==="

#######################################
# Print "<human-readable size><TAB><path>" for every regular file under a
# directory that matches a find expression.
#
# Sizes and paths come straight from find (-printf), not from parsing `ls`,
# so paths containing spaces are reported in full.
#
# Arguments:
#   $1  - ordering: "size" (largest first) or anything else for path order
#   $2  - directory to search
#   $3+ - find tests selecting the files; wrap -o alternatives in \( \)
# Outputs:
#   One line per file on stdout.
#######################################
list_sized() {
  local order=$1 root=$2
  shift 2
  local tab=$'\t'
  local -a sort_key
  if [ "$order" = "size" ]; then
    sort_key=(-k1,1nr)
  else
    sort_key=(-k2)
  fi
  # Sort on raw byte counts first, convert to human-readable units last, so
  # ordering by size is exact.
  find "$root" -type f "$@" -printf '%s\t%p\n' \
    | LC_ALL=C sort -t "$tab" "${sort_key[@]}" \
    | numfmt --to=iec --field=1 --delimiter="$tab"
}

echo "--- Step 10: GeoTIFF files ---"
list_sized size "$MOUNT" \( -name "*.tif" -o -name "*.tiff" \)
echo ""

echo "--- Step 11: HDF5 files ---"
list_sized path "$MOUNT" \( -name "*.h5" -o -name "*.hdf5" \)
echo ""

echo "--- Step 12: NetCDF files ---"
list_sized path "$MOUNT" -name "*.nc"
echo ""

echo "--- Step 13: CSV and TSV files ---"
list_sized path "$MOUNT" \( -name "*.csv" -o -name "*.tsv" \)
echo ""

echo "--- Step 14: SQLite files ---"
list_sized path "$MOUNT" \
  \( -name "*.sqlite" -o -name "*.sqlite3" -o -name "*.db" \)
echo ""

echo "--- Step 15: Shapefile components ---"
# Path order keeps the .shp/.dbf/.shx/.prj parts of one shapefile adjacent.
list_sized path "$MOUNT" \
  \( -name "*.shp" -o -name "*.dbf" -o -name "*.shx" -o -name "*.prj" \)
echo ""

echo "--- Step 16: Other file types ---"
# Everything not already covered by steps 10-15.
list_sized path "$MOUNT" \
  ! -name "*.tif" ! -name "*.tiff" ! -name "*.h5" ! -name "*.hdf5" \
  ! -name "*.nc" ! -name "*.csv" ! -name "*.tsv" \
  ! -name "*.sqlite" ! -name "*.sqlite3" ! -name "*.db" \
  ! -name "*.shp" ! -name "*.dbf" ! -name "*.shx" ! -name "*.prj"
echo ""
|
||||
|
||||
echo "=== PHASE 4: GEOTIFF METADATA ==="
echo "--- Step 17: GeoTIFF metadata ---"
# The heredoc delimiter is quoted, so the shell expands nothing inside the
# Python source; the mount path is handed over through the environment so the
# Python side scans the same directory as the shell phases.
TESSERA_MOUNT="$MOUNT" "$PYTHON" - <<'PYEOF'
import glob
import os

try:
    import rasterio
except Exception as e:
    # Report the problem in the inventory and exit cleanly.
    print(f"ERROR: rasterio unavailable: {e}")
    raise SystemExit(0)

mount = os.environ.get("TESSERA_MOUNT", "/opt/data/TESSERA_WORLD")
# glob.escape keeps any glob metacharacters in the mount path literal.
root = glob.escape(mount)
patterns = [
    os.path.join(root, "**", "*.tif"),
    os.path.join(root, "**", "*.tiff"),
]

files = []
for p in patterns:
    files.extend(glob.glob(p, recursive=True))
files.sort()

if not files:
    print("No GeoTIFF files found.")
else:
    for f in files:
        # One unreadable file is reported and skipped, not fatal.
        try:
            with rasterio.open(f) as ds:
                print("---")
                print(f"FILE: {f}")
                print(f"SIZE: {os.path.getsize(f) / 1e9:.2f} GB")
                print(f"CRS: {ds.crs}")
                print(f"RES: {ds.res}")
                print(f"BOUNDS: {ds.bounds}")
                print(f"SHAPE: {ds.width} x {ds.height} px")
                print(f"BANDS: {ds.count}")
                print(f"DTYPE: {ds.dtypes}")
                print(f"NODATA: {ds.nodata}")
        except Exception as e:
            print(f"ERROR reading {f}: {e}")
PYEOF
echo ""
|
||||
|
||||
echo "=== PHASE 5: READ SPEED BENCHMARKS ==="
echo "--- Step 18: Raw sequential read speed of largest GeoTIFF ---"

#######################################
# Print the path of the largest (by byte size) *.tif / *.tiff file under a
# directory, or nothing when there is none.
#
# Size and path are taken from find -printf and split on a tab, so paths that
# contain spaces are returned intact.
#
# Arguments:
#   $1 - directory to search
# Outputs:
#   The path on stdout (empty if no GeoTIFF exists).
#######################################
largest_geotiff() {
  local root=$1
  find "$root" -type f \( -name "*.tif" -o -name "*.tiff" \) \
    -printf '%s\t%p\n' \
    | LC_ALL=C sort -t $'\t' -k1,1nr \
    | head -n 1 \
    | cut -f2-
}

LARGEST=$(largest_geotiff "$MOUNT")
if [ -n "${LARGEST:-}" ]; then
  echo "Benchmarking: $LARGEST"
  # dd reports on stderr; keep only its final three summary lines.
  dd if="$LARGEST" of=/dev/null bs=1M status=progress 2>&1 | tail -3
else
  echo "No GeoTIFF found for sequential benchmark."
fi
echo ""
|
||||
|
||||
echo "--- Step 19: Random point sample speed, 2401 points from first GeoTIFF ---"
# The heredoc delimiter is quoted (no shell expansion inside), so the mount
# path is passed to Python through the environment.
TESSERA_MOUNT="$MOUNT" "$PYTHON" - <<'PYEOF'
import glob
import os
import time

try:
    import rasterio
    import numpy as np
except Exception as e:
    # Report the problem in the inventory and exit cleanly.
    print(f"ERROR: rasterio/numpy unavailable: {e}")
    raise SystemExit(0)

# Number of random points sampled; also used for the per-point average.
N_POINTS = 2401

mount = os.environ.get("TESSERA_MOUNT", "/opt/data/TESSERA_WORLD")
# glob.escape keeps any glob metacharacters in the mount path literal.
root = glob.escape(mount)
files = glob.glob(os.path.join(root, "**", "*.tif"), recursive=True)
files += glob.glob(os.path.join(root, "**", "*.tiff"), recursive=True)

if not files:
    print("No GeoTIFF found for benchmark.")
else:
    # "First" means first in sorted path order.
    f = sorted(files)[0]
    print(f"Benchmarking random point reads from: {f}")
    try:
        with rasterio.open(f) as ds:
            bounds = ds.bounds
            # Fixed seed: every run samples the same coordinates.
            rng = np.random.default_rng(42)
            # Coordinates are drawn uniformly inside the dataset bounds, in
            # whatever units those bounds use.
            xs = rng.uniform(bounds.left, bounds.right, N_POINTS)
            ys = rng.uniform(bounds.bottom, bounds.top, N_POINTS)
            coords = list(zip(xs, ys))

            # list() drains the sampler so all reads happen inside the timer.
            t0 = time.perf_counter()
            results = list(ds.sample(coords))
            t1 = time.perf_counter()

        elapsed = t1 - t0
        print(f"{N_POINTS} point samples: {elapsed:.3f}s")
        print(f"Per-point: {elapsed/N_POINTS*1000:.3f}ms")
        print(f"Projected H5 time: {elapsed:.1f}s per H5 hex")
        print(f"Five H5s: {elapsed*5:.1f}s total")
    except Exception as e:
        # Same reporting style as the metadata phase: note the failure in the
        # report without a traceback.
        print(f"ERROR reading {f}: {e}")
PYEOF
echo ""
|
||||
|
||||
echo "--- Step 20: SQLite write speed baseline on local disk ---"
"$PYTHON" - <<'PYEOF'
import os
import sqlite3
import tempfile
import time

# Number of rows inserted in the timed batch.
N_ROWS = 2401

# A private temporary directory replaces tempfile.mktemp(), which is
# deprecated because the returned name can be claimed by another process
# before it is used. Removing the directory also removes the database and any
# sidecar files, even if the benchmark raises.
# NOTE(review): the database lives in the default temp directory; if that is a
# RAM-backed filesystem this is not a "local disk" measurement - confirm.
with tempfile.TemporaryDirectory(prefix="tessera_sqlite_bench_") as tmpdir:
    db_path = os.path.join(tmpdir, "bench.sqlite3")
    con = sqlite3.connect(db_path)
    try:
        con.execute("PRAGMA journal_mode=WAL")
        con.execute("PRAGMA synchronous=NORMAL")
        con.execute("""
            CREATE TABLE bench (
                id INTEGER PRIMARY KEY,
                h9 INTEGER, h7 INTEGER, h5 INTEGER,
                lat REAL, lon REAL,
                elev_cm INTEGER, terrain INTEGER, hydro INTEGER,
                geo_dep INTEGER, geo_flag INTEGER, occ_flag INTEGER,
                status INTEGER, run_id INTEGER, created_at TEXT
            )
        """)

        # Synthetic rows, one value per column of the bench table.
        rows = [
            (i, i*10, i*100, i*1000,
             40.0 + i*0.001, 12.0 + i*0.001,
             100, 1, 0, 255, 0, 0,
             1, 1, "2026-04-26T00:00:00Z")
            for i in range(N_ROWS)
        ]

        # Timed section: one batched INSERT followed by the COMMIT.
        t0 = time.perf_counter()
        con.executemany("""
            INSERT INTO bench
            (id, h9, h7, h5, lat, lon, elev_cm, terrain, hydro,
             geo_dep, geo_flag, occ_flag, status, run_id, created_at)
            VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)
        """, rows)
        con.commit()
        t1 = time.perf_counter()

        elapsed = t1 - t0
        print(f"{N_ROWS} row INSERT+COMMIT: {elapsed:.3f}s")
        print(f"Per-row: {elapsed/N_ROWS*1000:.3f}ms")
    finally:
        con.close()
PYEOF
echo ""
|
||||
|
||||
echo "=== PHASE 6: REPORT LOCATION ==="
|
||||
echo "Inventory written to $OUTFILE"
|
||||
} | tee "$OUTFILE"
|
||||
|
||||
echo ""
|
||||
wc -l "$OUTFILE"
|
||||
Reference in New Issue
Block a user