Skip to content

Commit

Permalink
feat: generates csv of stats to share with team;combine stats.json in…
Browse files Browse the repository at this point in the history
… nested directories of multiple versions
  • Loading branch information
danellecline committed Dec 31, 2024
1 parent 11a97bc commit 50931d2
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 6 deletions.
58 changes: 58 additions & 0 deletions aipipeline/prediction/gen_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# aipipeline, Apache-2.0 license
# Filename: aipipeline/prediction/gen_stats.py
# Description: Compute stats for downloaded datasets and save to a csv file
import json
from datetime import datetime
from pathlib import Path
import logging

# Module-wide logger. The logger itself accepts INFO and above, so DEBUG
# records are dropped before they reach either handler below.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Console output: a plain stream handler with no formatter attached, so
# console lines carry only the message text
console = logging.StreamHandler()
logger.addHandler(console)

# File output: timestamped records written to a log file named after the
# current date; mode="w" truncates that file every time this module is loaded
formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s")
now = datetime.now()
log_filename = f"gen_stats_{now:%Y%m%d}.log"
handler = logging.FileHandler(filename=log_filename, mode="w")
handler.setLevel(logging.DEBUG)
handler.setFormatter(formatter)
logger.addHandler(handler)


def run(argv=None):
    """Combine the label counts of every ``stats.json`` under a root into one CSV.

    Command-line arguments (both required):
        --data    Root directory that is searched recursively for ``stats.json``.
        --prefix  String prepended directly (no separator) to ``stats.csv`` to
                  form the output file name.

    The result is written to ``<data>/<prefix>stats.csv`` with a
    ``label,count`` header and one row per label, highest count first.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv``. Unrecognized arguments are ignored.

    Raises:
        SystemExit: Raised by argparse when a required argument is missing.
        json.JSONDecodeError: If a ``stats.json`` file is not valid JSON.
        ValueError: If a count cannot be converted with ``int()``.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Compute stats for dataset.")
    parser.add_argument("--data", required=True, help="Root path for the dataset")
    parser.add_argument("--prefix", required=True, help="Prefix for the dataset")
    # parse_known_args tolerates extra arguments; they are deliberately dropped
    args, _ = parser.parse_known_args(argv)

    download_path = Path(args.data)
    logger.info(f"Computing stats for {download_path}")
    combined_stats = _combine_stats(download_path)

    output_path = download_path / f"{args.prefix}stats.csv"
    logger.info(f"Saving stats to {output_path}")
    _write_stats_csv(combined_stats, output_path)


def _combine_stats(root):
    """Return label -> summed count over all stats.json files below *root*, largest first."""
    totals = {}
    # Sorted so the log order, and the order of equal-count labels in the
    # output, do not depend on the filesystem's directory listing order
    for stats_file in sorted(root.rglob("stats.json")):
        logger.info(f"Found stats file: {stats_file}")
        # Context manager closes the file even if parsing fails
        with stats_file.open(encoding="utf-8") as f:
            stats = json.load(f)
        # Each top-level value (one per version key) is a mapping of
        # label -> count; the version key itself is not needed
        for label_counts in stats.values():
            for label, count in label_counts.items():
                totals[label] = totals.get(label, 0) + int(count)

    # Descending by count; the sort is stable, so ties keep insertion order
    return dict(sorted(totals.items(), key=lambda item: item[1], reverse=True))


def _write_stats_csv(combined_stats, output_path):
    """Write *combined_stats* to *output_path* as a ``label,count`` CSV file."""
    import csv

    # newline="" is what the csv module expects of the file object; the
    # explicit "\n" terminator keeps rows newline-terminated as before
    with output_path.open("w", encoding="utf-8", newline="") as f:
        # csv.writer quotes labels containing commas, quotes or newlines,
        # which plain string formatting would turn into malformed rows
        writer = csv.writer(f, lineterminator="\n")
        writer.writerow(["label", "count"])
        writer.writerows(combined_stats.items())

# Script entry point: when this file is executed directly, generate the stats
# CSV using the arguments given on the command line (run() is called with
# argv=None, so argparse reads sys.argv).
if __name__ == "__main__":
    run()
19 changes: 13 additions & 6 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -331,15 +331,22 @@ gen-cfe-data:

# Generate training data for the i2map project
gen-i2map-data:
just --justfile {{justfile()}} download-crop i2map --skip-clean True --version Baseline
just --justfile {{justfile()}} download-crop i2map --version Baseline

# Generate training data for the i2map project from the bulk server, run with ENV_FILE=.env.i2map just gen-i2mapbulk-data
gen-i2mapbulk-data:
just --justfile {{justfile()}} download-crop i2mapbulk --skip-clean True --version Baseline
just --justfile {{justfile()}} download-crop i2mapbulk --skip-clean True --version dino_vits8_20240207_022529
just --justfile {{justfile()}} download-crop i2mapbulk --version Baseline
just --justfile {{justfile()}} download-crop i2mapbulk --version dino_vits8_20240207_022529

# Generate training data for the uav project
gen-uav-data:
just --justfile {{justfile()}} download-crop i2map --skip-clean True --version Baseline
just --justfile {{justfile()}} download-crop i2map --skip-clean True --version yolov5x6-uavs-oneclass-uav-vit-b-16
just --justfile {{justfile()}} download-crop i2map --skip-clean True --version uav-yolov5-30k-vs
just --justfile {{justfile()}} download-crop i2map --skip-clean True --version Baseline
just --justfile {{justfile()}} download-crop i2map --skip-clean True --version yolov5x6-uavs-oneclass-uav-vit-b-16
just --justfile {{justfile()}} download-crop i2map --skip-clean True --version uav-yolov5-30k

# Generate <data>/<project>stats.csv with label counts combined from every stats.json found under data
gen-stats-csv project='UAV' data='/mnt/ML_SCRATCH/UAV/':
    #!/usr/bin/env bash
    export PYTHONPATH=.
    time conda run -n aipipeline python3 aipipeline/prediction/gen_stats.py --data {{data}} --prefix {{project}}

0 comments on commit 50931d2

Please sign in to comment.