fix ram tracking
IlyasMoutawwakil committed Feb 14, 2024
1 parent 21cebb7 commit ef50b83
Showing 4 changed files with 16 additions and 15 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -13,7 +13,7 @@ install:
 	pip install -e .
 
 build_docker_cpu:
-	docker build -f docker/cuda.dockerfile --build-arg USER_ID=$(id -u) --build-arg GROUP_ID=$(id -g) -t opt-bench-cpu:latest .
+	docker build -f docker/cpu.dockerfile --build-arg USER_ID=$(id -u) --build-arg GROUP_ID=$(id -g) -t opt-bench-cpu:latest .
 
 build_docker_cuda:
 	docker build -f docker/cuda.dockerfile --build-arg USER_ID=$(id -u) --build-arg GROUP_ID=$(id -g) --build-arg TORCH_CUDA=cu118 --build-arg CUDA_VERSION=11.8.0 -t opt-bench-cuda:11.8.0 .
5 changes: 4 additions & 1 deletion optimum_benchmark/env_utils.py
@@ -127,7 +127,10 @@ def get_gpu_vram_mb() -> List[int]:
 
         if rocm_version >= "5.7":
             device_handles = amdsmi.amdsmi_get_processor_handles()
-            vrams = [amdsmi.amdsmi_get_gpu_memory_total(device_handle) for device_handle in device_handles]
+            vrams = [
+                amdsmi.amdsmi_get_gpu_memory_total(device_handle, mem_type=amdsmi.AmdSmiMemoryType.VRAM)
+                for device_handle in device_handles
+            ]
         else:
             device_handles = amdsmi.amdsmi_get_device_handles()
             vrams = [
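
As a side note, the updated branch now passes an explicit memory type to amdsmi. The snippet below is a sketch of how that call can be exercised on its own; it is not part of the commit, it assumes a ROCm >= 5.7 system with the amdsmi Python package installed, and it assumes the call reports bytes (hence the division to get MB, in line with the get_gpu_vram_mb name).

# Sketch only: query per-GPU VRAM with an explicit mem_type, as in the updated branch.
# Assumes ROCm >= 5.7 and the amdsmi Python bindings; the init/shut_down names come from
# those bindings as we understand them, not from this repository.
import amdsmi

amdsmi.amdsmi_init()
try:
    handles = amdsmi.amdsmi_get_processor_handles()
    vram_mb = [
        amdsmi.amdsmi_get_gpu_memory_total(handle, mem_type=amdsmi.AmdSmiMemoryType.VRAM) / 1e6
        for handle in handles
    ]
    print(vram_mb)
finally:
    amdsmi.amdsmi_shut_down()
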
7 changes: 3 additions & 4 deletions optimum_benchmark/trackers/memory.py
@@ -74,8 +74,8 @@ def __init__(self, device: str, backend: str, device_ids: Optional[str] = None):
         self.backend = backend
         self.device_ids = device_ids
 
-        self.max_ram_memory: List[int] = []
-        self.max_vram_memory: List[int] = []
+        self.max_ram_memory: float = 0
+        self.max_vram_memory: float = 0
         self.max_reserved_memory: float = 0
         self.max_allocated_memory: float = 0
 
@@ -143,7 +143,6 @@ def _cuda_memory(self):
 
         yield from self._cpu_memory()
 
-        # if process still running, get the last memory snapshot
         parent_connection.send(True)
         self.max_vram_memory = parent_connection.recv()
 
@@ -160,7 +159,7 @@ def _cpu_memory(self):
         yield
 
         parent_connection.send(True)
-        self.max_vram_memory = parent_connection.recv()
+        self.max_ram_memory = parent_connection.recv()
 
     def get_max_memory(self):
         if self.device == "cuda" and self.backend == "pytorch":
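
The substance of the fix is the last hunk above: the CPU branch of the tracker stored the child process's final reading in max_vram_memory instead of max_ram_memory, so the RAM figure was never filled in. The sketch below illustrates the parent/child polling pattern involved, using hypothetical names (monitor_ram, track_ram) rather than the library's actual classes; it assumes psutil is available and reports RSS in MB.

# Sketch only: the process-based RAM polling pattern the tracker relies on.
# monitor_ram and track_ram are illustrative names, not optimum-benchmark's API.
import os
from contextlib import contextmanager
from multiprocessing import Pipe, Process

import psutil


def monitor_ram(pid, child_connection, interval=0.01):
    process = psutil.Process(pid)
    max_ram = 0.0
    child_connection.send(0)  # tell the parent that polling has started
    stop = False
    while not stop:
        max_ram = max(max_ram, process.memory_info().rss / 1e6)  # MB
        stop = child_connection.poll(interval)  # doubles as the sleep between samples
    child_connection.send(max_ram)  # last snapshot, read back by the parent


@contextmanager
def track_ram():
    parent_connection, child_connection = Pipe()
    worker = Process(target=monitor_ram, args=(os.getpid(), child_connection), daemon=True)
    worker.start()
    parent_connection.recv()  # wait until the child is actually polling
    yield
    parent_connection.send(True)  # ask the child for its final reading
    max_ram = parent_connection.recv()  # the fix: store this as RAM, not VRAM
    worker.join()
    print(f"max ram: {max_ram:.2f} MB")

Used as `with track_ram(): ...`, the parent receives the peak resident memory observed while the block ran, which is what max_ram_memory should hold after this commit.
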
17 changes: 8 additions & 9 deletions tests/test_api.py
@@ -65,8 +65,8 @@ def test_api_latency_tracker(device, backend):
     latency = tracker.get_latency()
     latency.log()
 
-    assert latency[0] > expected_latency * 0.9
-    assert latency[0] < expected_latency * 1.1
+    assert latency.mean < expected_latency * 1.1
+    assert latency.mean > expected_latency * 0.9
 
 
 @pytest.mark.parametrize("device", DEVICES)
@@ -85,14 +85,10 @@ def test_api_memory_tracker(device, backend):
 
     tracker.reset()
     with tracker.track():
-        time.sleep(2)
+        time.sleep(1)
         array = torch.randn((10000, 10000), dtype=torch.float64, device=device)
         expected_memory = array.nbytes / 1e6
-        time.sleep(2)
-
-    del array
-    gc.collect()
-    torch.cuda.empty_cache()
+        time.sleep(1)
 
     final_memory = tracker.get_max_memory()
     final_memory.log()
@@ -102,13 +98,16 @@
     elif device == "cuda":
         measured_memory = final_memory.max_vram - initial_memory.max_vram
         if torch.version.hip is not None:
-            measured_memory -= 1600  # ???
+            measured_memory -= 1600  # something is wrong with amdsmi or rocm
     else:
         measured_memory = final_memory.max_ram - initial_memory.max_ram
 
     assert measured_memory < expected_memory * 1.1
     assert measured_memory > expected_memory * 0.9
 
+    del array
+    gc.collect()
+
 
 @pytest.mark.parametrize("library,task,model", LIBRARIES_TASKS_MODELS)
 def test_api_input_generator(library, task, model):
