Skip to content

Commit

Permalink
Bexhoma: Read size of datadisk in megabyte - #398
Browse files Browse the repository at this point in the history
  • Loading branch information
perdelt committed Jan 2, 2025
1 parent 2c096fd commit 78f45af
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 5 deletions.
6 changes: 3 additions & 3 deletions bexhoma/clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -967,15 +967,15 @@ def OLD_continueBenchmarks(self, connection=None, query=None):
def OLD_runReporting(self):
evaluator.evaluator(self.benchmark, load=False, force=True)
self.benchmark.generateReportsAll()
def copyLog(self):
def OLD_copyLog(self):
print("copyLog")
if len(self.docker['logfile']):
cmd = {}
cmd['prepare_log'] = 'mkdir /data/'+str(self.code)
stdin, stdout, stderr = self.execute_command_in_pod(cmd['prepare_log'], container='dbms')
cmd['save_log'] = 'cp '+self.docker['logfile']+' /data/'+str(self.code)+'/'+self.connection+'.log'
stdin, stdout, stderr = self.execute_command_in_pod(cmd['save_log'], container='dbms')
def copyInits(self):
def OLD_copyInits(self):
print("copyInits")
cmd = {}
cmd['prepare_log'] = 'mkdir /data/'+str(self.code)
Expand Down Expand Up @@ -1017,7 +1017,7 @@ def get_pod_containers(self, pod):
initContainers = output.split(" ")
self.logger.debug("Pod {} has container {}".format(pod, containers + initContainers))
return containers + initContainers
def downloadLog(self):
def OLD_downloadLog(self):
print("downloadLog")
self.kubectl('cp --container dbms '+self.activepod+':/data/'+str(self.code)+'/ '+self.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code))
def get_jobs(self, app='', component='', experiment='', configuration='', client=''):
Expand Down
4 changes: 2 additions & 2 deletions bexhoma/configurations.py
Original file line number Diff line number Diff line change
Expand Up @@ -1736,13 +1736,13 @@ def get_host_diskspace_used_data(self):
else:
return 0
try:
command = "du "+datadir+" | awk 'END{print \\$1}'"
command = "du --block-size=1M -Ls "+datadir+" | awk 'END{print \\$1}'"
cmd['disk_space_used'] = command
stdin, stdout, stderr = self.execute_command_in_pod_sut(cmd['disk_space_used'])
return int(stdout.replace('\n',''))
except Exception as e:
# Windows
command = "du "+datadir+" | awk 'END{print $1}'"
command = "du --block-size=1M -Ls "+datadir+" | awk 'END{print $1}'"
cmd['disk_space_used'] = command
try:
stdin, stdout, stderr = self.execute_command_in_pod_sut(cmd['disk_space_used'])
Expand Down
29 changes: 29 additions & 0 deletions docs/DBMS.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,35 @@ This has
* an optional name of a `logfile` that is downloaded after the benchmark
* name of the `datadir` of the DBMS. Its size is measured using `du` after data loading has been finished.

### Collect Host Information

Some information is given by configuration (e.g., JDBC data), some is collected automatically from the experiment host:
```
cluster.get_host_memory()
cluster.get_host_cpu()
cluster.get_host_cores()
cluster.get_host_system()
cluster.get_host_diskspace_used()
cluster.get_host_diskspace_used_data()
cluster.get_host_cuda()
cluster.get_host_gpus()
cluster.get_host_gpu_ids()
cluster.get_host_node()
```

Most of these run inside the docker container:
* `cluster.get_host_memory()`: Collects `grep MemTotal /proc/meminfo | awk '{print $2}'` and multiplies by 1024
* `cluster.get_host_cpu()`: Collects `cat /proc/cpuinfo | grep 'model name' | head -n 1`
* `cluster.get_host_cores()`: Collects `grep -c ^processor /proc/cpuinfo`
* `cluster.get_host_system()`: Collects `uname -r`
* `cluster.get_host_diskspace_used()`: Collects `df / | awk 'NR == 2{print $3}'`
* `cluster.get_host_diskspace_used_data()`: Collects `du --block-size=1M -Ls datadir | awk 'END{print $1}'` inside docker container, where `datadir` is set in config of DBMS. The result is the total size of `datadir` in megabytes (1M blocks, symbolic links followed).
* `cluster.get_host_cuda()`: Collects `nvidia-smi | grep 'CUDA'`
* `cluster.get_host_gpus()`: Collects `nvidia-smi -L` and then aggregates the type using `Counter([x[x.find(":")+2:x.find("(")-1] for x in l if len(x)>0])`
* `cluster.get_host_gpu_ids()`: Collects `nvidia-smi -L` and finds 'UUID: ' inside
* `cluster.get_host_node()`: Gets `spec.nodeName` from pod description
* `cluster.get_host_volume()`: Gets size and used from `df -h | grep volumes`

### Deployment Manifests

Every DBMS that is deployed by bexhoma needs a YAML manifest.
Expand Down

0 comments on commit 78f45af

Please sign in to comment.