Bexhoma: Read size of datadisk in megabyte - #398

Beuth-Erdelt · Jan 2, 2025 · 78f45af · 78f45af
1 parent 2c096fd
commit 78f45af
Show file tree

Hide file tree

Showing 3 changed files with 34 additions and 5 deletions.
diff --git a/bexhoma/clusters.py b/bexhoma/clusters.py
@@ -967,15 +967,15 @@ def OLD_continueBenchmarks(self, connection=None, query=None):
     def OLD_runReporting(self):
         evaluator.evaluator(self.benchmark, load=False, force=True)
         self.benchmark.generateReportsAll()
-    def copyLog(self):
+    def OLD_copyLog(self):
         print("copyLog")
         if len(self.docker['logfile']):
             cmd = {}
             cmd['prepare_log'] = 'mkdir /data/'+str(self.code)
             stdin, stdout, stderr = self.execute_command_in_pod(cmd['prepare_log'], container='dbms')
             cmd['save_log'] = 'cp '+self.docker['logfile']+' /data/'+str(self.code)+'/'+self.connection+'.log'
             stdin, stdout, stderr = self.execute_command_in_pod(cmd['save_log'], container='dbms')
-    def copyInits(self):
+    def OLD_copyInits(self):
         print("copyInits")
         cmd = {}
         cmd['prepare_log'] = 'mkdir /data/'+str(self.code)
@@ -1017,7 +1017,7 @@ def get_pod_containers(self, pod):
         initContainers = output.split(" ")
         self.logger.debug("Pod {} has container {}".format(pod, containers + initContainers))
         return containers + initContainers
-    def downloadLog(self):
+    def OLD_downloadLog(self):
         print("downloadLog")
         self.kubectl('cp --container dbms '+self.activepod+':/data/'+str(self.code)+'/ '+self.config['benchmarker']['resultfolder'].replace("\\", "/").replace("C:", "")+"/"+str(self.code))
     def get_jobs(self, app='', component='', experiment='', configuration='', client=''):

diff --git a/bexhoma/configurations.py b/bexhoma/configurations.py
@@ -1736,13 +1736,13 @@ def get_host_diskspace_used_data(self):
         else:
             return 0
         try:
-            command = "du "+datadir+" | awk 'END{print \\$1}'"
+            command = "du --block-size=1M -Ls "+datadir+" | awk 'END{print \\$1}'"
             cmd['disk_space_used'] = command
             stdin, stdout, stderr = self.execute_command_in_pod_sut(cmd['disk_space_used'])
             return int(stdout.replace('\n',''))
         except Exception as e:
             # Windows
-            command = "du "+datadir+" | awk 'END{print $1}'"
+            command = "du --block-size=1M -Ls "+datadir+" | awk 'END{print $1}'"
             cmd['disk_space_used'] = command
             try:
                 stdin, stdout, stderr = self.execute_command_in_pod_sut(cmd['disk_space_used'])

diff --git a/docs/DBMS.md b/docs/DBMS.md
@@ -57,6 +57,35 @@ This has
 * an optional name of a `logfile` that is downloaded after the benchmark
 * name of the `datadir` of the DBMS. Its size is measured using `du` after data loading has been finished.
 
+### Collect Host Information
+
+Some information is given by configuration (e.g., JDBC data), some is collected automatically from the experiment host:
+```
+cluster.get_host_memory()
+cluster.get_host_cpu()
+cluster.get_host_cores()
+cluster.get_host_system()
+cluster.get_host_diskspace_used()
+cluster.get_host_diskspace_used_data()
+cluster.get_host_cuda()
+cluster.get_host_gpus()
+cluster.get_host_gpu_ids()
+cluster.get_host_node()
+```
+
+Most of these run inside the docker container:
+* `cluster.get_host_memory()`: Collects `grep MemTotal /proc/meminfo | awk '{print $2}'` and multiplies by 1024
+* `cluster.get_host_cpu()`: Collects `cat /proc/cpuinfo | grep 'model name' | head -n 1`
+* `cluster.get_host_cores()`: Collects `grep -c ^processor /proc/cpuinfo`
+* `cluster.get_host_system()`: Collects `uname -r`
+* `cluster.get_host_diskspace_used()`: Collects `df / | awk 'NR == 2{print $3}'`
+* `cluster.get_host_diskspace_used_data()`: Collects `du --block-size=1M -Ls datadir | awk 'END{print $1}'` inside docker container (size in units of 1 MiB), where `datadir` is set in config of DBMS
+* `cluster.get_host_cuda()`: Collects `nvidia-smi | grep 'CUDA'`
+* `cluster.get_host_gpus()`: Collects `nvidia-smi -L` and then aggregates the type using `Counter([x[x.find(":")+2:x.find("(")-1] for x in l if len(x)>0])`
+* `cluster.get_host_gpu_ids()`: Collects `nvidia-smi -L` and finds 'UUID: ' inside
+* `cluster.get_host_node()`: Gets `spec.nodeName` from pod description
+* `cluster.get_host_volume()`: Gets size and used from `df -h | grep volumes`
+
 ### Deployment Manifests
 
 Every DBMS that is deployed by bexhoma needs a YAML manifest.