From 3368157d47e07147b479b869195d1d4fb2e3b7c5 Mon Sep 17 00:00:00 2001 From: Pavel Tomanek Date: Fri, 24 Jan 2025 16:14:04 +0100 Subject: [PATCH] #487 v4 - with LAMMPS debug --- 487_DeePMD-kit/deePMD.eb | 1 + 487_DeePMD-kit/deePMD_v3-components.eb | 19 +- 487_DeePMD-kit/deePMD_v4-lammps.eb | 263 +++++++++++++++++++++++++ 487_DeePMD-kit/horovod_v4-PyTorch.eb | 5 +- 4 files changed, 283 insertions(+), 5 deletions(-) create mode 100644 487_DeePMD-kit/deePMD_v4-lammps.eb diff --git a/487_DeePMD-kit/deePMD.eb b/487_DeePMD-kit/deePMD.eb index 6dc4ebf6..99a148ea 100644 --- a/487_DeePMD-kit/deePMD.eb +++ b/487_DeePMD-kit/deePMD.eb @@ -1,3 +1,4 @@ +# working deepdm-kit python easyblock = 'PythonBundle' name = 'DeePDM-kit' diff --git a/487_DeePMD-kit/deePMD_v3-components.eb b/487_DeePMD-kit/deePMD_v3-components.eb index eb0ae846..220b8796 100644 --- a/487_DeePMD-kit/deePMD_v3-components.eb +++ b/487_DeePMD-kit/deePMD_v3-components.eb @@ -63,7 +63,7 @@ dependencies = [ ('Horovod', '0.28.1', versionsuffix), ('typing-extensions', '4.9.0'), # ('protobuf', '24.0'), - ('LAMMPS', '2Aug2023_update2', '-kokkos'), + # ('LAMMPS', '2Aug2023_update2', '-kokkos'), ] @@ -88,9 +88,9 @@ components = [ 'source_urls': ['https://pypi.python.org/packages/source/d/deepmd-kit/'], 'sources': ['deepmd_kit-%(version)s.tar.gz'], 'start_dir': 'deepmd_kit-%(version)s/source', - 'preconfigopts': "module swap protobuf/3.21.9-GCCcore-12.3.0 && ", - # $EBROOTLAMMPS - 'configopts': '-DENABLE_TENSORFLOW=TRUE -DUSE_TF_PYTHON_LIBS=TRUE -DLAMMPS_SOURCE_ROOT=$EBROOTLAMMPS', + # 'preconfigopts': "module swap protobuf/3.21.9-GCCcore-12.3.0 && ", + # 'configopts': '-DENABLE_TENSORFLOW=TRUE -DUSE_TF_PYTHON_LIBS=TRUE -DLAMMPS_SOURCE_ROOT=$EBROOTLAMMPS', + 'configopts': '-DENABLE_TENSORFLOW=TRUE -DUSE_TF_PYTHON_LIBS=TRUE', 'prebuildopts': "module swap protobuf/3.21.9-GCCcore-12.3.0 && ", 'checksums': ['10d4443c6fe31a9a4573ed6eda73b6a669dae572cf2bc43f45e9a63aaae02cff'], }), @@ -195,6 +195,17 @@ moduleclass = 'ai' # 
$EBROOTDEEPDMMINKIT # libtensorflow_cc.so is in $EBROOTTESORFLOW/lib/python3.11/site-packages/tensorflow/libtensorflow_cc.so.2 +# E3: OK + # -> create separate ec deepmd_with_LAMMPS + # -> it needs sources of LAMMPS not installation + # -> no version.h in include + # -> no such file -LAMMPSUtils.cmake- in LAMMPS but in sources of LAMMPS it is there + # <- after add LAMMPS the cmake crashed + # CMake Error at lmp/plugin/CMakeLists.txt:39 (include): + # include could not find requested file: + # /apps/gent/RHEL8/cascadelake-ib/software/LAMMPS/2Aug2023_update2-foss-2023a-kokkos/cmake/Modules/LAMMPSUtils.cmake + # CMake Error at lmp/plugin/CMakeLists.txt:40 (get_lammps_version): + # Unknown CMake command "get_lammps_version". # E2: OK # -> put hack to prebuildopts too -> OK # <- problem with build step (make of cpp component) - even with the heck it retuns this <- log4.txt diff --git a/487_DeePMD-kit/deePMD_v4-lammps.eb b/487_DeePMD-kit/deePMD_v4-lammps.eb new file mode 100644 index 00000000..9f5cdee5 --- /dev/null +++ b/487_DeePMD-kit/deePMD_v4-lammps.eb @@ -0,0 +1,263 @@ +easyblock = 'PythonBundle' + +name = 'DeePDM-kit' +version = '3.0.1' +# versionsuffix = '-CUDA-%(cudaver)s' +local_tf_version = '2.15.1' +versionsuffix = '-TensorFlow-%s' % local_tf_version + +homepage = 'https://github.com/deepmodeling/deepmd-kit/' +description = "A deep learning package for many-body potential energy representation and molecular dynamics." + +toolchain = {'name': 'foss', 'version': '2023a'} + +# DEPS: + # OK scikit-build-core>=0.5,<0.11,!=0.6.0 + # OK 'numpy>=1.21', + # OK 'scipy', + # OK 'pyyaml', + # ok 'dargs >= 0.4.7', + # OK "h5py>=3.6.0,!=3.11.0; platform_system=='Linux' and platform_machine=='aarch64'", + # ok 'wcmatch', + # OK 'ml_dtypes', + # ok 'mendeleev', + # ok 'array-api-compat', + # ok dpdata + # parallel training deps: + # OK? horovod - horovod_v2_kenneths_hack + # OK mpi4py + # ok dpdata 0.2.21 requires monty, which is not installed. 
+ # ok dargs 0.4.10 requires typeguard, which is not installed. + # OK mendeleev 0.20.1 requires deprecated, which is not installed. + # OK mendeleev 0.20.1 requires pydantic, which is not installed. + # ok mendeleev 0.20.1 requires pyfiglet, which is not installed. + # OK mendeleev 0.20.1 requires sqlalchemy, which is not installed. + # ok monty 2025.1.9 requires ruamel-yaml, which is not installed. + # ok wcmatch 8.5.2 requires bracex, which is not installed. + # ok typeguard 4.3.0 has requirement typing-extensions>=4.10.0, but you have typing-extensions 4.9.0. -> v4.2.0 + # ok mendeleev 0.20.1 has requirement pydantic<3.0.0,>=2.9.2, but you have pydantic 2.5.3. -> 0.18.1 + # ok mendeleev 0.20.1 has requirement pyfiglet<0.9,>=0.8.post1, but you have pyfiglet 1.0.2. + # ok tensorflow 2.15.1 has requirement wrapt<1.15,>=1.11.0, but you have wrapt 1.15.0. + +builddependencies = [ + ('scikit-build-core', '0.9.3'), + ('hatchling', '1.18.0'), + ('poetry', '1.5.1'), + ('git', '2.41.0', '-nodocs'), + ('CMake', '3.26.3'), +] +dependencies = [ + # ('CUDA', '12.6.0', '', SYSTEM), + ('Python', '3.11.3'), + ('SciPy-bundle', '2023.07'), + ('ml_dtypes', '0.3.2'), + ('PyYAML', '6.0'), + ('h5py', '3.9.0'), + ('TensorFlow', '2.15.1'), + ('jax', '0.4.25'), + ('mpi4py', '3.1.4'), + ('Deprecated', '1.2.14'), + # ('pydantic', '2.5.3'), + ('SQLAlchemy', '2.0.25'), + ('ruamel.yaml', '0.17.32'), + ('Horovod', '0.28.1', versionsuffix), + ('typing-extensions', '4.9.0'), + # ('protobuf', '24.0'), + # ('LAMMPS', '2Aug2023_update2', '-kokkos'), + +] + +# https://files.pythonhosted.org/packages/5c/f5/3be09970408c89c7767ab629cf80ecbd984ef3401a534a41e332f0e75bb7/deepmd_kit-3.0.1.tar.gz +components = [ + ('LAMMPS', '2Aug2023_update2', { + 'easyblock': 'Tarball', + 'source_urls': ['https://github.com/lammps/lammps/archive/'], + 'sources': ['stable_2Aug2023_update4.tar.gz'], + 'checksums': ['6eed007cc24cda80b5dd43372b2ad4268b3982bb612669742c8c336b79137b5b'], + }), + ('deepmd', version, { + 
'easyblock': 'PythonPackage', + # 'modulename': 'deepmd', + # 'source_tmpl': 'deepmd_kit-%(version)s.tar.gz', + # 'source_urls': [PYPI_SOURCE], + 'source_urls': ['https://pypi.python.org/packages/source/d/deepmd-kit/'], + 'sources': ['deepmd_kit-%(version)s.tar.gz'], + 'preinstallopts': "module swap protobuf/3.21.9-GCCcore-12.3.0 && ", + 'use_pip': True, + 'start_dir': 'deepmd_kit-%(version)s', + 'checksums': ['10d4443c6fe31a9a4573ed6eda73b6a669dae572cf2bc43f45e9a63aaae02cff'], + }), + ('deepmd_cpp', version, { + 'easyblock': 'CMakeMake', + # 'source_tmpl': 'deepmd_kit-%(version)s.tar.gz', + # 'source_urls': [PYPI_SOURCE], + 'source_urls': ['https://pypi.python.org/packages/source/d/deepmd-kit/'], + 'sources': ['deepmd_kit-%(version)s.tar.gz'], + 'start_dir': 'deepmd_kit-%(version)s/source', + # 'preconfigopts': "module swap protobuf/3.21.9-GCCcore-12.3.0 && ", + # $EBROOTLAMMPS + # 'configopts': '-DENABLE_TENSORFLOW=TRUE -DUSE_TF_PYTHON_LIBS=TRUE -DLAMMPS_SOURCE_ROOT=$EBROOTLAMMPS', + 'configopts': '-DENABLE_TENSORFLOW=TRUE -DUSE_TF_PYTHON_LIBS=TRUE', + 'prebuildopts': "module swap protobuf/3.21.9-GCCcore-12.3.0 && ", + 'checksums': ['10d4443c6fe31a9a4573ed6eda73b6a669dae572cf2bc43f45e9a63aaae02cff'], + }), + # ('OpenMM', '7.5.1', { + # 'easyblock': 'CMakeMake', + # 'source_urls': ['https://github.com/openmm/openmm/archive/'], + # 'sources': [{'download_filename': '%(version)s.tar.gz', 'filename': SOURCE_TAR_GZ}], + # 'patches': [('OpenMM-%(version)s_AlphaFold.patch', 'wrappers/python')], + # 'checksums': [ + # 'c88d6946468a2bde2619acb834f57b859b5e114a93093cf562165612e10f4ff7', # OpenMM-7.5.1.tar.gz + # '1b109dfff3af5c6aa70690bca14618612953c68840a7e64f679db7ca33c1aff6', # OpenMM-7.5.1_AlphaFold.patch + # ], + # 'start_dir': 'openmm-%(version)s', + # 'preinstallopts': local_openmm_preinstallopts, + # 'installopts': local_openmm_installopts, + # }), + # (name, version, { + # 'easyblock': 'PythonPackage', + # 'source_urls': [ + # 
'https://github.com/deepmind/alphafold/archive/refs/tags/', + # 'https://git.scicore.unibas.ch/schwede/openstructure/-/raw/%s/modules/mol/alg/src/' % local_scp_commit, + # ], + # 'sources': [ + # { + # 'download_filename': 'v%(version)s.tar.gz', + # 'filename': SOURCE_TAR_GZ, + # }, + # { + # 'download_filename': 'stereo_chemical_props.txt', + # 'filename': 'stereo_chemical_props-%s.txt' % local_scp_commit, + # 'extract_cmd': "cp %s .", + # }, + # ], + # 'patches': [ + # 'AlphaFold-%(version)s_fix-packages.patch', + # 'AlphaFold-%(version)s_data-dep-paths.patch', + # 'AlphaFold-%(version)s_n-cpu.patch', + # 'AlphaFold-%(version)s_fix-scp-path.patch', + # ], + # 'checksums': [ + # '78cf443a2d9250917b05f5d40ede140ed8e8341b42fdfa54340336aca52f53f2', # v2.0.0.tar.gz + # '24510899eeb49167cffedec8fa45363a4d08279c0c637a403b452f7d0ac09451', # stereo_chemical_props-7102c6.txt + # '826d2d1a5d6ac52c51a60ba210e1947d5631a1e2d76f8815305b5d23f74458db', # AlphaFold-2.0.0_fix-packages.patch + # '7223e297b23f90816219095696bc6453910c617add60b907a0d3e869da8733bb', # AlphaFold-2.0.0_data-dep-paths.patch + # 'dfda4dd5f9aba19fe2b6eb9a0ec583d12dcefdfee8ab8803fc57ad48d582db04', # AlphaFold-2.0.0_n-cpu.patch + # '83dc82a8b1c647eb7e217aef683153e98a4fc7f871a85280976c92a1bfe28f27', # AlphaFold-2.0.0_fix-scp-path.patch + # ], + # 'start_dir': 'alphafold-%(version)s', + # 'use_pip': True, + # }), +] + +use_pip = True +sanity_pip_check = True + +# local_preinstallopts = 'module swap protobuf/3.21.9-GCCcore-12.3.0 && ' + +exts_list = [ + ('wrapt', '1.14.1', { + 'checksums': ['380a85cf89e0e69b7cfbe2ea9f765f004ff419f34194018a6827ac0e3edfed4d'], + }), + ('typeguard', '4.2.0', { + 'checksums': ['2aeae510750fca88d0a2ceca3e86de7f71aa43b6c3e6c267737ce1f5effc4b34'], + }), + ('dargs', '0.4.10', { + 'checksums': ['2b39e0a93dcd323d0affb3f54ee2c11a439084d718934df08f38692dfbadddf8'], + }), + ('bracex', '2.5.post1', { + 'source_tmpl': '%(name)s-%(version)s-py3-none-any.whl', + 'checksums': 
['13e5732fec27828d6af308628285ad358047cec36801598368cb28bc631dbaf6'], + }), + ('wcmatch', '8.5.2', { + 'checksums': ['a70222b86dea82fb382dd87b73278c10756c138bd6f8f714e2183128887b9eb2'], + }), + ('pyfiglet', '0.8.post1', { + 'checksums': ['c6c2321755d09267b438ec7b936825a4910fec696292139e664ca8670e103639'], + }), + ('mendeleev', '0.18.1', { + 'checksums': ['a5b60bd313a5d2b404a6a250186e643663d5625c8138b3cfba829f1f4384f2a0'], + }), + ('array_api_compat', '1.10.0', { + 'checksums': ['eb98056fa4993e7e98860b7a1ca73c9ae1c77f1ef95366a5ebd5dec8e6d55bad'], + }), + ('monty', '2025.1.9', { + 'checksums': ['edb680b01ea1e59225cb666634b0dd2b2393eef07f3d45748445db92e1f1006d'], + }), + ('dpdata', '0.2.21', { + 'checksums': ['55dcec61bdc8707fb6b3e57406fb7c07b6ccb7a0ac763a1407cc1c3222bf58b1'], + }), +] + +# sanity_check_paths = { +# 'files': ['bin/%(name)s'], +# 'dirs': ['lib/python%(pyshortver)s/site-packages'], +# } + +sanity_check_commands = ['dp -h'] + +moduleclass = 'ai' + +# TODO: add LAMMPS support +# $EBROOTDEEPDMMINKIT +# libtensorflow_cc.so is in $EBROOTTESORFLOW/lib/python3.11/site-packages/tensorflow/libtensorflow_cc.so.2 + +# E3: OK + # -> create separate ec deepmd_with_LAMMPS + # -> it needs sources of LAMMPS not installation + # -> no version.h in include + # -> no such file -LAMMPSUtils.cmake- in LAMMPS but in sources of LAMMPS it is there + # <- after add LAMMPS the cmake crashed + # CMake Error at lmp/plugin/CMakeLists.txt:39 (include): + # include could not find requested file: + # /apps/gent/RHEL8/cascadelake-ib/software/LAMMPS/2Aug2023_update2-foss-2023a-kokkos/cmake/Modules/LAMMPSUtils.cmake + # CMake Error at lmp/plugin/CMakeLists.txt:40 (get_lammps_version): + # Unknown CMake command "get_lammps_version". 
+# E2: OK + # -> put hack to prebuildopts too -> OK + # <- problem with build step (make of cpp component) - even with the hack it returns this <- log4.txt + # In file included from /apps/gent/RHEL8/cascadelake-ib/software/TensorFlow/2.15.1-foss-2023a/lib/python3.11/sit + # e-packages/tensorflow/include/tensorflow/core/graph/default_device.h:21, + # from /tmp/vsc47063/easybuild/build/DeePDMkit/3.0.1/foss-2023a-TensorFlow-2.15.1/deepmd_kit-3. + # 0.1/source/api_cc/include/tf_private.h:8, + # from /tmp/vsc47063/easybuild/build/DeePDMkit/3.0.1/foss-2023a-TensorFlow-2.15.1/deepmd_kit-3. + # 0.1/source/api_cc/include/commonTF.h:6, + # from /tmp/vsc47063/easybuild/build/DeePDMkit/3.0.1/foss-2023a-TensorFlow-2.15.1/deepmd_kit-3. + # 0.1/source/api_cc/include/DataModifierTF.h:6, + # from /tmp/vsc47063/easybuild/build/DeePDMkit/3.0.1/foss-2023a-TensorFlow-2.15.1/deepmd_kit-3. + # 0.1/source/api_cc/src/DataModifierTF.cc:3: + # f protoc which is + # 17 | #error This file was generated by an older version of protoc which is +# E1: OK + # -> use Kenneth's hack in preinstallopts (module swap protobuf/3.21.9-GCCcore-12.3.0) -> works + # ? but in tensorflow-2.15.1 there is req protobuf: protobuf==4.23.4 not 3.21.9 + # same problem as with horovod: https://github.com/vscentrum/vsc-software-stack/issues/390#issuecomment-2304201251 + # <- during pip install deepmd-kit: <- log2.txt + # error This file was generated by an older version of protoc + # error incompatible with your Protocol Buffer headers + # error regenerate this file with a newer version of protoc. 
+ # Building CXX object op/tf/CMakeFiles/deepmd_op.dir/cmake_pch.hxx.gch + # In file included from /apps/gent/RHEL8/cascadelake-ib/software/TensorFlow/2.15.1-foss-2023a/lib/python3.11/site-packages/tensorflow/include/tensorflow/core/framework/op.h:26, + # from /tmp/vsc47063/easybuild/build/DeePDMkit/3.0.1/foss-2023a/deepmd_kit/deepmd_kit-3.0.1/source/op/tf/custom_op.h:9, + # from /tmp/vsc47063/easybuild/build/DeePDMkit/3.0.1/foss-2023a/deepmd_kit/deepmd_kit-3.0.1/build/py37-none-linux_x86_64/op/tf/CMakeFiles/deepmd_op.dir/cmake_pch.hxx:5, + # from : + # /apps/gent/RHEL8/cascadelake-ib/software/TensorFlow/2.15.1-foss-2023a/lib/python3.11/site-packages/tensorflow/include/tensorflow/core/framework/full_type.pb.h:17:2: error: #error This file was generated by an older version of protoc which is + # 17 | #error This file was generated by an older version of protoc which is + # | ^~~~~ + # /apps/gent/RHEL8/cascadelake-ib/software/TensorFlow/2.15.1-foss-2023a/lib/python3.11/site-packages/tensorflow/include/tensorflow/core/framework/full_type.pb.h:18:2: error: #error incompatible with your Protocol Buffer headers. Please + # 18 | #error incompatible with your Protocol Buffer headers. Please + # | ^~~~~ + # /apps/gent/RHEL8/cascadelake-ib/software/TensorFlow/2.15.1-foss-2023a/lib/python3.11/site-packages/tensorflow/include/tensorflow/core/framework/full_type.pb.h:19:2: error: #error regenerate this file with a newer version of protoc. + # 19 | #error regenerate this file with a newer version of protoc. 
+ # | ^~~~~ + # In file included from /apps/gent/RHEL8/cascadelake-ib/software/TensorFlow/2.15.1-foss-2023a/lib/python3.11/site-packages/tensorflow/include/tensorflow/core/framework/op_def_builder.h:27, + # from /apps/gent/RHEL8/cascadelake-ib/software/TensorFlow/2.15.1-foss-2023a/lib/python3.11/site-packages/tensorflow/include/tensorflow/core/framework/full_type_inference_util.h:24, + # from /apps/gent/RHEL8/cascadelake-ib/software/TensorFlow/2.15.1-foss-2023a/lib/python3.11/site-packages/tensorflow/include/tensorflow/core/framework/op.h:27: + # /apps/gent/RHEL8/cascadelake-ib/software/TensorFlow/2.15.1-foss-2023a/lib/python3.11/site-packages/tensorflow/include/tensorflow/core/framework/op_def.pb.h:17:2: error: #error This file was generated by an older version of protoc which is + # 17 | #error This file was generated by an older version of protoc which is + # | ^~~~~ + # /apps/gent/RHEL8/cascadelake-ib/software/TensorFlow/2.15.1-foss-2023a/lib/python3.11/site-packages/tensorflow/include/tensorflow/core/framework/op_def.pb.h:18:2: error: #error incompatible with your Protocol Buffer headers. Please + # 18 | #error incompatible with your Protocol Buffer headers. Please + # | ^~~~~ + # /apps/gent/RHEL8/cascadelake-ib/software/TensorFlow/2.15.1-foss-2023a/lib/python3.11/site-packages/tensorflow/include/tensorflow/core/framework/op_def.pb.h:19:2: error: #error regenerate this file with a newer version of protoc. + # 19 | #error regenerate this file with a newer version of protoc. 
\ No newline at end of file diff --git a/487_DeePMD-kit/horovod_v4-PyTorch.eb b/487_DeePMD-kit/horovod_v4-PyTorch.eb index c3a38659..9118d5cc 100644 --- a/487_DeePMD-kit/horovod_v4-PyTorch.eb +++ b/487_DeePMD-kit/horovod_v4-PyTorch.eb @@ -27,7 +27,9 @@ use_pip = True sanity_pip_check = True # local_preinstallopts = 'module swap protobuf/3.21.9-GCCcore-12.3.0 && HOROVOD_WITH_MPI=1 ' -local_preinstallopts = 'HOROVOD_WITH_MPI=1 HOROVOD_WITHOUT_TENSORFLOW=1 ' +local_preinstallopts = "sed -i 's/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g' CMakeLists.txt && " +local_preinstallopts += "sed -i 's/CMAKE_CXX_STANDARD 14/CMAKE_CXX_STANDARD 17/g' horovod/torch/CMakeLists.txt && " +local_preinstallopts += 'HOROVOD_WITH_MPI=1 HOROVOD_WITHOUT_TENSORFLOW=1 ' local_preinstallopts += 'HOROVOD_WITH_PYTORCH=1 HOROVOD_WITHOUT_MXNET=1 ' exts_list = [ @@ -59,6 +61,7 @@ sanity_check_commands = ["horovodrun --help"] moduleclass = 'tools' # E1: + # -> tried to use c++17 as recommended -> did not help # -> seems as a problem with new PyTorch (>2.1): https://github.com/horovod/horovod/issues/3941 # <- log3.txt - failed on pip install #