Skip to content

Commit

Permalink
Merge branch 'develop' into ic-mm-comms-overlap
Browse files Browse the repository at this point in the history
Conflicts:
	src/system/system.kathleen.make
  • Loading branch information
tkoskela committed Mar 27, 2024
2 parents 6ab122f + 64ee0cc commit 34fac21
Show file tree
Hide file tree
Showing 29 changed files with 489 additions and 145 deletions.
44 changes: 41 additions & 3 deletions .github/workflows/makefile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,19 @@ on:
branches: [ "develop" ]
pull_request:
branches: [ "develop" ]
# Manual dispatch for including all multiply kernels in matrix.
# We don't want this to run on every commit, but it's useful
# to be able to launch it manually
workflow_dispatch:
inputs:
multiply_kernel_test:
description: 'test all multiply kernels, instead of default only'
required: false
default: false

jobs:
build:
name: ${{matrix.fc}} -- ${{matrix.os}} -- ${{matrix.np}} processes -- ${{matrix.threads}} threads
name: ${{matrix.fc}} -- ${{matrix.os}} -- ${{matrix.multiply_kernel}} multiply kernel -- ${{matrix.np}} processes -- ${{matrix.threads}} threads
runs-on: ${{matrix.os}}
env:
FC: ${{matrix.fc}}
Expand All @@ -34,6 +43,35 @@ jobs:
- 2
# TODO: Could be worth testing on `mpich` in addition to `openmpi-bin`. Both are
# available in apt
multiply_kernel:
- default
- gemm
- ompDoii
- ompDoik
- ompDoji
- ompDojk
- ompGemm
- ompGemm_m
# As a work-around, copy the multiply_kernel_test variable to the matrix and
# exclude non-default multiply kernels when it's false
# https://github.com/orgs/community/discussions/26253#discussioncomment-3250989
test_all_multiply_kernels:
- ${{github.event.inputs.multiply_kernel_test}}
exclude:
- test_all_multiply_kernels: false
multiply_kernel: gemm
- test_all_multiply_kernels: false
multiply_kernel: ompDoii
- test_all_multiply_kernels: false
multiply_kernel: ompDoik
- test_all_multiply_kernels: false
multiply_kernel: ompDoji
- test_all_multiply_kernels: false
multiply_kernel: ompDojk
- test_all_multiply_kernels: false
multiply_kernel: ompGemm
- test_all_multiply_kernels: false
multiply_kernel: ompGemm_m

steps:
- uses: actions/checkout@v3
Expand Down Expand Up @@ -62,7 +100,7 @@ jobs:
- name: Build
working-directory: ${{github.workspace}}/src
run: |
make -j ${{matrix.np}} SYSTEM=gha
make -j ${{matrix.np}} SYSTEM=gha MULT_KERN=${{matrix.multiply_kernel}}
- name: Run test 001
working-directory: ${{github.workspace}}/testsuite/test_001_bulk_Si_1proc_Diag
Expand All @@ -81,7 +119,7 @@ jobs:
run: |
mpirun -np ${{matrix.np}} ../../bin/Conquest
cat Conquest_out
- name: Check test results
working-directory: ${{github.workspace}}/testsuite
run: pytest test_check_output.py
6 changes: 5 additions & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
# Required
version: 2

build:
os: ubuntu-22.04
tools:
python: "3.12"

# Build documentation in the docs/ directory with Sphinx
#sphinx:
# configuration: docs/CONQUEST-manual/conf.py
Expand All @@ -14,6 +19,5 @@ formats: all

# Optionally set the version of Python and requirements required to build your docs
python:
version: 3.7
install:
- requirements: docs/requirements.txt
26 changes: 25 additions & 1 deletion docs/groundstate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,31 @@ find the energy.

Go to :ref:`top <groundstate>`.

.. _ gs_scf_adv:
.. _gs_scf_restart:

Restarting SCF
~~~~~~~~~~~~~~

The SCF cycle can be restarted from a previous density matrix or
charge density, which may significantly speed up convergence.
The density matrix is automatically written out in the files ``Kmatrix2.*`` or
``Lmatrix2.*`` (depending on whether diagonalisation or linear scaling
is being used). These files are read in, and the initial
charge density made from them by setting the flags:

::

General.LoadDM T
SC.MakeInitialChargeFromK T

The charge density is not written out by default; this can be changed by
setting ``IO.DumpChargeDensity T`` which results in the files ``chden.nnn``
being created. To read these in as the initial charge density, the flag
``General.LoadRho T`` should be set.

Go to :ref:`top <groundstate>`.

.. _gs_scf_adv:

Advanced options
~~~~~~~~~~~~~~~~
Expand Down
48 changes: 43 additions & 5 deletions docs/input_tags.rst
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,9 @@ General.NewRun (*boolean*)

*default*: T

General.LoadL (*boolean*)
Specifies whether to load a previous L matrix from files
General.LoadDM (*boolean*)
Specifies whether to load a previous density matrix (K or L depending on
whether diagonalisation or linear scaling are selected) from files

*default*: F

Expand Down Expand Up @@ -532,6 +533,11 @@ SC.MetricFactor (*real*)

*default*: 0.1

SC.MakeInitialChargeFromK (*boolean*)
Flag determining whether initial charge is made from the density matrix

*default*: T

Go to :ref:`top <input_tags>`.

.. _input_dm:
Expand Down Expand Up @@ -658,9 +664,12 @@ Diag.GammaCentred (*boolean*)
*default*: F

Diag.PaddingHmatrix (*boolean*)
After v1.2, we have introduced a method to have an optimum value of
block size for Hamiltonian and overlap matrices (See below) by padding.
By setting 'F', we do not use the method.
Setting this flag allows the Hamiltonian and overlap matrices to be
made larger than their physical size, so that ScaLAPACK block sizes can
be set to any value (which can significantly improve efficiency). At present, the
automatic setting of block sizes does not use this functionality; if
desired, block sizes must be set manually (note that the optimum block
size is likely to be different on different machines). (Available from v1.2)

*default*: T

Expand All @@ -677,6 +686,9 @@ Diag.BlockSizeC (*integer*)
(i.e. :math:`\sum_{\mathrm{atoms}}\mathrm{NSF(atom)}`). A value of 64 is considered
optimal by the ScaLAPACK user’s guide.

If Diag.PaddingHmatrix is set to true then the block sizes can take any value,
but BlockSizeR and BlockSizeC must be the same.

*default*: Determined automatically

Diag.MPShift[X/Y/Z] (*real*)
Expand Down Expand Up @@ -1161,6 +1173,32 @@ MD.BaroDebug (*boolean*)

*default*: F

MD.VariableTemperature (*boolean*)
If set to true, run the molecular dynamics simulation with a time-varying temperature.

*default*: F

MD.VariableTemperatureMethod (*string*)
Type of temperature profile. Only ``linear`` temperature profile is implemented.

*default*: linear

MD.VariableTemperatureRate (*real*)
Change rate for the temperature. In units of K/fs.
If positive, heating. If negative, cooling.

*default*: 0.0

MD.InitialTemperature (*real*)
Initial temperature.

*default*: same as AtomMove.IonTemperature

MD.FinalTemperature (*real*)
Final temperature.

*default*: same as AtomMove.IonTemperature

Go to :ref:`top <input_tags>`.

.. _input_spin:
Expand Down
57 changes: 45 additions & 12 deletions docs/installing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@ it can be cloned:

where ``destination-directory`` should be set by the user.
Alternatively, it can be downloaded from GitHub as a zip file and
unpacked:
unpacked:

`<https://github.com/OrderN/CONQUEST-release/archive/master.zip>`_

Go to :ref:`top <install>`

.. _install_compile:

Compiling
Expand All @@ -41,17 +43,17 @@ a working MPI installation including a Fortran90 compiler (often
can be obtained from `the netlib repository <http://www.netlib.org/scalapack/>`_ if
you need to compile it)

Additionally, Conquest can use LibXC if it is available (v2.x or
Additionally, Conquest can use LibXC if it is available (v4.x or
later).

The library locations are set in the ``system.<systemname>.make`` file in the ``src/system``
directory, along with other parameters needed for compilation. ``system.<systemname>.make``
The library locations are set in the ``system.make`` file in the ``src/system``
directory, along with other parameters needed for compilation. The default file
name is ``system.make`` but you can select another file with ``make SYSTEM=label``
which would then use the file ``system.label.make`` in the ``src/system`` directory.
``system.<systemname>.make``
files are provided for some HPC systems used by the community, but if you want to run
locally or on a different system, you need to provide an appropriate ``system.<systemname>.make``
file. Use ``src/system/system.example.make`` as a starting point. Get the ``<systemname>``
by running ``hostname -d`` in your prompt, then name your file appropriately and move it to
the ``src/system`` directory. If ``hostname -d`` returns empty (e.g. you are running on a
local machine), the system-specific makefile should be named ``system.make``.
locally or on a different system, you will need to create an appropriate ``system.make``
file. Use ``src/system/system.example.make`` as a starting point.

* ``FC`` (typically ``FC=mpif90`` will be all that is required)
* ``COMPFLAGS`` (set these to specify compiler options such as
Expand All @@ -60,8 +62,8 @@ local machine), the system-specific makefile should be named ``system.make``.
* ``SCALAPACK`` (specify the ScaLAPACK library)
* ``FFT_LIB`` (must be left as FFTW)
* ``XC_LIBRARY`` (choose ``XC_LIBRARY=CQ`` for the internal Conquest
library, otherwise ``XC_LIBRARY=LibXC_v2or3`` for LibXC v2.x or v3.x, or ``XC_LIBRARY=LibXC_v4``
for LibXC v4.x)
library, otherwise ``XC_LIBRARY=LibXC_v4`` for LibXC v4.x, or ``XC_LIBRARY=LibXC_v5``
for LibXC v5.x and v6.x)
* Two further options need to be set for LibXC:

+ ``XC_LIB`` (specify the XC libraries)
Expand All @@ -73,11 +75,42 @@ Once these are set, you should make the executable using ``make``.
The ion file generation code is compiled using the same options
required for the main code.

Go to :ref:`top <install>`

Multi-threading
~~~~~~~~~~~~~~~

CONQUEST can use OpenMP for multi-threading; some multi-threading is available throughout the code, while there are specific matrix multiplication routines which can use multi-threading for the linear scaling solver. The number of threads is set via the environment variable ``OMP_NUM_THREADS``.

Compiler flags to enable OpenMP are dependent on the vendor, but should be specified via ``COMPFLAGS`` and ``LINKFLAGS`` in the ``system.make`` file. If compiling with OpenMP then you should also change the variable ``OMP_DUMMY`` in the same file to be blank to enable the number of threads to be included in the output.
Compiler flags to enable OpenMP are dependent on the vendor, but should be specified via ``OMPFLAGS`` in the ``system.make`` file. If compiling with OpenMP then you should also change the variable ``OMP_DUMMY`` in the same file to be blank to enable the number of threads to be included in the output.

On some systems, the default stack size for OpenMP is set to be rather small, and this can cause a segmentation fault when running with multiple threads. We recommend testing the effect of the environment variable ``OMP_STACKSIZE`` (and suggest setting it to 50M or larger as a first test).

Go to :ref:`top <install>`

.. _install_spack:

Installing with Spack
---------------------

CONQUEST and all of its dependencies can be installed with `Spack <https://spack.io/>`_.
The CONQUEST package requires Spack v0.21 or later. If Spack isn't available or up to date on your
system, it is relatively straightforward to install it with user permissions following the
`install instructions <https://spack.readthedocs.io/en/latest/getting_started.html#installation>`_.
When setting up Spack on a new system, it is recommended to configure it to use available
`system compilers <https://spack.readthedocs.io/en/latest/getting_started.html#compiler-configuration>`_
and `system packages <https://spack.readthedocs.io/en/latest/getting_started.html#system-packages>`_.
Once Spack is installed and set up, install CONQUEST with:

``spack install conquest``

and load the ``Conquest`` executable to ``PATH`` with

``spack load conquest``

The build can be customized by adding options to the
`Spack spec <https://spack.readthedocs.io/en/latest/basic_usage.html#specs-dependencies>`_ ``conquest``.
The CONQUEST package includes variants for OpenMP support and different matrix multiplication kernels; more details can be found in the `Spack CONQUEST package <https://spack.readthedocs.io/en/latest/package_list.html#conquest>`_.

Go to :ref:`top <install>`

21 changes: 15 additions & 6 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,21 @@ MULT_KERN = default
DIAG_DUMMY =

#Include system-dependent variables
#Get the system name automatically.
SYSTEM = $(shell hostname -d)
ifneq ($(SYSTEM),)
$(info System is $(SYSTEM))
SYSTEM_PATH = system/system.$(SYSTEM).make
else
SYSTEM_PATH = system/system.make
SYSTEM_PATH=system/system.make
endif
ifneq ("$(wildcard $(SYSTEM_PATH))","")
$(info Building on SYSTEM $(SYSTEM), using makefile $(SYSTEM_PATH))
$(info Building using system file $(SYSTEM_PATH))
include $(SYSTEM_PATH)
else
$(error Cannot find $(SYSTEM_PATH) file for system $(SYSTEM). Please make one, using system/system.example.make as an example)
$(info Cannot find system file $(SYSTEM_PATH). Please make one,)
$(info using system/system.example.make as an example, or choose)
$(info an existing file from the system directory using make SYSTEM=label)
$(info to select system/system.label.make)
$(error Compilation aborted.)
endif

#Include lists of object files
Expand Down Expand Up @@ -77,7 +80,7 @@ deps.obj.inc: $(SRCS_NODS) $(SYSTEM_PATH)

#Target
$(TARGET) : $(NODE_OBJECTS)
$(FC) $(LINKFLAGS) -o $(TARGET) $(NODE_OBJECTS) $(LIBS)
$(FC) -o $(TARGET) $(NODE_OBJECTS) $(LINKFLAGS) $(LIBS)
cp $(TARGET) ../bin

#.f90.o:
Expand All @@ -95,6 +98,12 @@ $(NODE_OBJECTS):
initial_read_module.o:initial_read_module.f90 datestamp.o
$(FC) $(COMPFLAGS) -c $<

# Note: this module seems to need compiling without optimisation
# for GCC13, possibly only on Mac. It doesn't need any other
# compiler flags (libraries or communications) so should be OK like this
pseudo_tm_info.o:pseudo_tm_info.f90
$(FC) -c $<

#datestamp.f90: $(COMMENT)
# $(ECHOSTR) "module datestamp\n" > datestamp.f90
# $(ECHOSTR) " implicit none\n" >> datestamp.f90
Expand Down
Loading

0 comments on commit 34fac21

Please sign in to comment.