diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 822661d27..e37f5ce88 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,7 @@ on: jobs: checks: - runs-on: ubuntu-latest + runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 @@ -21,7 +21,7 @@ jobs: uses: pre-commit/action@v3.0.0 build: - runs-on: ubuntu-latest + runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} steps: - uses: actions/checkout@v3 - name: Install ubuntu prerequisites @@ -62,7 +62,7 @@ jobs: publish_site: needs: [checks, build] if: ${{ github.ref == 'refs/heads/main' }} - runs-on: ubuntu-latest + runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-latest' || 'ubuntu-latest' }} steps: - name: Checkout gh-pages uses: actions/checkout@v3 diff --git a/oneapi-doc.json b/oneapi-doc.json index 79c3616a2..45b03cd0f 100644 --- a/oneapi-doc.json +++ b/oneapi-doc.json @@ -1,5 +1,3 @@ { - "version": "1.2-rev-1", - "vpl_version": "2.9.0", - "art_version": "1.0-rev-1" + "version": "1.3-provisional-rev-1" } diff --git a/scripts/oneapi.py b/scripts/oneapi.py index 1d4ce2129..9b8cd6295 100644 --- a/scripts/oneapi.py +++ b/scripts/oneapi.py @@ -228,20 +228,6 @@ def remove_elements(li, elements): return li -@action -def update_oneart(root, target=None): - for component in ['embree', 'ospray']: - copy( - f'repos/{component}/doc/{component}-spec.rst', - 'source/elements/oneART/source', - ) - for component in ['oidn', 'openvkl']: - copy( - f'repos/{component}/doc/tmp/{component}-spec.rst', - 'source/elements/oneART/source', - ) - - @action def sort_words(root, target=None): with open(join('source', 'spelling_wordlist.txt')) as fin: @@ -262,7 +248,6 @@ def sort_words(root, target=None): 'singlehtml': build, 'prep': prep, 'sort-words': sort_words, - 'update-oneart': update_oneart, } dirs = [ @@ 
-270,7 +255,6 @@ def sort_words(root, target=None): 'oneDAL', 'oneMKL', 'oneTBB', - 'oneVPL', 'sycl', 'l0', 'oneDPL', diff --git a/source/conf.py b/source/conf.py index 823a2112a..908b6fc09 100644 --- a/source/conf.py +++ b/source/conf.py @@ -172,7 +172,6 @@ breathe_projects = { "oneCCL": "elements/oneCCL/doxygen/xml", "oneDNN": "elements/oneDNN/doxygen/xml", - "oneVPL": "elements/oneVPL/doxygen/xml", } breathe_default_project = 'oneAPI' diff --git a/source/conf/common_conf.py b/source/conf/common_conf.py index 41669cc73..51f3938f3 100644 --- a/source/conf/common_conf.py +++ b/source/conf/common_conf.py @@ -25,9 +25,6 @@ sys.path.insert( 0, abspath(join(repo_root, 'source', 'elements', 'oneDAL')) # noqa: F821 ) -sys.path.insert( - 0, abspath(join(repo_root, 'source', 'elements', 'oneVPL')) # noqa: F821 -) extensions = [ 'sphinx.ext.autodoc', @@ -47,7 +44,6 @@ 'sphinxcontrib.plantuml', 'breathe', 'dalapi', # oneDAL API generator - 'vplapi', ] with open(join(repo_root, 'oneapi-doc.json')) as fin: # noqa: F821 @@ -55,7 +51,6 @@ env = { 'oneapi_version': cfg['version'], - 'vpl_spec_version': cfg['vpl_version'], } prolog_template = string.Template( @@ -73,8 +68,6 @@ .. |l0_full_name| replace:: oneAPI Level Zero .. |tbb_full_name| replace:: oneAPI Threading Building Blocks .. |tbb_version| replace:: $oneapi_version -.. |vpl_full_name| replace:: oneAPI Video Processing Library -.. |vpl_version| replace:: $vpl_spec_version .. |mkl_full_name| replace:: oneAPI Math Kernel Library .. |mkl_version| replace:: $oneapi_version .. 
include:: diff --git a/source/elements/element_list.rst b/source/elements/element_list.rst index ba77a2892..c31dc1c56 100644 --- a/source/elements/element_list.rst +++ b/source/elements/element_list.rst @@ -17,12 +17,5 @@ - :ref:`oneDAL-section`: Algorithms for accelerated data science - :ref:`oneTBB-section`: Library for adding thread-based parallelism to complex applications on multiprocessors -- :ref:`oneVPL-section`: Algorithms for accelerated video processing - :ref:`oneMKL-section`: High performance math routines for science, engineering, and financial applications -- :ref:`oneART-section`: A set of advanced ray tracing and - high-fidelity rendering and computation routines for use in a wide - variety of 3D graphics uses including, film and television - photorealistic visual effects and animation rendering, scientific - visualization, high-performance computing computations, gaming, and - more. diff --git a/source/elements/oneART/source/acknowledgment.rst b/source/elements/oneART/source/acknowledgment.rst deleted file mode 100644 index 4fa144cd6..000000000 --- a/source/elements/oneART/source/acknowledgment.rst +++ /dev/null @@ -1,11 +0,0 @@ -.. SPDX-FileCopyrightText: 2019-2021 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -.. _acknowledgment-section: - -============== -Acknowledgment -============== - - diff --git a/source/elements/oneART/source/appendices.rst b/source/elements/oneART/source/appendices.rst deleted file mode 100644 index e2494863e..000000000 --- a/source/elements/oneART/source/appendices.rst +++ /dev/null @@ -1,27 +0,0 @@ -.. SPDX-FileCopyrightText: 2019-2021 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -========== -Appendices -========== - -- :ref:`ospray-studio-section`: Rendering Focused Application fully - utilizing the OSPRay API. 
-- :ref:`ospray-hydra-plugin-section`: Universal Scene Description - “Hydra API Delegate” using OSPRay for scalable interactive and - real-time ray traced preview -- :ref:`ispc-section`: Single Program Multi-Data Vectorizing Compiler -- :ref:`future-section` -- :ref:`acknowledgment-section` - -.. toctree:: - :hidden: - :maxdepth: 1 - - ospray-studio - ospray-hydra-plugin - ispc - future-consider - acknowledgment - diff --git a/source/elements/oneART/source/component-libraries.rst b/source/elements/oneART/source/component-libraries.rst deleted file mode 100644 index 25620662a..000000000 --- a/source/elements/oneART/source/component-libraries.rst +++ /dev/null @@ -1,23 +0,0 @@ -.. SPDX-FileCopyrightText: 2021 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -=================== -Component Libraries -=================== - -There are 4 domains. - -- :ref:`embree-section`: Geometric Ray Tracing Kernel API -- :ref:`openvkl-section`: Volumetric Ray Tracing Kernel API -- :ref:`oidn-section`: High-Fidelity [AI] Image Denoising API -- :ref:`ospray-section`: Middleware Scalable Ray Tracing and Rendering API - -.. toctree:: - :hidden: - :maxdepth: 1 - - embree - openvkl - oidn - ospray diff --git a/source/elements/oneART/source/conf.py b/source/elements/oneART/source/conf.py deleted file mode 100644 index 30cceabad..000000000 --- a/source/elements/oneART/source/conf.py +++ /dev/null @@ -1,42 +0,0 @@ -# SPDX-FileCopyrightText: 2019-2020 Intel Corporation -# -# SPDX-License-Identifier: MIT - -# -*- coding: utf-8 -*- -# -# Configuration file for the Sphinx documentation builder. -# -# This file does only contain a selection of the most common options. For a -# full list see the documentation: -# http://www.sphinx-doc.org/en/master/config - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. 
If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import os -import sys -from os.path import join - - -project = 'Ray Tracing' - -repo_root = join('..', '..', '..', '..') -exec(open(join(repo_root, 'source', 'conf', 'common_conf.py')).read()) -exec(open(join(repo_root, 'source', 'conf', 'element_conf.py')).read()) - -exclude_patterns = ['nested-*.rst', - '**/*.inc.rst', - '*.inc.rst', -] - -html_theme_options = { - 'repository_url': 'https://github.com/oneapi-src/oneapi-spec', - 'path_to_docs': 'source/elements/oneART/source', - 'use_issues_button': True, - 'use_edit_page_button': True, - 'repository_branch': 'main', - 'search_bar_text': 'Search the spec...' -} diff --git a/source/elements/oneART/source/embree-spec.rst b/source/elements/oneART/source/embree-spec.rst deleted file mode 100644 index 557a20172..000000000 --- a/source/elements/oneART/source/embree-spec.rst +++ /dev/null @@ -1,8340 +0,0 @@ -Introduction -============ - -The Embree API is a low-level C99 ray tracing API which can be used to -construct 3D scenes and perform ray queries of different types inside -these scenes. All API calls carry the prefix ``rtc`` (or ``RTC`` for -types) which stands for **r**\ ay **t**\ racing **c**\ ore. - -The API also exists in an ISPC version, which is almost identical but -contains additional functions that operate on ray packets with a size of -the native SIMD width used by ISPC. For simplicity this document refers -to the C99 version of the API functions. For changes when upgrading from -the Embree 2 to the current Embree 3 API see Section [Upgrading from -Embree 2 to Embree 3]. - -The API supports scenes consisting of different geometry types such as -triangle meshes, quad meshes (triangle pairs), grid meshes, flat curves, -round curves, oriented curves, subdivision meshes, instances, and -user-defined geometries. See Section `Scene Object <#scene-object>`__ -for more information. 
- -Finding the closest hit of a ray segment with the scene -(``rtcIntersect``-type functions), and determining whether any hit -between a ray segment and the scene exists (``rtcOccluded``-type -functions) are both supported. The API supports queries for single rays, -ray packets, and ray streams. See Section `Ray Queries <#ray-queries>`__ -for more information. - -The API is designed in an object-oriented manner, e.g. it contains -device objects (``RTCDevice`` type), scene objects (``RTCScene`` type), -geometry objects (``RTCGeometry`` type), buffer objects (``RTCBuffer`` -type), and BVH objects (``RTCBVH`` type). All objects are reference -counted, and handles can be released by calling the appropriate release -function (e.g. ``rtcReleaseDevice``) or retained by incrementing the -reference count (e.g. ``rtcRetainDevice``). In general, API calls that -access the same object are not thread-safe, unless specified -differently. However, attaching geometries to the same scene and -performing ray queries in a scene is thread-safe. - -Device Object -------------- - -Embree supports a device concept, which allows different components of -the application to use the Embree API without interfering with each -other. An application typically first creates a device using the -`rtcNewDevice <#rtcnewdevice>`__ function. This device can then be used -to construct further objects, such as scenes and geometries. Before the -application exits, it should release all devices by invoking -`rtcReleaseDevice <#rtcreleasedevice>`__. An application typically -creates only a single device. If required differently, it should only -use a small number of devices at any given time. - -Each user thread has its own error flag per device. If an error occurs -when invoking an API function, this flag is set to an error code (if it -isn’t already set by a previous error). 
See Section -`rtcGetDeviceError <#rtcgetdeviceerror>`__ for information on how to -read the error code and Section -`rtcSetDeviceErrorFunction <#rtcsetdeviceerrorfunction>`__ on how to -register a callback that is invoked for each error encountered. It is -recommended to always set an error callback function, to detect all -errors. - -Scene Object ------------- - -A scene is a container for a set of geometries, and contains a spatial -acceleration structure which can be used to perform different types of -ray queries. - -A scene is created using the ``rtcNewScene`` function call, and released -using the ``rtcReleaseScene`` function call. To populate a scene with -geometries use the ``rtcAttachGeometry`` call, and to detach them use -the ``rtcDetachGeometry`` call. Once all scene geometries are attached, -an ``rtcCommitScene`` call (or ``rtcJoinCommitScene`` call) will finish -the scene description and trigger building of internal data structures. -After the scene got committed, it is safe to perform ray queries (see -Section `Ray Queries <#ray-queries>`__) or to query the scene bounding -box (see `rtcGetSceneBounds <#rtcgetscenebounds>`__ and -`rtcGetSceneLinearBounds <#rtcgetscenelinearbounds>`__). - -If scene geometries get modified or attached or detached, the -``rtcCommitScene`` call must be invoked before performing any further -ray queries for the scene; otherwise the effect of the ray query is -undefined. The modification of a geometry, committing the scene, and -tracing of rays must always happen sequentially, and never at the same -time. Any API call that sets a property of the scene or geometries -contained in the scene counts as a scene modification, e.g. including -setting of intersection filter functions. - -Scene flags can be used to configure a scene to use less memory -(``RTC_SCENE_FLAG_COMPACT``), use more robust traversal algorithms -(``RTC_SCENE_FLAG_ROBUST``), and to optimize for dynamic content.
See -Section `rtcSetSceneFlags <#rtcsetsceneflags>`__ for more details. - -A build quality can be specified for a scene to balance between -acceleration structure build performance and ray query performance. See -Section `rtcSetSceneBuildQuality <#rtcsetscenebuildquality>`__ for more -details on build quality. - -Geometry Object ---------------- - -A new geometry is created using the ``rtcNewGeometry`` function. -Depending on the geometry type, different buffers must be bound -(e.g. using ``rtcSetSharedGeometryBuffer``) to set up the geometry data. -In most cases, binding of a vertex and index buffer is required. The -number of primitives and vertices of that geometry is typically inferred -from the size of these bound buffers. - -Changes to the geometry always must be committed using the -``rtcCommitGeometry`` call before using the geometry. After committing, -a geometry is not included in any scene. A geometry can be added to a -scene by using the ``rtcAttachGeometry`` function (to automatically -assign a geometry ID) or using the ``rtcAttachGeometryById`` function -(to specify the geometry ID manually). A geometry can get attached to -multiple scenes. - -All geometry types support multi-segment motion blur with an arbitrary -number of equidistant time steps (in the range of 2 to 129) inside a -user specified time range. Each geometry can have a different number of -time steps and a different time range. The motion blur geometry is -defined by linearly interpolating the geometries of neighboring time -steps. To construct a motion blur geometry, first the number of time -steps of the geometry must be specified using the -``rtcSetGeometryTimeStepCount`` function, and then a vertex buffer for -each time step must be bound, e.g. using the -``rtcSetSharedGeometryBuffer`` function. Optionally, a time range -defining the start (and end time) of the first (and last) time step can -be set using the ``rtcSetGeometryTimeRange`` function. 
This feature will -also allow geometries to appear and disappear during the camera shutter -time if the time range is a sub range of [0,1]. - -The API supports per-geometry filter callback functions (see -``rtcSetGeometryIntersectFilterFunction`` and -``rtcSetGeometryOccludedFilterFunction``) that are invoked for each -intersection found during the ``rtcIntersect``-type or -``rtcOccluded``-type calls. The former ones are called geometry -intersection filter functions, the latter ones geometry occlusion filter -functions. These filter functions are designed to be used to ignore -intersections outside of a user-defined silhouette of a primitive, -e.g. to model tree leaves using transparency textures. - -Ray Queries ------------ - -The API supports finding the closest hit of a ray segment with the scene -(``rtcIntersect``-type functions), and determining whether any hit -between a ray segment and the scene exists (``rtcOccluded``-type -functions). - -Supported are single ray queries (``rtcIntersect1`` and -``rtcOccluded1``) as well as ray packet queries for ray packets of size -4 (``rtcIntersect4`` and ``rtcOccluded4``), ray packets of size 8 -(``rtcIntersect8`` and ``rtcOccluded8``), and ray packets of size 16 -(``rtcIntersect16`` and ``rtcOccluded16``). - -Ray streams in a variety of layouts are supported as well, such as -streams of single rays (``rtcIntersect1M`` and ``rtcOccluded1M``), -streams of pointers to single rays (``rtcIntersect1p`` and -``rtcOccluded1p``), streams of ray packets (``rtcIntersectNM`` and -``rtcOccludedNM``), and large packet-like streams in structure of -pointer layout (``rtcIntersectNp`` and ``rtcOccludedNp``). - -See Sections `rtcIntersect1 <#rtcintersect1>`__ and -`rtcOccluded1 <#rtcoccluded1>`__ for a detailed description of how to -set up and trace a ray. - -See tutorial `Triangle Geometry `__ -for a complete example of how to trace single rays and ray packets. 
Also -have a look at the tutorial `Stream -Viewer `__ for an example of how to trace -ray streams. - -Point Queries -------------- - -The API supports traversal of the BVH using a point query object that -specifies a location and a query radius. For all primitives intersecting -the according domain, a user defined callback function is called which -allows queries such as finding the closest point on the surface -geometries of the scene (see Tutorial `Closest -Point `__) or nearest neighbour queries -(see Tutorial `Voronoi `__). - -See Section `rtcPointQuery <#rtcpointquery>`__ for a detailed -description of how to set up point queries. - -Collision Detection -------------------- - -The Embree API also supports collision detection queries between two -scenes consisting only of user geometries. Embree only performs -broadphase collision detection; the narrow phase detection can be -performed through a callback function. - -See Section `rtcCollide <#rtccollide>`__ for a detailed description of -how to set up collision detection. - -See tutorial `Collision Detection <#collision-detection>`__ for a -complete example of collision detection being used on a simple cloth -solver. - -Miscellaneous -------------- - -A context filter function, which can be set per ray query, is supported -(see ``rtcInitIntersectContext``). This filter function is designed to -change the semantics of the ray query, e.g. to accumulate opacity for -transparent shadows, count the number of surfaces along a ray, collect -all hits along a ray, etc. - -The internal algorithms to build a BVH are exposed through the -``RTCBVH`` object and ``rtcBuildBVH`` call. This call makes it possible -to build a BVH in a user-specified format over user-specified -primitives. See the documentation of the ``rtcBuildBVH`` call for more -details. - -For getting the most performance out of Embree, see the Section -[Performance Recommendations]. - -..
raw:: latex - - \pagebreak - -Embree API -========== - -rtcNewDevice ------------- - -NAME -^^^^ - -.. code:: cpp - - rtcNewDevice - creates a new device - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - RTCDevice rtcNewDevice(const char* config); - -DESCRIPTION -^^^^^^^^^^^ - -This function creates a new device and returns a handle to this device. -The device object is reference counted with an initial reference count -of 1. The handle can be released using the ``rtcReleaseDevice`` API -call. - -The device object acts as a class factory for all other object types. -All objects created from the device (like scenes, geometries, etc.) hold -a reference to the device, thus the device will not be destroyed unless -these objects are destroyed first. - -Objects are only compatible if they belong to the same device, e.g it is -not allowed to create a geometry in one device and attach it to a scene -created with a different device. - -A configuration string (``config`` argument) can be passed to the device -construction. This configuration string can be ``NULL`` to use the -default configuration. - -The following configuration is supported: - -- ``threads=[int]``: Specifies a number of build threads to use. A - value of 0 enables all detected hardware threads. By default all - hardware threads are used. - -- ``user_threads=[int]``: Sets the number of user threads that can be - used to join and participate in a scene commit using - ``rtcJoinCommitScene``. The tasking system will only use - threads-user_threads many worker threads, thus if the app wants to - solely use its threads to commit scenes, just set threads equal to - user_threads. This option only has effect with the Intel(R) Threading - Building Blocks (TBB) tasking system. - -- ``set_affinity=[0/1]``: When enabled, build threads are affinitized - to hardware threads. This option is disabled by default on standard - CPUs, and enabled by default on Xeon Phi Processors. 
- -- ``start_threads=[0/1]``: When enabled, the build threads are started - upfront. This can be useful for benchmarking to exclude thread - creation time. This option is disabled by default. - -- ``isa=[sse2,sse4.2,avx,avx2,avx512]``: Use specified ISA. By default - the ISA is selected automatically. - -- ``max_isa=[sse2,sse4.2,avx,avx2,avx512]``: Configures the automated - ISA selection to use maximally the specified ISA. - -- ``hugepages=[0/1]``: Enables or disables usage of huge pages. Under - Linux huge pages are used by default but under Windows and macOS they - are disabled by default. - -- ``enable_selockmemoryprivilege=[0/1]``: When set to 1, this enables - the ``SeLockMemoryPrivilege`` privilege which is required to use huge - pages on Windows. This option has an effect only under Windows and is - ignored on other platforms. See Section [Huge Page Support] for more - details. - -- ``verbose=[0,1,2,3]``: Sets the verbosity of the output. When set to - 0, no output is printed by Embree, when set to a higher level more - output is printed. By default Embree does not print anything on the - console. - -- ``frequency_level=[simd128,simd256,simd512]``: Specifies the - frequency level the application wants to run on, which can be either: - - a) simd128 to run at highest frequency - b) simd256 to run at AVX2-heavy frequency level - c) simd512 to run at heavy AVX512 frequency level. When some - frequency level is specified, Embree will avoid doing - optimizations that may reduce the frequency level below the level - specified. E.g. if your app does not use AVX instructions setting - “frequency_level=simd128” will cause some CPUs to run at highest - frequency, which may result in higher application performance if - you do much shading. If your application heavily uses AVX code, you - should best set the frequency level to simd256.
Per default Embree - tries to avoid reducing the frequency of the CPU by setting the - simd256 level only when the CPU has no significant down clocking. - -Different configuration options should be separated by commas, e.g.: - -.. code:: cpp - - rtcNewDevice("threads=1,isa=avx"); - -EXIT STATUS -^^^^^^^^^^^ - -On success returns a handle of the created device. On failure returns -``NULL`` as device and sets a per-thread error code that can be queried -using ``rtcGetDeviceError(NULL)``. - -SEE ALSO -^^^^^^^^ - -`rtcRetainDevice <#rtcretaindevice>`__, -`rtcReleaseDevice <#rtcreleasedevice>`__ - -.. raw:: latex - - \pagebreak - -rtcRetainDevice ---------------- - -NAME -^^^^ - -.. code:: cpp - - rtcRetainDevice - increments the device reference count - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcRetainDevice(RTCDevice device); - -DESCRIPTION -^^^^^^^^^^^ - -Device objects are reference counted. The ``rtcRetainDevice`` function -increments the reference count of the passed device object (``device`` -argument). This function together with ``rtcReleaseDevice`` allows to -use the internal reference counting in a C++ wrapper class to manage the -ownership of the object. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewDevice <#rtcnewdevice>`__, -`rtcReleaseDevice <#rtcreleasedevice>`__ - -.. raw:: latex - - \pagebreak - -rtcReleaseDevice ----------------- - -NAME -^^^^ - -.. code:: cpp - - rtcReleaseDevice - decrements the device reference count - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcReleaseDevice(RTCDevice device); - -DESCRIPTION -^^^^^^^^^^^ - -Device objects are reference counted. The ``rtcReleaseDevice`` function -decrements the reference count of the passed device object (``device`` -argument). When the reference count falls to 0, the device gets -destroyed. - -All objects created from the device (like scenes, geometries, etc.) 
hold -a reference to the device, thus the device will not get destroyed unless -these objects are destroyed first. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewDevice <#rtcnewdevice>`__, `rtcRetainDevice <#rtcretaindevice>`__ - -.. raw:: latex - - \pagebreak - -rtcGetDeviceProperty --------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcGetDeviceProperty - queries properties of the device - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - ssize_t rtcGetDeviceProperty( - RTCDevice device, - enum RTCDeviceProperty prop - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcGetDeviceProperty`` function can be used to query properties -(``prop`` argument) of a device object (``device`` argument). The -returned property is an integer of type ``ssize_t``. - -Possible properties to query are: - -- ``RTC_DEVICE_PROPERTY_VERSION``: Queries the combined version number - (MAJOR.MINOR.PATCH) with two decimal digits per component. E.g. for - Embree 2.8.3 the integer 208003 is returned. - -- ``RTC_DEVICE_PROPERTY_VERSION_MAJOR``: Queries the major version - number of Embree. - -- ``RTC_DEVICE_PROPERTY_VERSION_MINOR``: Queries the minor version - number of Embree. - -- ``RTC_DEVICE_PROPERTY_VERSION_PATCH``: Queries the patch version - number of Embree. - -- ``RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED``: Queries whether the - ``rtcIntersect4`` and ``rtcOccluded4`` functions preserve packet size - and ray order when invoking callback functions. This is only the case - if Embree is compiled with ``EMBREE_RAY_PACKETS`` and ``SSE2`` (or - ``SSE4.2``) enabled, and if the machine it is running on supports - ``SSE2`` (or ``SSE4.2``). - -- ``RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED``: Queries whether the - ``rtcIntersect8`` and ``rtcOccluded8`` functions preserve packet size - and ray order when invoking callback functions. 
This is only the case - if Embree is compiled with ``EMBREE_RAY_PACKETS`` and ``AVX`` (or - ``AVX2``) enabled, and if the machine it is running on supports - ``AVX`` (or ``AVX2``). - -- ``RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED``: Queries whether the - ``rtcIntersect16`` and ``rtcOccluded16`` functions preserve packet - size and ray order when invoking callback functions. This is only the - case if Embree is compiled with ``EMBREE_RAY_PACKETS`` and ``AVX512`` - enabled, and if the machine it is running on supports ``AVX512``. - -- ``RTC_DEVICE_PROPERTY_RAY_STREAM_SUPPORTED``: Queries whether - ``rtcIntersect1M``, ``rtcIntersect1Mp``, ``rtcIntersectNM``, - ``rtcIntersectNp``, ``rtcOccluded1M``, ``rtcOccluded1Mp``, - ``rtcOccludedNM``, and ``rtcOccludedNp`` are supported. This is only - the case if Embree is compiled with ``EMBREE_RAY_PACKETS`` enabled. - -- ``RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED``: Queries whether ray masks - are supported. This is only the case if Embree is compiled with - ``EMBREE_RAY_MASK`` enabled. - -- ``RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED``: Queries whether - back face culling is enabled. This is only the case if Embree is - compiled with ``EMBREE_BACKFACE_CULLING`` enabled. - -- ``RTC_DEVICE_PROPERTY_COMPACT_POLYS_ENABLED``: Queries whether - compact polys is enabled. This is only the case if Embree is compiled - with ``EMBREE_COMPACT_POLYS`` enabled. - -- ``RTC_DEVICE_PROPERTY_FILTER_FUNCTION_SUPPORTED``: Queries whether - filter functions are supported, which is the case if Embree is - compiled with ``EMBREE_FILTER_FUNCTION`` enabled. - -- ``RTC_DEVICE_PROPERTY_IGNORE_INVALID_RAYS_ENABLED``: Queries whether - invalid rays are ignored, which is the case if Embree is compiled - with ``EMBREE_IGNORE_INVALID_RAYS`` enabled. - -- ``RTC_DEVICE_PROPERTY_TRIANGLE_GEOMETRY_SUPPORTED``: Queries whether - triangles are supported, which is the case if Embree is compiled with - ``EMBREE_GEOMETRY_TRIANGLE`` enabled. 
- -- ``RTC_DEVICE_PROPERTY_QUAD_GEOMETRY_SUPPORTED``: Queries whether - quads are supported, which is the case if Embree is compiled with - ``EMBREE_GEOMETRY_QUAD`` enabled. - -- ``RTC_DEVICE_PROPERTY_SUBDIVISION_GEOMETRY_SUPPORTED``: Queries - whether subdivision meshes are supported, which is the case if Embree - is compiled with ``EMBREE_GEOMETRY_SUBDIVISION`` enabled. - -- ``RTC_DEVICE_PROPERTY_CURVE_GEOMETRY_SUPPORTED``: Queries whether - curves are supported, which is the case if Embree is compiled with - ``EMBREE_GEOMETRY_CURVE`` enabled. - -- ``RTC_DEVICE_PROPERTY_POINT_GEOMETRY_SUPPORTED``: Queries whether - points are supported, which is the case if Embree is compiled with - ``EMBREE_GEOMETRY_POINT`` enabled. - -- ``RTC_DEVICE_PROPERTY_USER_GEOMETRY_SUPPORTED``: Queries whether user - geometries are supported, which is the case if Embree is compiled - with ``EMBREE_GEOMETRY_USER`` enabled. - -- ``RTC_DEVICE_PROPERTY_TASKING_SYSTEM``: Queries the tasking system - Embree is compiled with. Possible return values are: - - 0. internal tasking system - 1. Intel Threading Building Blocks (TBB) - 2. Parallel Patterns Library (PPL) - -- ``RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED``: Queries whether - ``rtcJoinCommitScene`` is supported. This is not the case when Embree - is compiled with PPL or older versions of TBB. - -- ``RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED``: Queries whether - ``rtcCommitScene`` can get invoked from multiple TBB worker threads - concurrently. This feature is only supported starting with TBB 2019 - Update 9. - -EXIT STATUS -^^^^^^^^^^^ - -On success returns the value of the queried property. For properties -returning a boolean value, the return value 0 denotes ``false`` and 1 -denotes ``true``. - -On failure zero is returned and an error code is set that can be queried -using ``rtcGetDeviceError``. - -.. raw:: latex - - \pagebreak - -rtcGetDeviceError ------------------ - -NAME -^^^^ - -.. 
code:: cpp - - rtcGetDeviceError - returns the error code of the device - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - RTCError rtcGetDeviceError(RTCDevice device); - -DESCRIPTION -^^^^^^^^^^^ - -Each thread has its own error code per device. If an error occurs when -calling an API function, this error code is set to the occurred error if -it stores no previous error. The ``rtcGetDeviceError`` function reads -and returns the currently stored error and clears the error code. This -assures that the returned error code is always the first error occurred -since the last invocation of ``rtcGetDeviceError``. - -Possible error codes returned by ``rtcGetDeviceError`` are: - -- ``RTC_ERROR_NONE``: No error occurred. - -- ``RTC_ERROR_UNKNOWN``: An unknown error has occurred. - -- ``RTC_ERROR_INVALID_ARGUMENT``: An invalid argument was specified. - -- ``RTC_ERROR_INVALID_OPERATION``: The operation is not allowed for the - specified object. - -- ``RTC_ERROR_OUT_OF_MEMORY``: There is not enough memory left to - complete the operation. - -- ``RTC_ERROR_UNSUPPORTED_CPU``: The CPU is not supported as it does - not support the lowest ISA Embree is compiled for. - -- ``RTC_ERROR_CANCELLED``: The operation got canceled by a memory - monitor callback or progress monitor callback function. - -When the device construction fails, ``rtcNewDevice`` returns ``NULL`` as -device. To detect the error code of such a failed device construction, -pass ``NULL`` as device to the ``rtcGetDeviceError`` function. For all -other invocations of ``rtcGetDeviceError``, a proper device pointer must -be specified. - -EXIT STATUS -^^^^^^^^^^^ - -Returns the error code for the device. - -SEE ALSO -^^^^^^^^ - -`rtcSetDeviceErrorFunction <#rtcsetdeviceerrorfunction>`__ - -.. raw:: latex - - \pagebreak - -rtcSetDeviceErrorFunction -------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetDeviceErrorFunction - sets an error callback function for the device - -SYNOPSIS -^^^^^^^^ - -..
code:: cpp - - #include - - typedef void (*RTCErrorFunction)( - void* userPtr, - RTCError code, - const char* str - ); - - void rtcSetDeviceErrorFunction( - RTCDevice device, - RTCErrorFunction error, - void* userPtr - ); - -DESCRIPTION -^^^^^^^^^^^ - -Using the ``rtcSetDeviceErrorFunction`` call, it is possible to set a -callback function (``error`` argument) with payload (``userPtr`` -argument), which is called whenever an error occurs for the specified -device (``device`` argument). - -Only a single callback function can be registered per device, and -further invocations overwrite the previously set callback function. -Passing ``NULL`` as function pointer disables the registered callback -function. - -When the registered callback function is invoked, it gets passed the -user-defined payload (``userPtr`` argument as specified at registration -time), the error code (``code`` argument) of the occurred error, as well -as a string (``str`` argument) that further describes the error. - -The error code is also set if an error callback function is registered. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcGetDeviceError <#rtcgetdeviceerror>`__ - -.. raw:: latex - - \pagebreak - -rtcSetDeviceMemoryMonitorFunction ---------------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetDeviceMemoryMonitorFunction - registers a callback function - to track memory consumption - -SYNOPSIS -^^^^^^^^ - -.. 
code:: cpp - - #include - - typedef bool (*RTCMemoryMonitorFunction)( - void* userPtr, - ssize_t bytes, - bool post - ); - - void rtcSetDeviceMemoryMonitorFunction( - RTCDevice device, - RTCMemoryMonitorFunction memoryMonitor, - void* userPtr - ); - -DESCRIPTION -^^^^^^^^^^^ - -Using the ``rtcSetDeviceMemoryMonitorFunction`` call, it is possible to -register a callback function (``memoryMonitor`` argument) with payload -(``userPtr`` argument) for a device (``device`` argument), which is -called whenever internal memory is allocated or deallocated by objects -of that device. Using this memory monitor callback mechanism, the -application can track the memory consumption of an Embree device, and -optionally terminate API calls that consume too much memory. - -Only a single callback function can be registered per device, and -further invocations overwrite the previously set callback function. -Passing ``NULL`` as function pointer disables the registered callback -function. - -Once registered, the Embree device will invoke the memory monitor -callback function before or after it allocates or frees important memory -blocks. The callback function gets passed the payload as specified at -registration time (``userPtr`` argument), the number of bytes allocated -or deallocated (``bytes`` argument), and whether the callback is invoked -after the allocation or deallocation took place (``post`` argument). The -callback function might get called from multiple threads concurrently. - -The application can track the current memory usage of the Embree device -by atomically accumulating the ``bytes`` input parameter provided to the -callback function. This parameter will be >0 for allocations and <0 for -deallocations. - -Embree will continue its operation normally when returning ``true`` from -the callback function. If ``false`` is returned, Embree will cancel the -current operation with the ``RTC_ERROR_OUT_OF_MEMORY`` error code. 
-Issuing multiple cancel requests from different threads is allowed. -Canceling will only happen when the callback was called for allocations -(bytes > 0), otherwise the cancel request will be ignored. - -If a callback to cancel was invoked before the allocation happens -(``post == false``), then the ``bytes`` parameter should not be -accumulated, as the allocation will never happen. If the callback to -cancel was invoked after the allocation happened (``post == true``), -then the ``bytes`` parameter should be accumulated, as the allocation -properly happened and a deallocation will later free that data block. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewDevice <#rtcnewdevice>`__ - -.. raw:: latex - - \pagebreak - -rtcNewScene ------------ - -NAME -^^^^ - -.. code:: cpp - - rtcNewScene - creates a new scene - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - RTCScene rtcNewScene(RTCDevice device); - -DESCRIPTION -^^^^^^^^^^^ - -This function creates a new scene bound to the specified device -(``device`` argument), and returns a handle to this scene. The scene -object is reference counted with an initial reference count of 1. The -scene handle can be released using the ``rtcReleaseScene`` API call. - -EXIT STATUS -^^^^^^^^^^^ - -On success a scene handle is returned. On failure ``NULL`` is returned -and an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcRetainScene <#rtcretainscene>`__, -`rtcReleaseScene <#rtcreleasescene>`__ - -.. raw:: latex - - \pagebreak - -rtcGetSceneDevice ------------------ - -NAME -^^^^ - -.. code:: cpp - - rtcGetSceneDevice - returns the device the scene got created in - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - RTCDevice rtcGetSceneDevice(RTCScene scene); - -DESCRIPTION -^^^^^^^^^^^ - -This function returns the device object the scene got created in. 
The -returned handle owns one additional reference to the device object, thus -you need to call ``rtcReleaseDevice`` when the returned handle is -no longer required. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcReleaseDevice <#rtcreleasedevice>`__ - -.. raw:: latex - - \pagebreak - -rtcRetainScene --------------- - -NAME -^^^^ - -.. code:: cpp - - rtcRetainScene - increments the scene reference count - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include <embree3/rtcore.h> - - void rtcRetainScene(RTCScene scene); - -DESCRIPTION -^^^^^^^^^^^ - -Scene objects are reference counted. The ``rtcRetainScene`` function -increments the reference count of the passed scene object (``scene`` -argument). This function together with ``rtcReleaseScene`` allows using -the internal reference counting in a C++ wrapper class to handle the -ownership of the object. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewScene <#rtcnewscene>`__, `rtcReleaseScene <#rtcreleasescene>`__ - -.. raw:: latex - - \pagebreak - -rtcReleaseScene ---------------- - -NAME -^^^^ - -.. code:: cpp - - rtcReleaseScene - decrements the scene reference count - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include <embree3/rtcore.h> - - void rtcReleaseScene(RTCScene scene); - -DESCRIPTION -^^^^^^^^^^^ - -Scene objects are reference counted. The ``rtcReleaseScene`` function -decrements the reference count of the passed scene object (``scene`` -argument). When the reference count falls to 0, the scene gets -destroyed. - -The scene holds a reference to all attached geometries, thus if the -scene gets destroyed, all geometries get detached and their reference -count decremented. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. 
- -SEE ALSO -^^^^^^^^ - -`rtcNewScene <#rtcnewscene>`__, `rtcRetainScene <#rtcretainscene>`__ - -.. raw:: latex - - \pagebreak - -rtcAttachGeometry ------------------ - -NAME -^^^^ - -.. code:: cpp - - rtcAttachGeometry - attaches a geometry to the scene - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include <embree3/rtcore.h> - - unsigned int rtcAttachGeometry( - RTCScene scene, - RTCGeometry geometry - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcAttachGeometry`` function attaches a geometry (``geometry`` -argument) to a scene (``scene`` argument) and assigns a geometry ID to -that geometry. All geometries attached to a scene are defined to be -included inside the scene. A geometry can get attached to multiple -scenes. The geometry ID is unique for the scene, and is used to identify -the geometry when hit by a ray during ray queries. - -This function is thread-safe, thus multiple threads can attach -geometries to a scene in parallel. - -The geometry IDs are assigned sequentially, starting from 0, as long as -no geometry got detached. If geometries got detached, the implementation -will reuse IDs in an implementation-dependent way. Consequently, -sequential assignment is no longer guaranteed, but a compact range of -IDs is. - -These rules allow the application to manage a dynamic array to -efficiently map from geometry IDs to its own geometry representation. -Alternatively, the application can also use per-geometry user data to -map to its geometry representation. See ``rtcSetGeometryUserData`` and -``rtcGetGeometryUserData`` for more information. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcSetGeometryUserData <#rtcsetgeometryuserdata>`__, -`rtcGetGeometryUserData <#rtcgetgeometryuserdata>`__ - -.. raw:: latex - - \pagebreak - -rtcAttachGeometryByID ---------------------- - -NAME -^^^^ - -.. 
code:: cpp - - rtcAttachGeometryByID - attaches a geometry to the scene - using a specified geometry ID - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcAttachGeometryByID( - RTCScene scene, - RTCGeometry geometry, - unsigned int geomID - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcAttachGeometryByID`` function attaches a geometry (``geometry`` -argument) to a scene (``scene`` argument) and assigns a user provided -geometry ID (``geomID`` argument) to that geometry. All geometries -attached to a scene are defined to be included inside the scene. A -geometry can get attached to multiple scenes. The passed user-defined -geometry ID is used to identify the geometry when hit by a ray during -ray queries. Using this function, it is possible to share the same IDs -to refer to geometries inside the application and Embree. - -This function is thread-safe, thus multiple threads can attach -geometries to a scene in parallel. - -The user-provided geometry ID must be unused in the scene, otherwise the -creation of the geometry will fail. Further, the user-provided geometry -IDs should be compact, as Embree internally creates a vector which size -is equal to the largest geometry ID used. Creating very large geometry -IDs for small scenes would thus cause a memory consumption and -performance overhead. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcAttachGeometry <#rtcattachgeometry>`__ - -.. raw:: latex - - \pagebreak - -rtcDetachGeometry ------------------ - -NAME -^^^^ - -.. code:: cpp - - rtcDetachGeometry - detaches a geometry from the scene - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcDetachGeometry(RTCScene scene, unsigned int geomID); - -DESCRIPTION -^^^^^^^^^^^ - -This function detaches a geometry identified by its geometry ID -(``geomID`` argument) from a scene (``scene`` argument). 
When detached, -the geometry is no longer contained in the scene. - -This function is thread-safe, thus multiple threads can detach -geometries from a scene at the same time. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcAttachGeometry <#rtcattachgeometry>`__, -`rtcAttachGeometryByID <#rtcattachgeometrybyid>`__ - -.. raw:: latex - - \pagebreak - -rtcGetGeometry --------------- - -NAME -^^^^ - -.. code:: cpp - - rtcGetGeometry - returns the geometry bound to - the specified geometry ID - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - RTCGeometry rtcGetGeometry(RTCScene scene, unsigned int geomID); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcGetGeometry`` function returns the geometry that is bound to -the specified geometry ID (``geomID`` argument) for the specified scene -(``scene`` argument). This function just looks up the handle and does -*not* increment the reference count. If you want to get ownership of the -handle, you need to additionally call ``rtcRetainGeometry``. - -This function is not thread safe and thus can be used during rendering. -However, it is generally recommended to store the geometry handle inside -the application’s geometry representation and look up the geometry -handle from that representation directly. - -If you need a thread safe version of this function please use -`rtcGetGeometryThreadSafe <#rtcgetgeometrythreadsafe>`__. - -EXIT STATUS -^^^^^^^^^^^ - -On failure ``NULL`` is returned and an error code is set that can be -queried using ``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcAttachGeometry <#rtcattachgeometry>`__, -`rtcAttachGeometryByID <#rtcattachgeometrybyid>`__, -`rtcGetGeometryThreadSafe <#rtcgetgeometrythreadsafe>`__ - -.. raw:: latex - - \pagebreak - -rtcGetGeometryThreadSafe ------------------------- - -NAME -^^^^ - -.. 
code:: cpp - - rtcGetGeometryThreadSafe - returns the geometry bound to - the specified geometry ID - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - RTCGeometry rtcGetGeometryThreadSafe(RTCScene scene, unsigned int geomID); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcGetGeometryThreadSafe`` function returns the geometry that is -bound to the specified geometry ID (``geomID`` argument) for the -specified scene (``scene`` argument). This function just looks up the -handle and does *not* increment the reference count. If you want to get -ownership of the handle, you need to additionally call -``rtcRetainGeometry``. - -This function is thread safe and should NOT get used during rendering. -If you need a fast non-thread safe version during rendering please use -the `rtcGetGeometry <#rtcgetgeometry>`__ function. - -EXIT STATUS -^^^^^^^^^^^ - -On failure ``NULL`` is returned and an error code is set that can be -queried using ``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcAttachGeometry <#rtcattachgeometry>`__, -`rtcAttachGeometryByID <#rtcattachgeometrybyid>`__, -`rtcGetGeometry <#rtcgetgeometry>`__ - -.. raw:: latex - - \pagebreak - -rtcCommitScene --------------- - -NAME -^^^^ - -.. code:: cpp - - rtcCommitScene - commits scene changes - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcCommitScene(RTCScene scene); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcCommitScene`` function commits all changes for the specified -scene (``scene`` argument). This internally triggers building of a -spatial acceleration structure for the scene using all available worker -threads. Ray queries can be performed only after committing all scene -changes. - -If the application uses TBB 2019 Update 9 or later for parallelization -of rendering, lazy scene construction during rendering is supported by -``rtcCommitScene``. Therefore ``rtcCommitScene`` can get called from -multiple TBB worker threads concurrently for the same scene. 
The -``rtcCommitScene`` function will then internally isolate the scene -construction using a tbb::isolated_task_group. The alternative approach -of using ``rtcJoinCommitScene`` which uses a tbb::task_arena internally, -is not recommended due to its high runtime overhead. - -If scene geometries get modified or attached or detached, the -``rtcCommitScene`` call must be invoked before performing any further -ray queries for the scene; otherwise the effect of the ray query is -undefined. The modification of a geometry, committing the scene, and -tracing of rays must always happen sequentially, and never at the same -time. Any API call that sets a property of the scene or geometries -contained in the scene counts as a scene modification, e.g. including -setting of intersection filter functions. - -The kind of acceleration structure built can be influenced using scene -flags (see ``rtcSetSceneFlags``), and the quality can be specified using -the ``rtcSetSceneBuildQuality`` function. - -Embree silently ignores primitives during spatial acceleration structure -construction that would cause numerical issues, e.g. primitives -containing NaNs, INFs, or values greater than 1.844E18f (as no -reasonable calculations can be performed with such values without -causing overflows). - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcJoinCommitScene <#rtcjoincommitscene>`__ - -.. raw:: latex - - \pagebreak - -rtcJoinCommitScene ------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcJoinCommitScene - commits the scene from multiple threads - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include <embree3/rtcore.h> - - void rtcJoinCommitScene(RTCScene scene); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcJoinCommitScene`` function commits all changes for the -specified scene (``scene`` argument). The scene commit internally -triggers building of a spatial acceleration structure for the scene. 
Ray -queries can be performed after scene changes got properly committed. - -The ``rtcJoinCommitScene`` function can get called from multiple user -threads which will all cooperate in the build operation. All threads -calling into this function will return from ``rtcJoinCommitScene`` after -the scene commit is finished. All threads must consistently call -``rtcJoinCommitScene`` and not ``rtcCommitScene``. - -In contrast to the ``rtcCommitScene`` function, the -``rtcJoinCommitScene`` function can be called from multiple user -threads, while the ``rtcCommitScene`` can only get called from multiple -TBB worker threads when used concurrently. For optimal performance we -strongly recommend using TBB inside the application together with the -``rtcCommitScene`` function and to avoid using the -``rtcJoinCommitScene`` function. - -The ``rtcJoinCommitScene`` feature allows a flexible way to lazily -create hierarchies during rendering. A thread reaching a -not-yet-constructed sub-scene of a two-level scene can generate the -sub-scene geometry and call ``rtcJoinCommitScene`` on that just -generated scene. During construction, further threads reaching the -not-yet-built scene can join the build operation by also invoking -``rtcJoinCommitScene``. A thread that calls ``rtcJoinCommitScene`` after -the build finishes will directly return from the ``rtcJoinCommitScene`` -call. - -Multiple scene commit operations on different scenes can be running at -the same time, hence it is possible to commit many small scenes in -parallel, distributing the commits to many threads. - -When using Embree with the Intel® Threading Building Blocks (which is -the default), threads that call ``rtcJoinCommitScene`` will join the -build operation, but other TBB worker threads might also participate in -the build. To avoid thread oversubscription, we recommend using TBB also -inside the application. Further, the join mode only works properly -starting with TBB v4.4 Update 1. 
For earlier TBB versions, threads that -call ``rtcJoinCommitScene`` to join a running build will just trigger -the build and wait for the build to finish. Further, old TBB versions -with ``TBB_INTERFACE_VERSION_MAJOR < 8`` do not support -``rtcJoinCommitScene``, and invoking this function will result in an -error. - -When using Embree with the internal tasking system, only threads that -call ``rtcJoinCommitScene`` will perform the build operation, and no -additional worker threads will be scheduled. - -When using Embree with the Parallel Patterns Library (PPL), -``rtcJoinCommitScene`` is not supported and calling that function will -result in an error. - -To detect whether ``rtcJoinCommitScene`` is supported, use the -``rtcGetDeviceProperty`` function. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcCommitScene <#rtccommitscene>`__, -`rtcGetDeviceProperty <#rtcgetdeviceproperty>`__ - -.. raw:: latex - - \pagebreak - -rtcSetSceneProgressMonitorFunction ----------------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetSceneProgressMonitorFunction - registers a callback - to track build progress - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - typedef bool (*RTCProgressMonitorFunction)( - void* ptr, - double n - ); - - void rtcSetSceneProgressMonitorFunction( - RTCScene scene, - RTCProgressMonitorFunction progress, - void* userPtr - ); - -DESCRIPTION -^^^^^^^^^^^ - -Embree supports a progress monitor callback mechanism that can be used -to report progress of hierarchy build operations and to cancel build -operations. - -The ``rtcSetSceneProgressMonitorFunction`` registers a progress monitor -callback function (``progress`` argument) with payload (``userPtr`` -argument) for the specified scene (``scene`` argument). - -Only a single callback function can be registered per scene, and further -invocations overwrite the previously set callback function. 
Passing -``NULL`` as function pointer disables the registered callback function. - -Once registered, Embree will invoke the callback function multiple times -during hierarchy build operations of the scene, by passing the payload -as set at registration time (``userPtr`` argument), and a double in the -range :math:`[0, 1]` which estimates the progress of the operation -(``n`` argument). The callback function might be called from multiple -threads concurrently. - -When returning ``true`` from the callback function, Embree will continue -the build operation normally. When returning ``false``, Embree will -cancel the build operation with the ``RTC_ERROR_CANCELLED`` error code. -Issuing multiple cancel requests for the same build operation is -allowed. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewScene <#rtcnewscene>`__ - -.. raw:: latex - - \pagebreak - -rtcSetSceneBuildQuality ------------------------ - -NAME -^^^^ - -.. code:: cpp - - rtcSetSceneBuildQuality - sets the build quality for - the scene - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcSetSceneBuildQuality( - RTCScene scene, - enum RTCBuildQuality quality - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetSceneBuildQuality`` function sets the build quality -(``quality`` argument) for the specified scene (``scene`` argument). -Possible values for the build quality are: - -- ``RTC_BUILD_QUALITY_LOW``: Create lower quality data structures, - e.g. for dynamic scenes. A two-level spatial index structure is built - when enabling this mode, which supports fast partial scene updates, - and allows for setting a per-geometry build quality through the - ``rtcSetGeometryBuildQuality`` function. - -- ``RTC_BUILD_QUALITY_MEDIUM``: Default build quality for most usages. - Gives a good compromise between build and render performance. 
- -- ``RTC_BUILD_QUALITY_HIGH``: Create higher quality data structures for - final-frame rendering. For certain geometry types this enables a - spatial split BVH. - -Selecting a higher build quality results in better rendering performance -but slower scene commit times. The default build quality for a scene is -``RTC_BUILD_QUALITY_MEDIUM``. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcSetGeometryBuildQuality <#rtcsetgeometrybuildquality>`__ - -.. raw:: latex - - \pagebreak - -rtcSetSceneFlags ----------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetSceneFlags - sets the flags for the scene - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcSetSceneFlags(RTCScene scene, enum RTCSceneFlags flags); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetSceneFlags`` function sets the scene flags (``flags`` -argument) for the specified scene (``scene`` argument). Possible scene -flags are: - -- ``RTC_SCENE_FLAG_NONE``: No flags set. - -- ``RTC_SCENE_FLAG_DYNAMIC``: Provides better build performance for - dynamic scenes (but also higher memory consumption). - -- ``RTC_SCENE_FLAG_COMPACT``: Uses compact acceleration structures and - avoids algorithms that consume much memory. - -- ``RTC_SCENE_FLAG_ROBUST``: Uses acceleration structures that allow - for robust traversal, and avoids optimizations that reduce arithmetic - accuracy. This mode is typically used for avoiding artifacts caused - by rays shooting through edges of neighboring primitives. - -- ``RTC_SCENE_FLAG_CONTEXT_FILTER_FUNCTION``: Enables support for a - filter function inside the intersection context for this scene. See - Section `rtcInitIntersectContext <#rtcinitintersectcontext>`__ for - more details. - -Multiple flags can be enabled using an ``or`` operation, -e.g. ``RTC_SCENE_FLAG_COMPACT | RTC_SCENE_FLAG_ROBUST``. 
- -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcGetSceneFlags <#rtcgetsceneflags>`__ - -.. raw:: latex - - \pagebreak - -rtcGetSceneFlags ----------------- - -NAME -^^^^ - -.. code:: cpp - - rtcGetSceneFlags - returns the flags of the scene - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - enum RTCSceneFlags rtcGetSceneFlags(RTCScene scene); - -DESCRIPTION -^^^^^^^^^^^ - -Queries the flags of a scene. This function can be useful when setting -individual flags, e.g. to just set the robust mode without changing -other flags the following way: - -.. code:: cpp - - RTCSceneFlags flags = rtcGetSceneFlags(scene); - rtcSetSceneFlags(scene, RTC_SCENE_FLAG_ROBUST | flags); - -EXIT STATUS -^^^^^^^^^^^ - -On failure ``RTC_SCENE_FLAG_NONE`` is returned and an error code is set -that can be queried using ``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcSetSceneFlags <#rtcsetsceneflags>`__ - -.. raw:: latex - - \pagebreak - -rtcGetSceneBounds ------------------ - -NAME -^^^^ - -.. code:: cpp - - rtcGetSceneBounds - returns the axis-aligned bounding box of the scene - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - struct RTCORE_ALIGN(16) RTCBounds - { - float lower_x, lower_y, lower_z, align0; - float upper_x, upper_y, upper_z, align1; - }; - - void rtcGetSceneBounds( - RTCScene scene, - struct RTCBounds* bounds_o - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcGetSceneBounds`` function queries the axis-aligned bounding box -of the specified scene (``scene`` argument) and stores that bounding box -to the provided destination pointer (``bounds_o`` argument). The stored -bounding box consists of lower and upper bounds for the x, y, and z -dimensions as specified by the ``RTCBounds`` structure. - -The provided destination pointer must be aligned to 16 bytes. The -function may be invoked only after committing the scene; otherwise the -result is undefined. 
- -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcGetSceneLinearBounds <#rtcgetscenelinearbounds>`__, -`rtcCommitScene <#rtccommitscene>`__, -`rtcJoinCommitScene <#rtcjoincommitscene>`__ - -.. raw:: latex - - \pagebreak - -rtcGetSceneLinearBounds ------------------------ - -NAME -^^^^ - -.. code:: cpp - - rtcGetSceneLinearBounds - returns the linear bounds of the scene - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - struct RTCORE_ALIGN(16) RTCLinearBounds - { - RTCBounds bounds0; - RTCBounds bounds1; - }; - - void rtcGetSceneLinearBounds( - RTCScene scene, - struct RTCLinearBounds* bounds_o - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcGetSceneLinearBounds`` function queries the linear bounds of -the specified scene (``scene`` argument) and stores them to the provided -destination pointer (``bounds_o`` argument). The stored linear bounds -consist of bounding boxes for time 0 (``bounds0`` member) and time 1 -(``bounds1`` member) as specified by the ``RTCLinearBounds`` structure. -Linearly interpolating these bounds to a specific time ``t`` yields -bounds for the geometry at that time. - -The provided destination pointer must be aligned to 16 bytes. The -function may be called only after committing the scene, otherwise the -result is undefined. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcGetSceneBounds <#rtcgetscenebounds>`__, -`rtcCommitScene <#rtccommitscene>`__, -`rtcJoinCommitScene <#rtcjoincommitscene>`__ - -.. raw:: latex - - \pagebreak - -rtcNewGeometry --------------- - -NAME -^^^^ - -.. code:: cpp - - rtcNewGeometry - creates a new geometry object - -SYNOPSIS -^^^^^^^^ - -.. 
code:: cpp - - #include - - enum RTCGeometryType - { - RTC_GEOMETRY_TYPE_TRIANGLE, - RTC_GEOMETRY_TYPE_QUAD, - RTC_GEOMETRY_TYPE_SUBDIVISION, - RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE, - RTC_GEOMETRY_TYPE_FLAT_BEZIER_CURVE, - RTC_GEOMETRY_TYPE_FLAT_BSPLINE_CURVE, - RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE, - RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE, - RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BEZIER_CURVE, - RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BSPLINE_CURVE, - RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_HERMITE_CURVE, - RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_CATMULL_ROM_CURVE, - RTC_GEOMETRY_TYPE_CONE_LINEAR_CURVE, - RTC_GEOMETRY_TYPE_ROUND_LINEAR_CURVE, - RTC_GEOMETRY_TYPE_ROUND_BEZIER_CURVE, - RTC_GEOMETRY_TYPE_ROUND_BSPLINE_CURVE, - RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE, - RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE, - RTC_GEOMETRY_TYPE_GRID, - RTC_GEOMETRY_TYPE_SPHERE_POINT, - RTC_GEOMETRY_TYPE_DISC_POINT, - RTC_GEOMETRY_TYPE_ORIENTED_DISC_POINT, - RTC_GEOMETRY_TYPE_USER, - RTC_GEOMETRY_TYPE_INSTANCE - }; - - RTCGeometry rtcNewGeometry( - RTCDevice device, - enum RTCGeometryType type - ); - -DESCRIPTION -^^^^^^^^^^^ - -Geometries are objects that represent an array of primitives of the same -type. The ``rtcNewGeometry`` function creates a new geometry of -specified type (``type`` argument) bound to the specified device -(``device`` argument) and returns a handle to this geometry. The -geometry object is reference counted with an initial reference count of -1. The geometry handle can be released using the ``rtcReleaseGeometry`` -API call. 
- -| Supported geometry types are triangle meshes - (``RTC_GEOMETRY_TYPE_TRIANGLE`` type), quad meshes (triangle pairs) - (``RTC_GEOMETRY_TYPE_QUAD`` type), Catmull-Clark subdivision surfaces - (``RTC_GEOMETRY_TYPE_SUBDIVISION`` type), curve geometries with - different bases (``RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE``, - ``RTC_GEOMETRY_TYPE_FLAT_BEZIER_CURVE``, -| ``RTC_GEOMETRY_TYPE_FLAT_BSPLINE_CURVE``, - ``RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE``, -| ``RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE``, - ``RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BEZIER_CURVE``, - ``RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BSPLINE_CURVE``, - ``RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_HERMITE_CURVE``, - ``RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_CATMULL_ROM_CURVE``, - ``RTC_GEOMETRY_TYPE_CONE_LINEAR_CURVE``, - ``RTC_GEOMETRY_TYPE_ROUND_LINEAR_CURVE``, - ``RTC_GEOMETRY_TYPE_ROUND_BEZIER_CURVE``, - ``RTC_GEOMETRY_TYPE_ROUND_BSPLINE_CURVE``, - ``RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE``, - ``RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE`` types), grid meshes - (``RTC_GEOMETRY_TYPE_GRID``), point geometries - (``RTC_GEOMETRY_TYPE_SPHERE_POINT``, ``RTC_GEOMETRY_TYPE_DISC_POINT``, - ``RTC_GEOMETRY_TYPE_ORIENTED_DISC_POINT``), user-defined geometries - (``RTC_GEOMETRY_TYPE_USER``), and instances - (``RTC_GEOMETRY_TYPE_INSTANCE``). - -The types ``RTC_GEOMETRY_TYPE_ROUND_BEZIER_CURVE``, -``RTC_GEOMETRY_TYPE_ROUND_BSPLINE_CURVE``, and -``RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE`` will treat the curve as a -sweep surface of a varying-radius circle swept tangentially along the -curve. The types ``RTC_GEOMETRY_TYPE_FLAT_BEZIER_CURVE``, -``RTC_GEOMETRY_TYPE_FLAT_BSPLINE_CURVE``, and -``RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE`` use ray-facing ribbons as a -faster-to-intersect approximation. - -After construction, geometries are enabled by default and not attached -to any scene. Geometries can be disabled (``rtcDisableGeometry`` call), -and enabled again (``rtcEnableGeometry`` call). 
A geometry can be -attached to multiple scenes using the ``rtcAttachGeometry`` call (or -``rtcAttachGeometryByID`` call), and detached using the -``rtcDetachGeometry`` call. During attachment, a geometry ID is assigned -to the geometry (or assigned by the user when using the -``rtcAttachGeometryByID`` call), which uniquely identifies the geometry -inside that scene. This identifier is returned when primitives of the -geometry are hit in later ray queries for the scene. - -Geometries can also be modified, including their vertex and index -buffers. After modifying a buffer, ``rtcUpdateGeometryBuffer`` must be -called to notify that the buffer got modified. - -The application can use the ``rtcSetGeometryUserData`` function to set a -user data pointer to its own geometry representation, and later read out -this pointer using the ``rtcGetGeometryUserData`` function. - -After setting up the geometry or modifying it, ``rtcCommitGeometry`` -must be called to finish the geometry setup. After committing the -geometry, vertex data interpolation can be performed using the -``rtcInterpolate`` and ``rtcInterpolateN`` functions. - -A build quality can be specified for a geometry using the -``rtcSetGeometryBuildQuality`` function, to balance between acceleration -structure build performance and ray query performance. The build quality -per geometry will be used if a two-level acceleration structure is built -internally, which is the case if the ``RTC_BUILD_QUALITY_LOW`` is set as -the scene build quality. See Section -`rtcSetSceneBuildQuality <#rtcsetscenebuildquality>`__ for more details. - -EXIT STATUS -^^^^^^^^^^^ - -On failure ``NULL`` is returned and an error code is set that can be -queried using ``rtcGetDeviceError``. 
- -SEE ALSO -^^^^^^^^ - -`rtcEnableGeometry <#rtcenablegeometry>`__, -`rtcDisableGeometry <#rtcdisablegeometry>`__, -`rtcAttachGeometry <#rtcattachgeometry>`__, -`rtcAttachGeometryByID <#rtcattachgeometrybyid>`__, -`rtcUpdateGeometryBuffer <#rtcupdategeometrybuffer>`__, -`rtcSetGeometryUserData <#rtcsetgeometryuserdata>`__, -`rtcGetGeometryUserData <#rtcgetgeometryuserdata>`__, -`rtcCommitGeometry <#rtccommitgeometry>`__, -`rtcInterpolate <#rtcinterpolate>`__, -`rtcInterpolateN <#rtcinterpolaten>`__, -`rtcSetGeometryBuildQuality <#rtcsetgeometrybuildquality>`__, -`rtcSetSceneBuildQuality <#rtcsetscenebuildquality>`__, -`RTC_GEOMETRY_TYPE_TRIANGLE <#rtc_geometry_type_triangle>`__, -`RTC_GEOMETRY_TYPE_QUAD <#rtc_geometry_type_quad>`__, -`RTC_GEOMETRY_TYPE_SUBDIVISION <#rtc_geometry_type_subdivision>`__, -`RTC_GEOMETRY_TYPE_CURVE <#rtc_geometry_type_curve>`__, -`RTC_GEOMETRY_TYPE_GRID <#rtc_geometry_type_grid>`__, -`RTC_GEOMETRY_TYPE_POINT <#rtc_geometry_type_point>`__, -`RTC_GEOMETRY_TYPE_USER <#rtc_geometry_type_user>`__, -`RTC_GEOMETRY_TYPE_INSTANCE <#rtc_geometry_type_instance>`__ - -.. raw:: latex - - \pagebreak - -RTC_GEOMETRY_TYPE_TRIANGLE --------------------------- - -NAME -^^^^ - -.. code:: cpp - - RTC_GEOMETRY_TYPE_TRIANGLE - triangle geometry type - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - RTCGeometry geometry = - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_TRIANGLE); - -DESCRIPTION -^^^^^^^^^^^ - -Triangle meshes are created by passing ``RTC_GEOMETRY_TYPE_TRIANGLE`` to -the ``rtcNewGeometry`` function call. The triangle indices can be -specified by setting an index buffer (``RTC_BUFFER_TYPE_INDEX`` type) -and the triangle vertices by setting a vertex buffer -(``RTC_BUFFER_TYPE_VERTEX`` type). See ``rtcSetGeometryBuffer`` and -``rtcSetSharedGeometryBuffer`` for more details on how to set buffers. 
-The index buffer must contain an array of three 32-bit indices per -triangle (``RTC_FORMAT_UINT3`` format) and the number of primitives is -inferred from the size of that buffer. The vertex buffer must contain an -array of single precision ``x``, ``y``, ``z`` floating point coordinates -(``RTC_FORMAT_FLOAT3`` format), and the number of vertices is inferred -from the size of that buffer. The vertex buffer can be at most 16 GB -large. - -The parameterization of a triangle uses the first vertex ``p0`` as base -point, the vector ``p1 - p0`` as u-direction and the vector ``p2 - p0`` -as v-direction. Thus vertex attributes ``t0,t1,t2`` can be linearly -interpolated over the triangle the following way: - -.. code:: cpp - - t_uv = (1-u-v)*t0 + u*t1 + v*t2 - = t0 + u*(t1-t0) + v*(t2-t0) - -A triangle whose vertices are laid out counter-clockwise has its -geometry normal pointing upwards outside the front face, like -illustrated in the following picture: - -.. image:: images/triangle_uv.png - -For multi-segment motion blur, the number of time steps must be first -specified using the ``rtcSetGeometryTimeStepCount`` call. Then a vertex -buffer for each time step can be set using different buffer slots, and -all these buffers have to have the same stride and size. - -Also see tutorial `Triangle -Geometry `__ for an example of how to -create triangle meshes. - -EXIT STATUS -^^^^^^^^^^^ - -On failure ``NULL`` is returned and an error code is set that can be -queried using ``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewGeometry <#rtcnewgeometry>`__ - -.. raw:: latex - - \pagebreak - -RTC_GEOMETRY_TYPE_QUAD ----------------------- - -NAME -^^^^ - -.. code:: cpp - - RTC_GEOMETRY_TYPE_QUAD - quad geometry type - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - RTCGeometry geometry = - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_QUAD); - -DESCRIPTION -^^^^^^^^^^^ - -Quad meshes are created by passing ``RTC_GEOMETRY_TYPE_QUAD`` to the -``rtcNewGeometry`` function call. 
The quad indices can be specified by -setting an index buffer (``RTC_BUFFER_TYPE_INDEX`` type) and the quad -vertices by setting a vertex buffer (``RTC_BUFFER_TYPE_VERTEX`` type). -See ``rtcSetGeometryBuffer`` and ``rtcSetSharedGeometryBuffer`` for more -details on how to set buffers. The index buffer contains an array of -four 32-bit indices per quad (``RTC_FORMAT_UINT4`` format), and the -number of primitives is inferred from the size of that buffer. The -vertex buffer contains an array of single precision ``x``, ``y``, ``z`` -floating point coordinates (``RTC_FORMAT_FLOAT3`` format), and the -number of vertices is inferred from the size of that buffer. The vertex -buffer can be at most 16 GB large. - -A quad is internally handled as a pair of two triangles ``v0,v1,v3`` and -``v2,v3,v1``, with the ``u'``/``v'`` coordinates of the second triangle -corrected by ``u = 1-u'`` and ``v = 1-v'`` to produce a quad -parameterization where ``u`` and ``v`` are in the range 0 to 1. Thus the -parameterization of a quad uses the first vertex ``p0`` as base point, -and the vector ``p1 - p0`` as ``u``-direction, and ``p3 - p0`` as -v-direction. Thus vertex attributes ``t0,t1,t2,t3`` can be bilinearly -interpolated over the quadrilateral the following way: - -.. code:: cpp - - t_uv = (1-v)((1-u)*t0 + u*t1) + v*((1-u)*t3 + u*t2) - -Mixed triangle/quad meshes are supported by encoding a triangle as a -quad, which can be achieved by replicating the last triangle vertex -(``v0,v1,v2`` -> ``v0,v1,v2,v2``). This way the second triangle is a -line (which can never get hit), and the parameterization of the first -triangle is compatible with the standard triangle parameterization. - -A quad whose vertices are laid out counter-clockwise has its geometry -normal pointing upwards outside the front face, like illustrated in the -following picture. - -.. 
image:: images/quad_uv.png - -For multi-segment motion blur, the number of time steps must be first -specified using the ``rtcSetGeometryTimeStepCount`` call. Then a vertex -buffer for each time step can be set using different buffer slots, and -all these buffers must have the same stride and size. - -EXIT STATUS -^^^^^^^^^^^ - -On failure ``NULL`` is returned and an error code is set that can be -queried using ``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewGeometry <#rtcnewgeometry>`__ - -.. raw:: latex - - \pagebreak - -RTC_GEOMETRY_TYPE_GRID ----------------------- - -NAME -^^^^ - -.. code:: cpp - - RTC_GEOMETRY_TYPE_GRID - grid geometry type - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - RTCGeometry geometry = - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_GRID); - -DESCRIPTION -^^^^^^^^^^^ - -Grid meshes are created by passing ``RTC_GEOMETRY_TYPE_GRID`` to the -``rtcNewGeometry`` function call, and contain an array of grid -primitives. This array of grids can be specified by setting up a grid -buffer (with ``RTC_BUFFER_TYPE_GRID`` type and ``RTC_FORMAT_GRID`` -format) and the grid mesh vertices by setting a vertex buffer -(``RTC_BUFFER_TYPE_VERTEX`` type). See ``rtcSetGeometryBuffer`` and -``rtcSetSharedGeometryBuffer`` for more details on how to set buffers. -The number of grid primitives in the grid mesh is inferred from the size -of the grid buffer. - -The vertex buffer contains an array of single precision ``x``, ``y``, -``z`` floating point coordinates (``RTC_FORMAT_FLOAT3`` format), and the -number of vertices is inferred from the size of that buffer. - -Each grid in the grid buffer is of the type ``RTCGrid``: - -.. code:: cpp - - struct RTCGrid - { - unsigned int startVertexID; - unsigned int stride; - unsigned short width,height; - }; - -The ``RTCGrid`` structure describes a 2D grid of vertices (with respect -to the vertex buffer of the grid mesh). 
The ``width`` and ``height`` -members specify the number of vertices in u and v direction, -e.g. setting both ``width`` and ``height`` to 3 sets up a 3×3 vertex -grid. The maximum allowed ``width`` and ``height`` is 32767. The -``startVertexID`` specifies the ID of the top-left vertex in the vertex -grid, while the ``stride`` parameter specifies a stride (in number of -vertices) used to step to the next row. - -A vertex grid of dimensions ``width`` and ``height`` is treated as a -``(width-1)`` x ``(height-1)`` grid of ``quads`` (triangle-pairs), with -the same shared edge handling as for regular quad meshes. However, the -``u``/``v`` coordinates have the uniform range ``[0..1]`` for an entire -vertex grid. The ``u`` direction follows the ``width`` of the grid while -the ``v`` direction the ``height``. - -For multi-segment motion blur, the number of time steps must be first -specified using the ``rtcSetGeometryTimeStepCount`` call. Then a vertex -buffer for each time step can be set using different buffer slots, and -all these buffers must have the same stride and size. - -EXIT STATUS -^^^^^^^^^^^ - -On failure ``NULL`` is returned and an error code is set that can be -queried using ``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewGeometry <#rtcnewgeometry>`__ - -.. raw:: latex - - \pagebreak - -RTC_GEOMETRY_TYPE_SUBDIVISION ------------------------------ - -NAME -^^^^ - -.. code:: cpp - - RTC_GEOMETRY_TYPE_SUBDIVISION - subdivision geometry type - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - RTCGeometry geometry = - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_SUBDIVISION); - -DESCRIPTION -^^^^^^^^^^^ - -Catmull-Clark subdivision meshes are supported, including support for -edge creases, vertex creases, holes, non-manifold geometry, and -face-varying interpolation. The number of vertices per face can be in -the range of 3 to 15 vertices (triangles, quadrilateral, pentagons, -etc). 
- -Subdivision meshes are created by passing -``RTC_GEOMETRY_TYPE_SUBDIVISION`` to the ``rtcNewGeometry`` function. -Various buffers need to be set by the application to set up the -subdivision mesh. See ``rtcSetGeometryBuffer`` and -``rtcSetSharedGeometryBuffer`` for more details on how to set buffers. -The face buffer (``RTC_BUFFER_TYPE_FACE`` type and ``RTC_FORMAT_UINT`` -format) contains the number of edges/indices of each face (3 to 15), and -the number of faces is inferred from the size of this buffer. The index -buffer (``RTC_BUFFER_TYPE_INDEX`` type) contains multiple (3 to 15) -32-bit vertex indices (``RTC_FORMAT_UINT`` format) for each face, and -the number of edges is inferred from the size of this buffer. The vertex -buffer (``RTC_BUFFER_TYPE_VERTEX`` type) stores an array of single -precision ``x``, ``y``, ``z`` floating point coordinates -(``RTC_FORMAT_FLOAT3`` format), and the number of vertices is inferred -from the size of this buffer. - -Optionally, the application may set additional index buffers using -different buffer slots if multiple topologies are required for -face-varying interpolation. The standard vertex buffers -(``RTC_BUFFER_TYPE_VERTEX``) are always bound to the geometry topology -(topology 0) thus use ``RTC_BUFFER_TYPE_INDEX`` with buffer slot 0. User -vertex data interpolation may use different topologies as described -later. - -Optionally, the application can set up the hole buffer -(``RTC_BUFFER_TYPE_HOLE``) which contains an array of 32-bit indices -(``RTC_FORMAT_UINT`` format) of faces that should be considered -non-existing in all topologies. The number of holes is inferred from the -size of this buffer. - -Optionally, the application can fill the level buffer -(``RTC_BUFFER_TYPE_LEVEL``) with a tessellation rate for each of the -edges of each face. This buffer must have the same size as the index -buffer. 
The tessellation level is a positive floating point value -(``RTC_FORMAT_FLOAT`` format) that specifies how many quads along the -edge should be generated during tessellation. If no level buffer is -specified, a level of 1 is used. The maximally supported edge level is -4096, and larger levels are clamped to that value. Note that edges may -be shared between (typically 2) faces. To guarantee a watertight -tessellation, the level of these shared edges should be identical. A -uniform tessellation rate for an entire subdivision mesh can be set by -using the ``rtcSetGeometryTessellationRate`` function. The existence of -a level buffer has precedence over the uniform tessellation rate. - -Optionally, the application can fill the sparse edge crease buffers to -make edges appear sharper. The edge crease index buffer -(``RTC_BUFFER_TYPE_EDGE_CREASE_INDEX``) contains an array of pairs of -32-bit vertex indices (``RTC_FORMAT_UINT2`` format) that specify -unoriented edges in the geometry topology. The edge crease weight buffer -(``RTC_BUFFER_TYPE_EDGE_CREASE_WEIGHT``) stores for each of these crease -edges a positive floating point weight (``RTC_FORMAT_FLOAT`` format). -The number of edge creases is inferred from the size of these buffers, -which has to be identical. The larger a weight, the sharper the edge. -Specifying a weight of infinity is supported and marks an edge as -infinitely sharp. Storing an edge multiple times with the same crease -weight is allowed, but has lower performance. Storing an edge multiple -times with different crease weights results in undefined behavior. For a -stored edge (i,j), the reverse direction edges (j,i) do not have to be -stored, as both are considered the same unoriented edge. Edge crease -features are shared between all topologies. - -Optionally, the application can fill the sparse vertex crease buffers to -make vertices appear sharper. 
The vertex crease index buffer -(``RTC_BUFFER_TYPE_VERTEX_CREASE_INDEX``), contains an array of 32-bit -vertex indices (``RTC_FORMAT_UINT`` format) to specify a set of vertices -from the geometry topology. The vertex crease weight buffer -(``RTC_BUFFER_TYPE_VERTEX_CREASE_WEIGHT``) specifies for each of these -vertices a positive floating point weight (``RTC_FORMAT_FLOAT`` format). -The number of vertex creases is inferred from the size of these buffers, -and has to be identical. The larger a weight, the sharper the vertex. -Specifying a weight of infinity is supported and makes the vertex -infinitely sharp. Storing a vertex multiple times with the same crease -weight is allowed, but has lower performance. Storing a vertex multiple -times with different crease weights results in undefined behavior. -Vertex crease features are shared between all topologies. - -Subdivision modes can be used to force linear interpolation for parts of -the subdivision mesh; see ``rtcSetGeometrySubdivisionMode`` for more -details. - -For multi-segment motion blur, the number of time steps must be first -specified using the ``rtcSetGeometryTimeStepCount`` call. Then a vertex -buffer for each time step can be set using different buffer slots, and -all these buffers have to have the same stride and size. - -Also see tutorial `Subdivision -Geometry `__ for an example of how -to create subdivision surfaces. - -Parameterization -^^^^^^^^^^^^^^^^ - -The parameterization for subdivision faces is different for -quadrilaterals and non-quadrilateral faces. - -The parameterization of a quadrilateral face uses the first vertex ``p0`` -as base point, and the vector ``p1 - p0`` as u-direction and ``p3 - p0`` -as v-direction. 
- -The parameterization for all other face types (with number of vertices -not equal 4), have a special parameterization where the subpatch ID ``n`` -(of the ``n``-th quadrilateral that would be obtained by a single -subdivision step) and the local hit location inside this quadrilateral -are encoded in the UV coordinates. The following code extracts the -sub-patch ID ``i`` and local UVs of this subpatch: - -.. code:: cpp - - unsigned int l = floorf(0.5f*U); - unsigned int h = floorf(0.5f*V); - unsigned int i = 4*h+l; - float u = 2.0f*fracf(0.5f*U)-0.5f; - float v = 2.0f*fracf(0.5f*V)-0.5f; - -This encoding allows local subpatch UVs to be in the range -``[-0.5,1.5[`` thus negative subpatch UVs can be passed to -``rtcInterpolate`` to sample subpatches slightly out of bounds. This can -be useful to calculate derivatives using finite differences if required. -The encoding further has the property that one can just move the value -``u`` (or ``v``) on a subpatch by adding ``du`` (or ``dv``) to the -special UV encoding as long as it does not fall out of the -``[-0.5,1.5[`` range. - -To smoothly interpolate vertex attributes over the subdivision surface -we recommend using the ``rtcInterpolate`` function, which will apply the -standard subdivision rules for interpolation and automatically takes -care of the special UV encoding for non-quadrilaterals. - -Face-Varying Data -^^^^^^^^^^^^^^^^^ - -Face-varying interpolation is supported through multiple topologies per -subdivision mesh and binding such topologies to vertex attribute buffers -to interpolate. This way, texture coordinates may use a different -topology with additional boundaries to construct separate UV regions -inside one subdivision mesh. - -Each such topology ``i`` has a separate index buffer (specified using -``RTC_BUFFER_TYPE_INDEX`` with buffer slot ``i``) and separate -subdivision mode that can be set using -``rtcSetGeometrySubdivisionMode``. 
A vertex attribute buffer -``RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE`` bound to a buffer slot ``j`` can be -assigned to use a topology for interpolation using the -``rtcSetGeometryVertexAttributeTopology`` call. - -The face buffer (``RTC_BUFFER_TYPE_FACE`` type) is shared between all -topologies, which means that the ``n``-th primitive always has the same -number of vertices (e.g. being a triangle or a quad) for each topology. -However, the indices of the topologies themselves may be different. - -EXIT STATUS -^^^^^^^^^^^ - -On failure ``NULL`` is returned and an error code is set that can be -queried using ``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewGeometry <#rtcnewgeometry>`__ - -.. raw:: latex - - \pagebreak - -RTC_GEOMETRY_TYPE_CURVE ------------------------ - -NAME -^^^^ - -.. code:: cpp - - RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE - - flat curve geometry with linear basis - - RTC_GEOMETRY_TYPE_FLAT_BEZIER_CURVE - - flat curve geometry with cubic Bézier basis - - RTC_GEOMETRY_TYPE_FLAT_BSPLINE_CURVE - - flat curve geometry with cubic B-spline basis - - RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE - - flat curve geometry with cubic Hermite basis - - RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE - - flat curve geometry with Catmull-Rom basis - - RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BEZIER_CURVE - - flat normal oriented curve geometry with cubic Bézier basis - - RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BSPLINE_CURVE - - flat normal oriented curve geometry with cubic B-spline basis - - RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_HERMITE_CURVE - - flat normal oriented curve geometry with cubic Hermite basis - - RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_CATMULL_ROM_CURVE - - flat normal oriented curve geometry with Catmull-Rom basis - - RTC_GEOMETRY_TYPE_CONE_LINEAR_CURVE - - capped cone curve geometry with linear basis - discontinous at edge boundaries - - RTC_GEOMETRY_TYPE_ROUND_LINEAR_CURVE - - capped cone curve geometry with linear basis and spherical ending - - RTC_GEOMETRY_TYPE_ROUND_BEZIER_CURVE - - 
swept surface curve geometry with cubic Bézier basis - - RTC_GEOMETRY_TYPE_ROUND_BSPLINE_CURVE - - swept surface curve geometry with cubic B-spline basis - - RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE - - swept surface curve geometry with cubic Hermite basis - - RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE - - swept surface curve geometry with Catmull-Rom basis - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE); - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_FLAT_BEZIER_CURVE); - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_FLAT_BSPLINE_CURVE); - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE); - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE); - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BEZIER_CURVE); - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BSPLINE_CURVE); - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_HERMITE_CURVE); - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_CATMULL_ROM_CURVE); - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_CONE_LINEAR_CURVE); - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_ROUND_LINEAR_CURVE); - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_ROUND_BEZIER_CURVE); - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_ROUND_BSPLINE_CURVE); - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE); - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE); - -DESCRIPTION -^^^^^^^^^^^ - -Curves with per vertex radii are supported with linear, cubic Bézier, -cubic B-spline, and cubic Hermite bases. 
Such curve geometries are -created by passing ``RTC_GEOMETRY_TYPE_FLAT_LINEAR_CURVE``, -``RTC_GEOMETRY_TYPE_FLAT_BEZIER_CURVE``, -``RTC_GEOMETRY_TYPE_FLAT_BSPLINE_CURVE``, -``RTC_GEOMETRY_TYPE_FLAT_HERMITE_CURVE``, -``RTC_GEOMETRY_TYPE_FLAT_CATMULL_ROM_CURVE``, -``RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BEZIER_CURVE``, -``RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_BSPLINE_CURVE``, -``RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_HERMITE_CURVE``, -``RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_CATMULL_ROM_CURVE``, -``RTC_GEOMETRY_TYPE_CONE_LINEAR_CURVE``, -``RTC_GEOMETRY_TYPE_ROUND_LINEAR_CURVE``, -``RTC_GEOMETRY_TYPE_ROUND_BEZIER_CURVE``, -``RTC_GEOMETRY_TYPE_ROUND_BSPLINE_CURVE``, -``RTC_GEOMETRY_TYPE_ROUND_HERMITE_CURVE``, or -``RTC_GEOMETRY_TYPE_ROUND_CATMULL_ROM_CURVE`` to the ``rtcNewGeometry`` -function. The curve indices can be specified through an index buffer -(``RTC_BUFFER_TYPE_INDEX``) and the curve vertices through a vertex -buffer (``RTC_BUFFER_TYPE_VERTEX``). For the Hermite basis a tangent -buffer (``RTC_BUFFER_TYPE_TANGENT``), for normal oriented curves a normal -buffer (``RTC_BUFFER_TYPE_NORMAL``), and for normal oriented Hermite -curves a normal derivative buffer -(``RTC_BUFFER_TYPE_NORMAL_DERIVATIVE``) has to be specified -additionally. See ``rtcSetGeometryBuffer`` and -``rtcSetSharedGeometryBuffer`` for more details on how to set buffers. - -The index buffer contains an array of 32-bit indices -(``RTC_FORMAT_UINT`` format), each pointing to the first control vertex -in the vertex buffer, but also to the first tangent in the tangent -buffer, and first normal in the normal buffer if these buffers are -present. - -The vertex buffer stores each control vertex in the form of a single -precision position and radius stored in (``x``, ``y``, ``z``, ``r``) -order in memory (``RTC_FORMAT_FLOAT4`` format). The number of vertices -is inferred from the size of this buffer. The radii may be smaller than -zero but the interpolated radii should always be greater than or equal to -zero. 
Similarly, the tangent buffer stores the derivative of each -control vertex (``x``, ``y``, ``z``, ``r`` order and -``RTC_FORMAT_FLOAT4`` format) and the normal buffer stores a single -precision normal per control vertex (``x``, ``y``, ``z`` order and -``RTC_FORMAT_FLOAT3`` format). - -Linear Basis -'''''''''''' - -For the linear basis the indices point to the first of 2 consecutive -control points in the vertex buffer. The first control point is the -start and the second control point the end of the line segment. When -constructing hair strands in this basis, the end-point can be shared -with the start of the next line segment. - -For the linear basis the user optionally can provide a flags buffer of -type ``RTC_BUFFER_TYPE_FLAGS`` which contains bytes that encode if the -left neighbor segment (``RTC_CURVE_FLAG_NEIGHBOR_LEFT`` flag) and/or -right neighbor segment (``RTC_CURVE_FLAG_NEIGHBOR_RIGHT`` flag) exist -(see `RTCCurveFlags <#rtccurveflags>`__). If this buffer is not set, -then the left/right neighbor bits are automatically calculated based on -the index buffer (left segment exists if segment(id-1)+1 == segment(id) -and right segment exists if segment(id+1)-1 == segment(id)). - -A left neighbor segment is assumed to end at the start vertex of the -current segment, and to start at the previous vertex in the vertex -buffer. Similarly, the right neighbor segment is assumed to start at the -end vertex of the current segment, and to end at the next vertex in the -vertex buffer. - -Only when the left and right bits are properly specified the current -segment can properly attach to the left and/or right neighbor, otherwise -the touching area may not get rendered properly. - -Bézier Basis -'''''''''''' - -For the cubic Bézier basis the indices point to the first of 4 -consecutive control points in the vertex buffer. 
These control points -use the cubic Bézier basis, where the first control point represents the -start point of the curve, and the 4th control point the end point of the -curve. The Bézier basis is interpolating, thus the curve does go exactly -through the first and fourth control vertex. - -B-spline Basis -'''''''''''''' - -For the cubic B-spline basis the indices point to the first of 4 -consecutive control points in the vertex buffer. These control points -make up a cardinal cubic B-spline (implicit equidistant knot vector). -This basis is not interpolating, thus the curve does in general not go -through any of the control points directly. A big advantage of this -basis is that 3 control points can be shared for two continuous -neighboring curve segments, e.g. the curves (p0,p1,p2,p3) and -(p1,p2,p3,p4) are C1 continuous. This feature makes this basis a good -choice to construct continuous multi-segment curves, as memory -consumption can be kept minimal. - -Hermite Basis -''''''''''''' - -For the cubic Hermite basis the indices point to the first of 2 -consecutive points in the vertex buffer, and the first of 2 consecutive -tangents in the tangent buffer. These two points and two tangents make -up a cubic Hermite curve. This basis is interpolating, thus does exactly -go through the first and second control point, and the first order -derivative at the begin and end matches exactly the value specified in -the tangent buffer. When connecting two segments continuously, the end -point and tangent of the previous segment can be shared. Different -versions of Catmull-Rom splines can be easily constructed using the -Hermite basis, by calculating a proper tangent buffer from the control -points. - -Catmull-Rom Basis -''''''''''''''''' - -For the Catmull-Rom basis the indices point to the first of 4 -consecutive control points in the vertex buffer. This basis goes through -p1 and p2, with tangents (p2-p0)/2 and (p3-p1)/2. 
- -Flat Curves -''''''''''' - -The ``RTC_GEOMETRY_TYPE_FLAT_*`` flat mode is a fast mode designed to -render distant hair. In this mode the curve is rendered as a connected -sequence of ray facing quads. Individual quads are considered to have -subpixel size, and zooming onto the curve might show geometric -artifacts. The number of quads to subdivide into can be specified -through the ``rtcSetGeometryTessellationRate`` function. By default the -tessellation rate is 4. - -Normal Oriented Curves -'''''''''''''''''''''' - -The ``RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_*`` mode is a mode designed to -render blades of grass. In this mode a vertex spline has to get -specified as for the previous modes, but additionally a normal spline is -required. If the Hermite basis is used, the ``RTC_BUFFER_TYPE_NORMAL`` -and ``RTC_BUFFER_TYPE_NORMAL_DERIVATIVE`` buffers have both to be set. - -The curve is rendered as a flat band whose center approximately follows -the provided vertex spline, whose half width approximately follows the -provided radius spline, and whose normal orientation approximately -follows the provided normal spline. - -To intersect the normal oriented curve, we perform a newton-raphson -style intersection of a ray with a tensor product surface of a linear -basis (perpendicular to the curve) and cubic Bézier basis (along the -curve). We use a guide curve and its derivatives to construct the -control points of that surface. The guide curve is defined by a sweep -surface defined by sweeping a line centered at the vertex spline -location along the curve. At each parameter value the half width of the -line matches the radius spline, and the direction matches the cross -product of the normal from the normal spline and tangent of the vertex -spline. Note that this construction does not work when the provided -normals are parallel to the curve direction. For this reason the -provided normals should best be kept as perpendicular to the curve -direction as possible. 
- -Round Curves -'''''''''''' - -In the ``RTC_GEOMETRY_TYPE_ROUND_*`` round mode, a real geometric -surface is rendered for the curve, which is more expensive but allows -closeup views. - -For the linear basis the round mode renders a cone that tangentially -touches a start-sphere and end-sphere. The start sphere is rendered when -no previous segment is indicated by the neighbor bits. The end sphere -is always rendered but parts that lie inside the next segment are -clipped away (if that next segment exists). This way a curve is closed -on both ends and the interior will render properly as long as only -neighboring segments penetrate into a segment. For this to work properly -it is important that the flags buffer is properly populated with -neighbor information. - -For the cubic polynomial bases, the round mode renders a sweep surface -by sweeping a varying radius circle tangential along the curve. As a -limitation, the radius of the curve has to be smaller than the curvature -radius of the curve at each location on the curve. - -The intersection with the curve segment stores the parametric hit -location along the curve segment as u-coordinate (range 0 to +1). - -For flat curves, the v-coordinate is set to the normalized distance in -the range -1 to +1. For normal oriented curves the v-coordinate is in -the range 0 to 1. For the linear basis and in round mode the -v-coordinate is set to zero. - -In flat mode, the geometry normal ``Ng`` is set to the tangent of the -curve at the hit location. In round mode and for normal oriented curves, -the geometry normal ``Ng`` is set to the non-normalized geometric normal -of the surface. - -For multi-segment motion blur, the number of time steps must be first -specified using the ``rtcSetGeometryTimeStepCount`` call. Then a vertex -buffer for each time step can be set using different buffer slots, and -all these buffers must have the same stride and size. 
For the Hermite -basis also a tangent buffer has to be set for each time step and for -normal oriented curves a normal buffer has to get specified for each -time step. - -Also see tutorials `Hair `__ and -`Curves `__ for examples of how to create -and use curve geometries. - -EXIT STATUS -^^^^^^^^^^^ - -On failure ``NULL`` is returned and an error code is set that can be -queried using ``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewGeometry <#rtcnewgeometry>`__, `RTCCurveFlags <#rtccurveflags>`__ - -.. raw:: latex - - \pagebreak - -RTC_GEOMETRY_TYPE_POINT ------------------------ - -NAME -^^^^ - -.. code:: cpp - - RTC_GEOMETRY_TYPE_SPHERE_POINT - - point geometry spheres - - RTC_GEOMETRY_TYPE_DISC_POINT - - point geometry with ray-oriented discs - - RTC_GEOMETRY_TYPE_ORIENTED_DISC_POINT - - point geometry with normal-oriented discs - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_SPHERE_POINT); - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_DISC_POINT); - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_ORIENTED_DISC_POINT); - -DESCRIPTION -^^^^^^^^^^^ - -Points with per vertex radii are supported with sphere, ray-oriented -discs, and normal-oriented discs geometric representations. Such point -geometries are created by passing ``RTC_GEOMETRY_TYPE_SPHERE_POINT``, -``RTC_GEOMETRY_TYPE_DISC_POINT``, or -``RTC_GEOMETRY_TYPE_ORIENTED_DISC_POINT`` to the ``rtcNewGeometry`` -function. The point vertices can be specified through a vertex buffer -(``RTC_BUFFER_TYPE_VERTEX``). For the normal oriented discs a normal -buffer (``RTC_BUFFER_TYPE_NORMAL``) has to get specified additionally. -See ``rtcSetGeometryBuffer`` and ``rtcSetSharedGeometryBuffer`` for more -details on how to set buffers. - -The vertex buffer stores each control vertex in the form of a single -precision position and radius stored in (``x``, ``y``, ``z``, ``r``) -order in memory (``RTC_FORMAT_FLOAT4`` format). 
The number of vertices -is inferred from the size of this buffer. Similarly, the normal buffer -stores a single precision normal per control vertex (``x``, ``y``, ``z`` -order and ``RTC_FORMAT_FLOAT3`` format). - -In the ``RTC_GEOMETRY_TYPE_SPHERE_POINT`` mode, a real geometric surface -is rendered for the point, which is more expensive but allows closeup -views. - -The ``RTC_GEOMETRY_TYPE_DISC_POINT`` flat mode is a fast mode designed -to render distant points. In this mode the point is rendered as a ray -facing disc. - -The ``RTC_GEOMETRY_TYPE_ORIENTED_DISC_POINT`` mode is a mode designed as -a midpoint geometrically between ray facing discs and spheres. In this -mode the point is rendered as a normal oriented disc. - -For all point types, only the hit distance and geometry normal are -returned as hit information; u and v are set to zero. - -For multi-segment motion blur, the number of time steps must be first -specified using the ``rtcSetGeometryTimeStepCount`` call. Then a vertex -buffer for each time step can be set using different buffer slots, and -all these buffers must have the same stride and size. - -Also see tutorial [Points] for an example of how to create and use point -geometries. - -EXIT STATUS -^^^^^^^^^^^ - -On failure ``NULL`` is returned and an error code is set that can be -queried using ``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewGeometry <#rtcnewgeometry>`__ - -.. raw:: latex - - \pagebreak - -RTC_GEOMETRY_TYPE_USER ----------------------- - -NAME -^^^^ - -.. code:: cpp - - RTC_GEOMETRY_TYPE_USER - user geometry type - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - RTCGeometry geometry = - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_USER); - -DESCRIPTION -^^^^^^^^^^^ - -User-defined geometries contain a number of user-defined primitives, -just like triangle meshes contain multiple triangles. 
The shape of the -user-defined primitives is specified through registered callback -functions, which enable extending Embree with arbitrary types of -primitives. - -User-defined geometries are created by passing -``RTC_GEOMETRY_TYPE_USER`` to the ``rtcNewGeometry`` function call. One -has to set the number of primitives (see -``rtcSetGeometryUserPrimitiveCount``), a user data pointer (see -``rtcSetGeometryUserData``), a bounding function closure (see -``rtcSetGeometryBoundsFunction``), as well as user-defined intersect -(see ``rtcSetGeometryIntersectFunction``) and occluded (see -``rtcSetGeometryOccludedFunction``) callback functions. The bounding -function is used to query the bounds of all time steps of a user -primitive, while the intersect and occluded callback functions are -called to intersect the primitive with a ray. The user data pointer is -passed to each callback invocation and can be used to point to the -application’s representation of the user geometry. - -The creation of a user geometry typically looks like the following: - -.. code:: cpp - - RTCGeometry geometry = rtcNewGeometry(device, RTC_GEOMETRY_TYPE_USER); - rtcSetGeometryUserPrimitiveCount(geometry, numPrimitives); - rtcSetGeometryUserData(geometry, userGeometryRepresentation); - rtcSetGeometryBoundsFunction(geometry, boundsFunction); - rtcSetGeometryIntersectFunction(geometry, intersectFunction); - rtcSetGeometryOccludedFunction(geometry, occludedFunction); - -Please have a look at the ``rtcSetGeometryBoundsFunction``, -``rtcSetGeometryIntersectFunction``, and -``rtcSetGeometryOccludedFunction`` functions on the implementation of -the callback functions. - -Primitives of a user geometry are ignored during rendering when their -bounds are empty, i.e. when the bounds have lower>upper in at least one -dimension. - -See tutorial `User Geometry `__ for an -example of how to use the user-defined geometries. 
- -EXIT STATUS -^^^^^^^^^^^ - -On failure ``NULL`` is returned and an error code is set that can be -queried using ``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewGeometry <#rtcnewgeometry>`__, -`rtcSetGeometryUserPrimitiveCount <#rtcsetgeometryuserprimitivecount>`__, -`rtcSetGeometryUserData <#rtcsetgeometryuserdata>`__, -`rtcSetGeometryBoundsFunction <#rtcsetgeometryboundsfunction>`__, -`rtcSetGeometryIntersectFunction <#rtcsetgeometryintersectfunction>`__, -`rtcSetGeometryOccludedFunction <#rtcsetgeometryoccludedfunction>`__ - -.. raw:: latex - - \pagebreak - -RTC_GEOMETRY_TYPE_INSTANCE --------------------------- - -NAME -^^^^ - -.. code:: cpp - - RTC_GEOMETRY_TYPE_INSTANCE - instance geometry type - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - RTCGeometry geometry = - rtcNewGeometry(device, RTC_GEOMETRY_TYPE_INSTANCE); - -DESCRIPTION -^^^^^^^^^^^ - -Embree supports instancing of scenes using affine transformations (3×3 -matrix plus translation). As the instanced scene is stored only a single -time, even if instanced to multiple locations, this feature can be used -to create very complex scenes with small memory footprint. - -Embree supports both single-level instancing and multi-level instancing. -The maximum instance nesting depth is ``RTC_MAX_INSTANCE_LEVEL_COUNT``; -it can be configured at compile-time using the constant -``EMBREE_MAX_INSTANCE_LEVEL_COUNT``. Users should adapt this constant to -their needs: instances nested any deeper are silently ignored in release -mode, and cause assertions in debug mode. - -Instances are created by passing ``RTC_GEOMETRY_TYPE_INSTANCE`` to the -``rtcNewGeometry`` function call. The instanced scene can be set using -the ``rtcSetGeometryInstancedScene`` call, and the affine transformation -can be set using the ``rtcSetGeometryTransform`` function. 
- -Please note that ``rtcCommitScene`` on the instanced scene should be -called first, followed by ``rtcCommitGeometry`` on the instance, -followed by ``rtcCommitScene`` for the top-level scene containing the -instance. - -If a ray hits the instance, the ``geomID`` and ``primID`` members of the -hit are set to the geometry ID and primitive ID of the hit primitive in -the instanced scene, and the ``instID`` member of the hit is set to the -geometry ID of the instance in the top-level scene. - -The instancing scheme can also be implemented using user geometries. To -achieve this, the user geometry code should set the ``instID`` member of -the intersection context to the geometry ID of the instance, then trace -the transformed ray, and finally set the ``instID`` field of the -intersection context again to -1. The ``instID`` field is copied -automatically by each primitive intersector into the ``instID`` field of -the hit structure when the primitive is hit. See the `User -Geometry `__ tutorial for an example. - -For multi-segment motion blur, the number of time steps must be first -specified using the ``rtcSetGeometryTimeStepCount`` function. Then a -transformation for each time step can be specified using the -``rtcSetGeometryTransform`` function. - -See tutorials `Instanced Geometry `__ -and `Multi Level Instancing `__ -for examples of how to use instances. - -EXIT STATUS -^^^^^^^^^^^ - -On failure ``NULL`` is returned and an error code is set that can be -queried using ``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewGeometry <#rtcnewgeometry>`__, -`rtcSetGeometryInstancedScene <#rtcsetgeometryinstancedscene>`__, -`rtcSetGeometryTransform <#rtcsetgeometrytransform>`__ - -.. raw:: latex - - \pagebreak - -RTCCurveFlags -------------- - -NAME -^^^^ - -.. code:: cpp - - RTCCurveFlags - per segment flags for curve geometry - -SYNOPSIS -^^^^^^^^ - -.. 
code:: cpp - - #include - - enum RTCCurveFlags { RTC_CURVE_FLAG_NEIGHBOR_LEFT = (1 << 0), - RTC_CURVE_FLAG_NEIGHBOR_RIGHT = (1 << 1) }; - -DESCRIPTION -^^^^^^^^^^^ - -The RTCCurveFlags type is used for linear curves to determine if the -left and/or right neighbor segment exist. Therefore one attaches a -buffer of type RTC_BUFFER_TYPE_FLAGS to the curve geometry which stores -an individual byte per curve segment. - -If the RTC_CURVE_FLAG_NEIGHBOR_LEFT flag in that byte is enabled for a -curve segment, then the left segment exists (which starts one vertex -before the start vertex of the current curve) and the current segment is -rendered to properly attach to that segment. - -If the RTC_CURVE_FLAG_NEIGHBOR_RIGHT flag in that byte is enabled for a -curve segment, then the right segment exists (which ends one vertex -after the end vertex of the current curve) and the current segment is -rendered to properly attach to that segment. - -When not properly specifying left and right flags for linear curves, the -rendering at the ending of these curves may not look correct, in -particular when round linear curves are viewed from the inside. - -EXIT STATUS -^^^^^^^^^^^ - -SEE ALSO -^^^^^^^^ - -`RTC_GEOMETRY_TYPE_CURVE <#rtc_geometry_type_curve>`__ - -.. raw:: latex - - \pagebreak - -rtcRetainGeometry ------------------ - -NAME -^^^^ - -.. code:: cpp - - rtcRetainGeometry - increments the geometry reference count - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcRetainGeometry(RTCGeometry geometry); - -DESCRIPTION -^^^^^^^^^^^ - -Geometry objects are reference counted. The ``rtcRetainGeometry`` -function increments the reference count of the passed geometry object -(``geometry`` argument). This function together with -``rtcReleaseGeometry`` makes it possible to use the internal reference counting in -a C++ wrapper class to handle the ownership of the object. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. 
- -SEE ALSO -^^^^^^^^ - -`rtcNewGeometry <#rtcnewgeometry>`__, -`rtcReleaseGeometry <#rtcreleasegeometry>`__ - -.. raw:: latex - - \pagebreak - -rtcReleaseGeometry ------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcReleaseGeometry - decrements the geometry reference count - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcReleaseGeometry(RTCGeometry geometry); - -DESCRIPTION -^^^^^^^^^^^ - -Geometry objects are reference counted. The ``rtcReleaseGeometry`` -function decrements the reference count of the passed geometry object -(``geometry`` argument). When the reference count falls to 0, the -geometry gets destroyed. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewGeometry <#rtcnewgeometry>`__, -`rtcRetainGeometry <#rtcretaingeometry>`__ - -.. raw:: latex - - \pagebreak - -rtcCommitGeometry ------------------ - -NAME -^^^^ - -.. code:: cpp - - rtcCommitGeometry - commits geometry changes - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcCommitGeometry(RTCGeometry geometry); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcCommitGeometry`` function is used to commit all geometry -changes performed to a geometry (``geometry`` parameter). After a -geometry gets modified, this function must be called to properly update -the internal state of the geometry to perform interpolations using -``rtcInterpolate`` or to commit a scene containing the geometry using -``rtcCommitScene``. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcInterpolate <#rtcinterpolate>`__, -`rtcCommitScene <#rtccommitscene>`__ - -.. raw:: latex - - \pagebreak - -rtcEnableGeometry ------------------ - -NAME -^^^^ - -.. code:: cpp - - rtcEnableGeometry - enables the geometry - -SYNOPSIS -^^^^^^^^ - -.. 
code:: cpp - - #include - - void rtcEnableGeometry(RTCGeometry geometry); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcEnableGeometry`` function enables the specified geometry -(``geometry`` argument). Only enabled geometries are rendered. Each -geometry is enabled by default at construction time. - -After enabling a geometry, the scene containing that geometry must be -committed using ``rtcCommitScene`` for the change to have effect. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewGeometry <#rtcnewgeometry>`__, -`rtcDisableGeometry <#rtcdisablegeometry>`__, -`rtcCommitScene <#rtccommitscene>`__ - -.. raw:: latex - - \pagebreak - -rtcDisableGeometry ------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcDisableGeometry - disables the geometry - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcDisableGeometry(RTCGeometry geometry); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcDisableGeometry`` function disables the specified geometry -(``geometry`` argument). A disabled geometry is not rendered. Each -geometry is enabled by default at construction time. - -After disabling a geometry, the scene containing that geometry must be -committed using ``rtcCommitScene`` for the change to have effect. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewGeometry <#rtcnewgeometry>`__, -`rtcEnableGeometry <#rtcenablegeometry>`__, -`rtcCommitScene <#rtccommitscene>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryTimeStepCount ---------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryTimeStepCount - sets the number of time steps of the - geometry - -SYNOPSIS -^^^^^^^^ - -.. 
code:: cpp - - #include - - void rtcSetGeometryTimeStepCount( - RTCGeometry geometry, - unsigned int timeStepCount - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryTimeStepCount`` function sets the number of time -steps for multi-segment motion blur (``timeStepCount`` parameter) of the -specified geometry (``geometry`` parameter). - -For triangle meshes (``RTC_GEOMETRY_TYPE_TRIANGLE``), quad meshes -(``RTC_GEOMETRY_TYPE_QUAD``), curves (``RTC_GEOMETRY_TYPE_CURVE``), -points (``RTC_GEOMETRY_TYPE_POINT``), and subdivision geometries -(``RTC_GEOMETRY_TYPE_SUBDIVISION``), the number of time steps directly -corresponds to the number of vertex buffer slots available -(``RTC_BUFFER_TYPE_VERTEX`` buffer type). For these geometries, one -vertex buffer per time step must be specified when creating -multi-segment motion blur geometries. - -For instance geometries (``RTC_GEOMETRY_TYPE_INSTANCE``), a -transformation must be specified for each time step (see -``rtcSetGeometryTransform``). - -For user geometries, the registered bounding callback function must -provide a bounding box per primitive and time step, and the intersection -and occlusion callback functions should properly intersect the -motion-blurred geometry at the ray time. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewGeometry <#rtcnewgeometry>`__, -`rtcSetGeometryTimeRange <#rtcsetgeometrytimerange>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryTimeRange ------------------------ - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryTimeRange - sets the time range for a motion blur geometry - -SYNOPSIS -^^^^^^^^ - -.. 
code:: cpp - - #include - - void rtcSetGeometryTimeRange( - RTCGeometry geometry, - float startTime, - float endTime - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryTimeRange`` function sets a time range which defines -the start (and end time) of the first (and last) time step of a motion -blur geometry. The time range is defined relative to the camera shutter -interval [0,1] but it can be arbitrary. Thus the startTime can be -smaller than, equal to, or larger than 0, indicating a geometry whose animation -definition starts before, at, or after the camera shutter opens. Similarly, -the endTime can be smaller than, equal to, or larger than 1, indicating a -geometry whose animation definition ends before, at, or after the camera -shutter closes. The startTime has to be smaller than or equal to the endTime. - -The default time range when this function is not called is the entire -camera shutter [0,1]. For best performance at most one time segment of -the piecewise linear definition of the motion should fall outside the -shutter window to the left and to the right. Thus do not set the -startTime or endTime too far outside the [0,1] interval for best -performance. - -This time range feature will also allow geometries to appear and -disappear during the camera shutter time if the specified time range is -a sub range of [0,1]. - -Please also have a look at the ``rtcSetGeometryTimeStepCount`` function -to see how to define the time steps for the specified time range. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcSetGeometryTimeStepCount <#rtcsetgeometrytimestepcount>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryVertexAttributeCount ----------------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryVertexAttributeCount - sets the number of vertex - attributes of the geometry - -SYNOPSIS -^^^^^^^^ - -.. 
code:: cpp - - #include - - void rtcSetGeometryVertexAttributeCount( - RTCGeometry geometry, - unsigned int vertexAttributeCount - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryVertexAttributeCount`` function sets the number of -slots (``vertexAttributeCount`` parameter) for vertex attribute buffers -(``RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE``) that can be used for the -specified geometry (``geometry`` parameter). - -This function is supported only for triangle meshes -(``RTC_GEOMETRY_TYPE_TRIANGLE``), quad meshes -(``RTC_GEOMETRY_TYPE_QUAD``), curves (``RTC_GEOMETRY_TYPE_CURVE``), -points (``RTC_GEOMETRY_TYPE_POINT``), and subdivision geometries -(``RTC_GEOMETRY_TYPE_SUBDIVISION``). - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewGeometry <#rtcnewgeometry>`__, `RTCBufferType <#rtcbuffertype>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryMask ------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryMask - sets the geometry mask - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcSetGeometryMask( - RTCGeometry geometry, - unsigned int mask - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryMask`` function sets a 32-bit geometry mask -(``mask`` argument) for the specified geometry (``geometry`` argument). - -This geometry mask is used together with the ray mask stored inside the -``mask`` field of the ray. The primitives of the geometry are hit by the -ray only if the bitwise ``and`` operation of the geometry mask with the -ray mask is not 0. This feature can be used to disable selected -geometries for specifically tagged rays, e.g. to disable shadow casting -for certain geometries. - -Ray masks are disabled in Embree by default at compile time, and can be -enabled through the ``EMBREE_RAY_MASK`` parameter in CMake. 
One can -query whether ray masks are enabled by querying the -``RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED`` device property using -``rtcGetDeviceProperty``. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`RTCRay <#rtcray>`__, `rtcGetDeviceProperty <#rtcgetdeviceproperty>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryBuildQuality --------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryBuildQuality - sets the build quality for the geometry - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcSetGeometryBuildQuality( - RTCGeometry geometry, - enum RTCBuildQuality quality - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryBuildQuality`` function sets the build quality -(``quality`` argument) for the specified geometry (``geometry`` -argument). The per-geometry build quality is only a hint and may be -ignored. Embree currently uses the per-geometry build quality when the -scene build quality is set to ``RTC_BUILD_QUALITY_LOW``. In this mode a -two-level acceleration structure is built, and geometries build a -separate acceleration structure using the geometry build quality. The -per-geometry build quality can be one of: - -- ``RTC_BUILD_QUALITY_LOW``: Creates lower quality data structures, - e.g. for dynamic scenes. - -- ``RTC_BUILD_QUALITY_MEDIUM``: Default build quality for most usages. - Gives a good compromise between build and render performance. - -- ``RTC_BUILD_QUALITY_HIGH``: Creates higher quality data structures - for final-frame rendering. Enables a spatial split builder for - certain primitive types. - -- ``RTC_BUILD_QUALITY_REFIT``: Uses a BVH refitting approach when - changing only the vertex buffer. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcSetSceneBuildQuality <#rtcsetscenebuildquality>`__ - -.. 
raw:: latex - - \pagebreak - -rtcSetGeometryBuffer --------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryBuffer - assigns a view of a buffer to the geometry - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcSetGeometryBuffer( - RTCGeometry geometry, - enum RTCBufferType type, - unsigned int slot, - enum RTCFormat format, - RTCBuffer buffer, - size_t byteOffset, - size_t byteStride, - size_t itemCount - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryBuffer`` function binds a view of a buffer object -(``buffer`` argument) to a geometry buffer type and slot (``type`` and -``slot`` argument) of the specified geometry (``geometry`` argument). - -One can specify the start of the first buffer element in bytes -(``byteOffset`` argument), the byte stride between individual buffer -elements (``byteStride`` argument), the format of the buffer elements -(``format`` argument), and the number of elements to bind -(``itemCount``). - -The start address (``byteOffset`` argument) and stride (``byteStride`` -argument) must be both aligned to 4 bytes, otherwise the -``rtcSetGeometryBuffer`` function will fail. - -After successful completion of this function, the geometry will hold a -reference to the buffer object. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcSetSharedGeometryBuffer <#rtcsetsharedgeometrybuffer>`__, -`rtcSetNewGeometryBuffer <#rtcsetnewgeometrybuffer>`__ - -.. raw:: latex - - \pagebreak - -rtcSetSharedGeometryBuffer --------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetSharedGeometryBuffer - assigns a view of a shared data buffer - to a geometry - -SYNOPSIS -^^^^^^^^ - -.. 
code:: cpp - - #include - - void rtcSetSharedGeometryBuffer( - RTCGeometry geometry, - enum RTCBufferType type, - unsigned int slot, - enum RTCFormat format, - const void* ptr, - size_t byteOffset, - size_t byteStride, - size_t itemCount - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetSharedGeometryBuffer`` function binds a view of a shared -user-managed data buffer (``ptr`` argument) to a geometry buffer type -and slot (``type`` and ``slot`` argument) of the specified geometry -(``geometry`` argument). - -One can specify the start of the first buffer element in bytes -(``byteOffset`` argument), the byte stride between individual buffer -elements (``byteStride`` argument), the format of the buffer elements -(``format`` argument), and the number of elements to bind -(``itemCount``). - -The start address (``byteOffset`` argument) and stride (``byteStride`` -argument) must be both aligned to 4 bytes; otherwise the -``rtcSetSharedGeometryBuffer`` function will fail. - -When the buffer will be used as a vertex buffer -(``RTC_BUFFER_TYPE_VERTEX`` and ``RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE``), -the last buffer element must be readable using 16-byte SSE load -instructions, thus padding the last element is required for certain -layouts. E.g. a standard ``float3`` vertex buffer layout should add -storage for at least one more float to the end of the buffer. - -The buffer data must remain valid for as long as the buffer may be used, -and the user is responsible for freeing the buffer data when no longer -required. - -Sharing buffers can significantly reduce the memory required by the -application, thus we recommend using this feature. When enabling the -``RTC_SCENE_FLAG_COMPACT`` scene flag, the spatial index structures -index into the vertex buffer, resulting in even higher memory savings. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. 
- -SEE ALSO -^^^^^^^^ - -`rtcSetGeometryBuffer <#rtcsetgeometrybuffer>`__, -`rtcSetNewGeometryBuffer <#rtcsetnewgeometrybuffer>`__ - -.. raw:: latex - - \pagebreak - -rtcSetNewGeometryBuffer ------------------------ - -NAME -^^^^ - -.. code:: cpp - - rtcSetNewGeometryBuffer - creates and assigns a new data buffer to - the geometry - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void* rtcSetNewGeometryBuffer( - RTCGeometry geometry, - enum RTCBufferType type, - unsigned int slot, - enum RTCFormat format, - size_t byteStride, - size_t itemCount - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetNewGeometryBuffer`` function creates a new data buffer of -specified format (``format`` argument), byte stride (``byteStride`` -argument), and number of items (``itemCount`` argument), and assigns it -to a geometry buffer slot (``type`` and ``slot`` argument) of the -specified geometry (``geometry`` argument). The buffer data is managed -internally and automatically freed when the geometry is destroyed. - -The byte stride (``byteStride`` argument) must be aligned to 4 bytes; -otherwise the ``rtcSetNewGeometryBuffer`` function will fail. - -The allocated buffer will be automatically over-allocated slightly when -used as a vertex buffer, where a requirement is that each buffer element -should be readable using 16-byte SSE load instructions. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcSetGeometryBuffer <#rtcsetgeometrybuffer>`__, -`rtcSetSharedGeometryBuffer <#rtcsetsharedgeometrybuffer>`__ - -.. raw:: latex - - \pagebreak - -RTCFormat ---------- - -NAME -^^^^ - -.. code:: cpp - - RTCFormat - specifies format of data in buffers - -SYNOPSIS -^^^^^^^^ - -.. 
code:: cpp - - #include - - enum RTCFormat - { - RTC_FORMAT_UINT, - RTC_FORMAT_UINT2, - RTC_FORMAT_UINT3, - RTC_FORMAT_UINT4, - - RTC_FORMAT_FLOAT, - RTC_FORMAT_FLOAT2, - RTC_FORMAT_FLOAT3, - RTC_FORMAT_FLOAT4, - RTC_FORMAT_FLOAT5, - RTC_FORMAT_FLOAT6, - RTC_FORMAT_FLOAT7, - RTC_FORMAT_FLOAT8, - RTC_FORMAT_FLOAT9, - RTC_FORMAT_FLOAT10, - RTC_FORMAT_FLOAT11, - RTC_FORMAT_FLOAT12, - RTC_FORMAT_FLOAT13, - RTC_FORMAT_FLOAT14, - RTC_FORMAT_FLOAT15, - RTC_FORMAT_FLOAT16, - - RTC_FORMAT_FLOAT3X4_ROW_MAJOR, - RTC_FORMAT_FLOAT4X4_ROW_MAJOR, - - RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR, - RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR, - - RTC_FORMAT_GRID, - -}; - -DESCRIPTION -^^^^^^^^^^^ - -The ``RTCFormat`` enumeration defines the data format stored in data -buffers provided to Embree using the -`rtcSetGeometryBuffer <#rtcsetgeometrybuffer>`__, -`rtcSetSharedGeometryBuffer <#rtcsetsharedgeometrybuffer>`__, and -`rtcSetNewGeometryBuffer <#rtcsetnewgeometrybuffer>`__ API calls. - -The ``RTC_FORMAT_UINT/2/3/4`` formats are used to specify that data -buffers store unsigned integers, or unsigned integer vectors of size 2,3 -or 4. These formats are typically used when specifying index -buffers, e.g. ``RTC_FORMAT_UINT3`` for triangle meshes. - -The ``RTC_FORMAT_FLOAT/2/3/4...`` formats are used to specify that data -buffers store single precision floating point values, or vectors -thereof (size 2,3,4, etc.). These formats are typically used to specify the format -of vertex buffers, e.g. the ``RTC_FORMAT_FLOAT3`` type for vertex -buffers of triangle meshes. - -The ``RTC_FORMAT_FLOAT3X4_ROW_MAJOR`` and -``RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR`` formats specify a 3x4 floating -point matrix laid out either row major or column major. The -``RTC_FORMAT_FLOAT4X4_ROW_MAJOR`` and -``RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR`` formats specify a 4x4 floating -point matrix laid out either row major or column major. 
These matrix -formats are used in the -`rtcSetGeometryTransform <#rtcsetgeometrytransform>`__ function in order -to set a transformation matrix for geometries. - -The ``RTC_FORMAT_GRID`` is a special data format used to specify grid -primitives of layout RTCGrid when creating grid geometries (see -`RTC_GEOMETRY_TYPE_GRID <#rtc_geometry_type_grid>`__). - -EXIT STATUS -^^^^^^^^^^^ - -SEE ALSO -^^^^^^^^ - -`rtcSetGeometryBuffer <#rtcsetgeometrybuffer>`__, -`rtcSetSharedGeometryBuffer <#rtcsetsharedgeometrybuffer>`__, -`rtcSetNewGeometryBuffer <#rtcsetnewgeometrybuffer>`__, -`rtcSetGeometryTransform <#rtcsetgeometrytransform>`__ - -.. raw:: latex - - \pagebreak - -RTCBufferType -------------- - -NAME -^^^^ - -.. code:: cpp - - RTCBufferType - specifies the type of buffers assigned to geometries - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - enum RTCBufferType - { - RTC_BUFFER_TYPE_INDEX = 0, - RTC_BUFFER_TYPE_VERTEX = 1, - RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE = 2, - RTC_BUFFER_TYPE_NORMAL = 3, - RTC_BUFFER_TYPE_TANGENT = 4, - RTC_BUFFER_TYPE_NORMAL_DERIVATIVE = 5, - - RTC_BUFFER_TYPE_GRID = 8, - - RTC_BUFFER_TYPE_FACE = 16, - RTC_BUFFER_TYPE_LEVEL = 17, - RTC_BUFFER_TYPE_EDGE_CREASE_INDEX = 18, - RTC_BUFFER_TYPE_EDGE_CREASE_WEIGHT = 19, - RTC_BUFFER_TYPE_VERTEX_CREASE_INDEX = 20, - RTC_BUFFER_TYPE_VERTEX_CREASE_WEIGHT = 21, - RTC_BUFFER_TYPE_HOLE = 22, - - RTC_BUFFER_TYPE_FLAGS = 32 - }; - -DESCRIPTION -^^^^^^^^^^^ - -The ``RTCBufferType`` enumeration defines slots to assign data buffers to -using the `rtcSetGeometryBuffer <#rtcsetgeometrybuffer>`__, -`rtcSetSharedGeometryBuffer <#rtcsetsharedgeometrybuffer>`__, and -`rtcSetNewGeometryBuffer <#rtcsetnewgeometrybuffer>`__ API calls. - -For most geometry types the ``RTC_BUFFER_TYPE_INDEX`` slot is used to -assign an index buffer, while the ``RTC_BUFFER_TYPE_VERTEX`` is used to -assign the corresponding vertex buffer. 
- -The ``RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE`` slot can get used to assign -arbitrary additional vertex data which can get interpolated using the -`rtcInterpolate <#rtcinterpolate>`__ API call. - -The ``RTC_BUFFER_TYPE_NORMAL``, ``RTC_BUFFER_TYPE_TANGENT``, and -``RTC_BUFFER_TYPE_NORMAL_DERIVATIVE`` are special buffers required to -assign per vertex normals, tangents, and normal derivatives for some -curve types. - -The ``RTC_BUFFER_TYPE_GRID`` buffer is used to assign the grid primitive -buffer for grid geometries (see -`RTC_GEOMETRY_TYPE_GRID <#rtc_geometry_type_grid>`__). - -The ``RTC_BUFFER_TYPE_FACE``, ``RTC_BUFFER_TYPE_LEVEL``, -``RTC_BUFFER_TYPE_EDGE_CREASE_INDEX``, -``RTC_BUFFER_TYPE_EDGE_CREASE_WEIGHT``, -``RTC_BUFFER_TYPE_VERTEX_CREASE_INDEX``, -``RTC_BUFFER_TYPE_VERTEX_CREASE_WEIGHT``, and ``RTC_BUFFER_TYPE_HOLE`` -are special buffers required to create subdivision meshes (see -`RTC_GEOMETRY_TYPE_SUBDIVISION <#rtc_geometry_type_subdivision>`__). - -The ``RTC_BUFFER_TYPE_FLAGS`` can get used to add an additional flag per -primitive of a geometry, and is currently only used for linear curves. - -EXIT STATUS -^^^^^^^^^^^ - -SEE ALSO -^^^^^^^^ - -`rtcSetGeometryBuffer <#rtcsetgeometrybuffer>`__, -`rtcSetSharedGeometryBuffer <#rtcsetsharedgeometrybuffer>`__, -`rtcSetNewGeometryBuffer <#rtcsetnewgeometrybuffer>`__ - -.. raw:: latex - - \pagebreak - -rtcGetGeometryBufferData ------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcGetGeometryBufferData - gets pointer to - the first buffer view element - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void* rtcGetGeometryBufferData( - RTCGeometry geometry, - enum RTCBufferType type, - unsigned int slot - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcGetGeometryBufferData`` function returns a pointer to the first -element of the buffer view attached to the specified buffer type and -slot (``type`` and ``slot`` argument) of the geometry (``geometry`` -argument). 
- -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcSetGeometryBuffer <#rtcsetgeometrybuffer>`__, -`rtcSetSharedGeometryBuffer <#rtcsetsharedgeometrybuffer>`__, -`rtcSetNewGeometryBuffer <#rtcsetnewgeometrybuffer>`__ - -.. raw:: latex - - \pagebreak - -rtcUpdateGeometryBuffer ------------------------ - -NAME -^^^^ - -.. code:: cpp - - rtcUpdateGeometryBuffer - marks a buffer view bound to the geometry - as modified - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcUpdateGeometryBuffer( - RTCGeometry geometry, - enum RTCBufferType type, - unsigned int slot - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcUpdateGeometryBuffer`` function marks the buffer view bound to -the specified buffer type and slot (``type`` and ``slot`` argument) of a -geometry (``geometry`` argument) as modified. - -If a data buffer is changed by the application, the -``rtcUpdateGeometryBuffer`` call must be invoked for that buffer. Each -buffer view assigned to a buffer slot is initially marked as modified, -thus this function needs to be called only when doing buffer -modifications after the first ``rtcCommitScene``. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewGeometry <#rtcnewgeometry>`__, -`rtcCommitScene <#rtccommitscene>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryIntersectFilterFunction -------------------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryIntersectFilterFunction - sets the intersection filter - for the geometry - -SYNOPSIS -^^^^^^^^ - -.. 
code:: cpp - - #include - - struct RTCFilterFunctionNArguments - { - int* valid; - void* geometryUserPtr; - const struct RTCIntersectContext* context; - struct RTCRayN* ray; - struct RTCHitN* hit; - unsigned int N; - }; - - typedef void (*RTCFilterFunctionN)( - const struct RTCFilterFunctionNArguments* args - ); - - void rtcSetGeometryIntersectFilterFunction( - RTCGeometry geometry, - RTCFilterFunctionN filter - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryIntersectFilterFunction`` function registers an -intersection filter callback function (``filter`` argument) for the -specified geometry (``geometry`` argument). - -Only a single callback function can be registered per geometry, and -further invocations overwrite the previously set callback function. -Passing ``NULL`` as function pointer disables the registered callback -function. - -The registered intersection filter function is invoked for every hit -encountered during the ``rtcIntersect``-type ray queries and can accept -or reject that hit. The feature can be used to define a silhouette for a -primitive and reject hits that are outside the silhouette. E.g. a tree -leaf could be modeled with an alpha texture that decides whether hit -points lie inside or outside the leaf. - -If the ``RTC_BUILD_QUALITY_HIGH`` mode is set, the filter functions may -be called multiple times for the same primitive hit. Further, rays -hitting exactly the edge might also report two hits for the same -surface. For certain use cases, the application may have to work around -this limitation by collecting already reported hits -(``geomID``/``primID`` pairs) and ignoring duplicates. - -The filter function callback of type ``RTCFilterFunctionN`` gets passed -a number of arguments through the ``RTCFilterFunctionNArguments`` -structure. The ``valid`` parameter of that structure points to an -integer valid mask (0 means invalid and -1 means valid). 
The -``geometryUserPtr`` member is a user pointer optionally set per geometry -through the ``rtcSetGeometryUserData`` function. The ``context`` member -points to the intersection context passed to the ray query function. The -``ray`` parameter points to ``N`` rays in SOA layout. The ``hit`` -parameter points to ``N`` hits in SOA layout to test. The ``N`` -parameter is the number of rays and hits in ``ray`` and ``hit``. The hit -distance is provided as the ``tfar`` value of the ray. If the hit -geometry is instanced, the ``instID`` member of the ray is valid, and -the ray and the potential hit are in object space. - -The filter callback function has the task to check for each valid ray -whether it wants to accept or reject the corresponding hit. To reject a -hit, the filter callback function just has to write ``0`` to the integer -valid mask of the corresponding ray. To accept the hit, it just has to -leave the valid mask set to ``-1``. The filter function is further -allowed to change the hit and decrease the ``tfar`` value of the ray but -it should not modify other ray data nor any inactive components of the -ray or hit. - -When performing ray queries using ``rtcIntersect1``, it is guaranteed -that the packet size is 1 when the callback is invoked. When performing -ray queries using the ``rtcIntersect4/8/16`` functions, it is not -generally guaranteed that the ray packet size (and order of rays inside -the packet) passed to the callback matches the initial ray packet. -However, under some circumstances these properties are guaranteed, and -whether this is the case can be queried using ``rtcGetDeviceProperty``. -When performing ray queries using the stream API such as -``rtcIntersect1M``, ``rtcIntersect1Mp``, ``rtcIntersectNM``, or -``rtcIntersectNp`` the order of rays and ray packet size of the callback -function might change to either 1, 4, 8, or 16. 
- -For many usage scenarios, repacking and re-ordering of rays does not -cause difficulties in implementing the callback function. However, -algorithms that need to extend the ray with additional data must use the -``rayID`` component of the ray to identify the original ray to access -the per-ray data. - -The implementation of the filter function can choose to implement a -single code path that uses the ray access helper functions -``RTCRay_XXX`` and hit access helper functions ``RTCHit_XXX`` to access -ray and hit data. Alternatively the code can branch to optimized -implementations for specific sizes of ``N`` and cast the ``ray`` and -``hit`` inputs to the proper packet types. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcSetGeometryOccludedFilterFunction <#rtcsetgeometryoccludedfilterfunction>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryOccludedFilterFunction ------------------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryOccludedFilterFunction - sets the occlusion filter - for the geometry - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcSetGeometryOccludedFilterFunction( - RTCGeometry geometry, - RTCFilterFunctionN filter - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryOccludedFilterFunction`` function registers an -occlusion filter callback function (``filter`` argument) for the -specified geometry (``geometry`` argument). - -Only a single callback function can be registered per geometry, and -further invocations overwrite the previously set callback function. -Passing ``NULL`` as function pointer disables the registered callback -function. - -The registered occlusion filter function is invoked for every hit -encountered during the ``rtcOccluded``-type ray queries and can accept -or reject that hit.
The feature can be used to define a silhouette for a -primitive and reject hits that are outside the silhouette. E.g. a tree -leaf could be modeled with an alpha texture that decides whether hit -points lie inside or outside the leaf. - -Please see the description of the -``rtcSetGeometryIntersectFilterFunction`` for a description of the -filter callback function. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcSetGeometryIntersectFilterFunction <#rtcsetgeometryintersectfilterfunction>`__ - -.. raw:: latex - - \pagebreak - -rtcFilterIntersection ---------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcFilterIntersection - invokes the intersection filter function - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcFilterIntersection( - const struct RTCIntersectFunctionNArguments* args, - const struct RTCFilterFunctionNArguments* filterArgs - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcFilterIntersection`` function can be called inside an -``RTCIntersectFunctionN`` callback function to invoke the intersection -filter registered to the geometry and stored inside the context. For -this an ``RTCFilterFunctionNArguments`` structure must be created (see -``rtcSetGeometryIntersectFilterFunction``) which basically consists of a -valid mask, a hit packet to filter, the corresponding ray packet, and -the packet size. After the invocation of ``rtcFilterIntersection``, only -rays that are still valid (valid mask set to -1) should update a hit. - -EXIT STATUS -^^^^^^^^^^^ - -For performance reasons this function does not do any error checks, thus -will not set any error flags on failure. - -SEE ALSO -^^^^^^^^ - -`rtcFilterOcclusion <#rtcfilterocclusion>`__, -`rtcSetGeometryIntersectFunction <#rtcsetgeometryintersectfunction>`__ - -.. raw:: latex - - \pagebreak - -rtcFilterOcclusion ------------------- - -NAME -^^^^ - -.. 
code:: cpp - - rtcFilterOcclusion - invokes the occlusion filter function - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcFilterOcclusion( - const struct RTCOccludedFunctionNArguments* args, - const struct RTCFilterFunctionNArguments* filterArgs - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcFilterOcclusion`` function can be called inside an -``RTCOccludedFunctionN`` callback function to invoke the occlusion -filter registered to the geometry and stored inside the context. For -this an ``RTCFilterFunctionNArguments`` structure must be created (see -``rtcSetGeometryIntersectFilterFunction``) which basically consists of a -valid mask, a hit packet to filter, the corresponding ray packet, and -the packet size. After the invocation of ``rtcFilterOcclusion`` only -rays that are still valid (valid mask set to -1) should signal an -occlusion. - -EXIT STATUS -^^^^^^^^^^^ - -For performance reasons this function does not do any error checks, thus -will not set any error flags on failure. - -SEE ALSO -^^^^^^^^ - -`rtcFilterIntersection <#rtcfilterintersection>`__, -`rtcSetGeometryOccludedFunction <#rtcsetgeometryoccludedfunction>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryUserData ----------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryUserData - sets the user-defined data pointer of the - geometry - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcSetGeometryUserData(RTCGeometry geometry, void* userPtr); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryUserData`` function sets the user-defined data -pointer (``userPtr`` argument) for a geometry (``geometry`` argument). -This user data pointer is intended to be pointing to the application’s -representation of the geometry, and is passed to various callback -functions. The application can use this pointer inside the callback -functions to access its geometry representation. 
- -The ``rtcGetGeometryUserData`` function can be used to query an already -set user data pointer of a geometry. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcGetGeometryUserData <#rtcgetgeometryuserdata>`__ - -.. raw:: latex - - \pagebreak - -rtcGetGeometryUserData ----------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcGetGeometryUserData - returns the user data pointer - of the geometry - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void* rtcGetGeometryUserData(RTCGeometry geometry); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcGetGeometryUserData`` function queries the user data pointer -previously set with ``rtcSetGeometryUserData``. When -``rtcSetGeometryUserData`` was not called yet, ``NULL`` is returned. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcSetGeometryUserData <#rtcsetgeometryuserdata>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryUserPrimitiveCount --------------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryUserPrimitiveCount - sets the number of primitives - of a user-defined geometry - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcSetGeometryUserPrimitiveCount( - RTCGeometry geometry, - unsigned int userPrimitiveCount - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryUserPrimitiveCount`` function sets the number of -user-defined primitives (``userPrimitiveCount`` parameter) of the -specified user-defined geometry (``geometry`` parameter). - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`RTC_GEOMETRY_TYPE_USER <#rtc_geometry_type_user>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryBoundsFunction ----------------------------- - -NAME -^^^^ - -.. 
code:: cpp - - rtcSetGeometryBoundsFunction - sets a callback to query the - bounding box of user-defined primitives - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - struct RTCBoundsFunctionArguments - { - void* geometryUserPtr; - unsigned int primID; - unsigned int timeStep; - struct RTCBounds* bounds_o; - }; - - typedef void (*RTCBoundsFunction)( - const struct RTCBoundsFunctionArguments* args - ); - - void rtcSetGeometryBoundsFunction( - RTCGeometry geometry, - RTCBoundsFunction bounds, - void* userPtr - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryBoundsFunction`` function registers a bounding box -callback function (``bounds`` argument) with payload (``userPtr`` -argument) for the specified user geometry (``geometry`` argument). - -Only a single callback function can be registered per geometry, and -further invocations overwrite the previously set callback function. -Passing ``NULL`` as function pointer disables the registered callback -function. - -The registered bounding box callback function is invoked to calculate -axis-aligned bounding boxes of the primitives of the user-defined -geometry during spatial acceleration structure construction. The -bounding box callback of ``RTCBoundsFunction`` type is invoked with a -pointer to a structure of type ``RTCBoundsFunctionArguments`` which -contains various arguments, such as: the user data of the geometry -(``geometryUserPtr`` member), the ID of the primitive to calculate the -bounds for (``primID`` member), the time step at which to calculate the -bounds (``timeStep`` member), and a memory location to write the -calculated bound to (``bounds_o`` member). - -In a typical usage scenario one would store a pointer to the internal -representation of the user geometry object using -``rtcSetGeometryUserData``. 
The callback function can then read that -pointer from the ``geometryUserPtr`` field and calculate the proper -bounding box for the requested primitive and time, and store that -bounding box to the destination structure (``bounds_o`` member). - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`RTC_GEOMETRY_TYPE_USER <#rtc_geometry_type_user>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryIntersectFunction -------------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryIntersectFunction - sets the callback function to - intersect a user geometry - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - struct RTCIntersectFunctionNArguments - { - int* valid; - void* geometryUserPtr; - unsigned int primID; - struct RTCIntersectContext* context; - struct RTCRayHitN* rayhit; - unsigned int N; - unsigned int geomID; - }; - - typedef void (*RTCIntersectFunctionN)( - const struct RTCIntersectFunctionNArguments* args - ); - - void rtcSetGeometryIntersectFunction( - RTCGeometry geometry, - RTCIntersectFunctionN intersect - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryIntersectFunction`` function registers a -ray/primitive intersection callback function (``intersect`` argument) -for the specified user geometry (``geometry`` argument). - -Only a single callback function can be registered per geometry and -further invocations overwrite the previously set callback function. -Passing ``NULL`` as function pointer disables the registered callback -function. - -The registered callback function is invoked by ``rtcIntersect``-type ray -queries to calculate the intersection of a ray packet of variable size -with one user-defined primitive. The callback function of type -``RTCIntersectFunctionN`` gets passed a number of arguments through the -``RTCIntersectFunctionNArguments`` structure. 
The value ``N`` specifies -the ray packet size, ``valid`` points to an array of integers that -specify whether the corresponding ray is valid (-1) or invalid (0), the -``geometryUserPtr`` member points to the geometry user data previously -set through ``rtcSetGeometryUserData``, the ``context`` member points to -the intersection context passed to the ray query, the ``rayhit`` member -points to a ray and hit packet of variable size ``N``, and the -``geomID`` and ``primID`` members identify the geometry ID and -primitive ID of the primitive to intersect. - -The ``ray`` component of the ``rayhit`` structure contains valid data, -in particular the ``tfar`` value is the current closest hit distance -found. All data inside the ``hit`` component of the ``rayhit`` structure -are undefined and should not be read by the function. - -The task of the callback function is to intersect each active ray from -the ray packet with the specified user primitive. If the user-defined -primitive is missed by a ray of the ray packet, the function should -return without modifying the ray or hit. If an intersection of the -user-defined primitive with the ray was found in the valid range (from -``tnear`` to ``tfar``), it should update the hit distance of the ray -(``tfar`` member) and the hit (``u``, ``v``, ``Ng``, ``instID``, -``geomID``, ``primID`` members). In particular, the currently -intersected instance is stored in the ``instID`` field of the -intersection context, which must be deep copied into the ``instID`` -member of the hit. - -As a primitive might have multiple intersections with a ray, the -intersection filter function needs to be invoked by the user geometry -intersection callback for each encountered intersection, if filtering of -intersections is desired. This can be achieved through the -``rtcFilterIntersection`` call. - -Within the user geometry intersect function, it is safe to trace new -rays and create new scenes and geometries.
- -When performing ray queries using ``rtcIntersect1``, it is guaranteed -that the packet size is 1 when the callback is invoked. When performing -ray queries using the ``rtcIntersect4/8/16`` functions, it is not -generally guaranteed that the ray packet size (and order of rays inside -the packet) passed to the callback matches the initial ray packet. -However, under some circumstances these properties are guaranteed, and -whether this is the case can be queried using ``rtcGetDeviceProperty``. -When performing ray queries using the stream API such as -``rtcIntersect1M``, ``rtcIntersect1Mp``, ``rtcIntersectNM``, or -``rtcIntersectNp`` the order of rays and ray packet size of the callback -function might change to either 1, 4, 8, or 16. - -For many usage scenarios, repacking and re-ordering of rays does not -cause difficulties in implementing the callback function. However, -algorithms that need to extend the ray with additional data must use the -``rayID`` component of the ray to identify the original ray to access -the per-ray data. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcSetGeometryOccludedFunction <#rtcsetgeometryoccludedfunction>`__, -`rtcSetGeometryUserData <#rtcsetgeometryuserdata>`__, -`rtcFilterIntersection <#rtcfilterintersection>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryOccludedFunction ------------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryOccludedFunction - sets the callback function to - test a user geometry for occlusion - -SYNOPSIS -^^^^^^^^ - -.. 
code:: cpp - - #include - - struct RTCOccludedFunctionNArguments - { - int* valid; - void* geometryUserPtr; - unsigned int primID; - struct RTCIntersectContext* context; - struct RTCRayN* ray; - unsigned int N; - unsigned int geomID; - }; - - typedef void (*RTCOccludedFunctionN)( - const struct RTCOccludedFunctionNArguments* args - ); - - void rtcSetGeometryOccludedFunction( - RTCGeometry geometry, - RTCOccludedFunctionN filter - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryOccludedFunction`` function registers a -ray/primitive occlusion callback function (``filter`` argument) for the -specified user geometry (``geometry`` argument). - -Only a single callback function can be registered per geometry, and -further invocations overwrite the previously set callback function. -Passing ``NULL`` as function pointer disables the registered callback -function. - -The registered callback function is invoked by ``rtcOccluded``-type ray -queries to test whether the rays of a packet of variable size are -occluded by a user-defined primitive. The callback function of type -``RTCOccludedFunctionN`` gets passed a number of arguments through the -``RTCOccludedFunctionNArguments`` structure. The value ``N`` specifies -the ray packet size, ``valid`` points to an array of integers which -specify whether the corresponding ray is valid (-1) or invalid (0), the -``geometryUserPtr`` member points to the geometry user data previously -set through ``rtcSetGeometryUserData``, the ``context`` member points to -the intersection context passed to the ray query, the ``ray`` member -points to a ray packet of variable size ``N``, and the ``geomID`` and -``primID`` members identify the geometry ID and primitive ID of the -primitive to intersect. - -The task of the callback function is to intersect each active ray from -the ray packet with the specified user primitive.
If the user-defined -primitive is missed by a ray of the ray packet, the function should -return without modifying the ray. If an intersection of the user-defined -primitive with the ray was found in the valid range (from ``tnear`` to -``tfar``), it should set the ``tfar`` member of the ray to ``-inf``. - -As a primitive might have multiple intersections with a ray, the -occlusion filter function needs to be invoked by the user geometry -occlusion callback for each encountered intersection, if filtering of -intersections is desired. This can be achieved through the -``rtcFilterOcclusion`` call. - -Within the user geometry occlusion function, it is safe to trace new -rays and create new scenes and geometries. - -When performing ray queries using ``rtcOccluded1``, it is guaranteed -that the packet size is 1 when the callback is invoked. When performing -ray queries using the ``rtcOccluded4/8/16`` functions, it is not -generally guaranteed that the ray packet size (and order of rays inside -the packet) passed to the callback matches the initial ray packet. -However, under some circumstances these properties are guaranteed, and -whether this is the case can be queried using ``rtcGetDeviceProperty``. -When performing ray queries using the stream API such as -``rtcOccluded1M``, ``rtcOccluded1Mp``, ``rtcOccludedNM``, or -``rtcOccludedNp`` the order of rays and ray packet size of the callback -function might change to either 1, 4, 8, or 16. - -For many usage scenarios, repacking and re-ordering of rays does not -cause difficulties in implementing the callback function. However, -algorithms that need to extend the ray with additional data must use the -``rayID`` component of the ray to identify the original ray to access -the per-ray data. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. 
- -SEE ALSO -^^^^^^^^ - -`rtcSetGeometryIntersectFunction <#rtcsetgeometryintersectfunction>`__, -`rtcSetGeometryUserData <#rtcsetgeometryuserdata>`__, -`rtcFilterOcclusion <#rtcfilterocclusion>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryPointQueryFunction --------------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryPointQueryFunction - sets the point query callback function - for a geometry - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - struct RTCPointQueryFunctionArguments - { - // the (world space) query object that was passed as an argument of rtcPointQuery. - struct RTCPointQuery* query; - - // used for user input/output data. Will not be read or modified internally. - void* userPtr; - - // primitive and geometry ID of primitive - unsigned int primID; - unsigned int geomID; - - // the context with transformation and instance ID stack - struct RTCPointQueryContext* context; - - // scaling factor indicating whether the current instance transformation - // is a similarity transformation. - float similarityScale; - }; - - typedef bool (*RTCPointQueryFunction)( - struct RTCPointQueryFunctionArguments* args - ); - - void rtcSetGeometryPointQueryFunction( - RTCGeometry geometry, - RTCPointQueryFunction queryFunc - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryPointQueryFunction`` function registers a point -query callback function (``queryFunc`` argument) for the specified -geometry (``geometry`` argument). - -Only a single callback function can be registered per geometry and -further invocations overwrite the previously set callback function. -Passing ``NULL`` as function pointer disables the registered callback -function. - -The registered callback function is invoked by -`rtcPointQuery <#rtcpointquery>`__ for every primitive of the geometry -that intersects the corresponding point query domain. 
The callback -function of type ``RTCPointQueryFunction`` gets passed a number of -arguments through the ``RTCPointQueryFunctionArguments`` structure. The -``query`` object is the original point query object passed into -`rtcPointQuery <#rtcpointquery>`__, ``userPtr`` is an arbitrary pointer -to pass input into and store results of the callback function. The -``primID``, ``geomID`` and ``context`` (see -`rtcInitPointQueryContext <#rtcinitpointquerycontext>`__ for details) -can be used to identify the geometry data of the primitive. - -A ``RTCPointQueryFunction`` can also be passed directly as an argument -to `rtcPointQuery <#rtcpointquery>`__. In this case the callback is -invoked for all primitives in the scene that intersect the query domain. -If a callback function is passed as an argument to -`rtcPointQuery <#rtcpointquery>`__ and (a potentially different) -callback function is set for a geometry with -`rtcSetGeometryPointQueryFunction <#rtcsetgeometrypointqueryfunction>`__ -both callback functions are invoked and the callback function passed to -`rtcPointQuery <#rtcpointquery>`__ will be called before the geometry -specific callback function. - -If instancing is used, the parameter ``similarityScale`` indicates -whether the current instance transform (top element of the stack in -``context``) is a similarity transformation or not. Similarity -transformations are composed of translation, rotation and uniform -scaling and if a matrix M defines a similarity transformation, there is -a scaling factor D such that for all x,y: dist(Mx, My) = D \* dist(x, -y). In this case the parameter ``similarityScale`` is this scaling factor -D and otherwise it is 0. A valid similarity scale (``similarityScale`` > -0) allows computing distance information in instance space and scaling -the distances into world space (for example, to update the query radius, -see below) by dividing the instance space distance by the similarity -scale.
If the current instance transform is not a similarity transform -(``similarityScale`` is 0), the distance computation has to be performed -in world space to ensure correctness. In this case the instance to world -transformations given with the ``context`` should be used to transform -the primitive data into world space. Otherwise, the query location can -be transformed into instance space which can be more efficient. If there -is no instance transform, the similarity scale is 1. - -The callback function will potentially be called for primitives outside -the query domain for two reasons: First, the callback is invoked for all -primitives inside a BVH leaf node since no geometry data of primitives -is determined internally and therefore individual primitives are not -culled (only their (aggregated) bounding boxes). Second, in case non -similarity transformations are used, the resulting ellipsoidal query -domain (in instance space) is approximated by its axis aligned bounding -box internally and therefore inner nodes that do not intersect the -original domain might intersect the approximative bounding box which -results in unnecessary callbacks. In any case, the callbacks are -conservative, i.e. if a primitive is inside the query domain a callback -will be invoked but the reverse is not necessarily true. - -For efficiency, the radius of the ``query`` object can be decreased (in -world space) inside the callback function to improve culling of geometry -during BVH traversal. If the query radius was updated, the callback -function should return ``true`` to issue an update of internal traversal -information. Increasing the radius or modifying the time or position of -the query results in undefined behaviour. - -Within the callback function, it is safe to call -`rtcPointQuery <#rtcpointquery>`__ again, for example when implementing -instancing manually. In this case the instance transformation should be -pushed onto the stack in ``context``. 
Embree will internally compute the -point query information in instance space using the top element of the -stack in ``context`` when `rtcPointQuery <#rtcpointquery>`__ is called. - -For a reference implementation of a closest point traversal of triangle -meshes using instancing and user defined instancing see the tutorial -[ClosestPoint]. - -SEE ALSO -^^^^^^^^ - -`rtcPointQuery <#rtcpointquery>`__, -`rtcInitPointQueryContext <#rtcinitpointquerycontext>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryInstancedScene ----------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryInstancedScene - sets the instanced scene of - an instance geometry - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcSetGeometryInstancedScene( - RTCGeometry geometry, - RTCScene scene - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryInstancedScene`` function sets the instanced scene -(``scene`` argument) of the specified instance geometry (``geometry`` -argument). - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`RTC_GEOMETRY_TYPE_INSTANCE <#rtc_geometry_type_instance>`__, -`rtcSetGeometryTransform <#rtcsetgeometrytransform>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryTransform ------------------------ - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryTransform - sets the transformation for a particular - time step of an instance geometry - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcSetGeometryTransform( - RTCGeometry geometry, - unsigned int timeStep, - enum RTCFormat format, - const float* xfm - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryTransform`` function sets the local-to-world affine -transformation (``xfm`` parameter) of an instance geometry (``geometry`` -parameter) for a particular time step (``timeStep`` parameter). 
The -transformation is specified as a 3×4 matrix (3×3 linear transformation -plus translation), for which the following formats (``format`` -parameter) are supported: - -- ``RTC_FORMAT_FLOAT3X4_ROW_MAJOR``: The 3×4 float matrix is laid out - in row-major form. - -- ``RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR``: The 3×4 float matrix is laid - out in column-major form. - -- ``RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR``: The 3×4 float matrix is laid - out in column-major form as a 4×4 homogeneous matrix with the last - row being equal to (0, 0, 0, 1). - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`RTC_GEOMETRY_TYPE_INSTANCE <#rtc_geometry_type_instance>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryTransformQuaternion ---------------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryTransformQuaternion - sets the transformation for a particular - time step of an instance geometry as a decomposition of the - transformation matrix using quaternions to represent the rotation. - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcSetGeometryTransformQuaternion( - RTCGeometry geometry, - unsigned int timeStep, - const struct RTCQuaternionDecomposition* qd - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryTransformQuaternion`` function sets the -local-to-world affine transformation (``qd`` parameter) of an instance -geometry (``geometry`` parameter) for a particular time step -(``timeStep`` parameter). The transformation is specified as a -`RTCQuaternionDecomposition <#rtcquaterniondecomposition>`__, which is a -decomposition of an affine transformation that represents the rotational -component of an affine transformation as a quaternion. This allows -interpolating rotational transformations exactly using spherical linear -interpolation (such as a turning wheel). 
- -For more information about the decomposition see -`RTCQuaternionDecomposition <#rtcquaterniondecomposition>`__. The -quaternion given in the ``RTCQuaternionDecomposition`` struct will be -normalized internally. - -For correct results, the transformation matrices for all time steps must -be set either using ``rtcSetGeometryTransform`` or -``rtcSetGeometryTransformQuaternion``. Mixing both representations is -not allowed. Spherical linear interpolation will be used, iff the -transformation matrices are set with -``rtcSetGeometryTransformQuaternion``. - -For an example of this feature see the tutorial `Quaternion Motion -Blur `__. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcInitQuaternionDecomposition <#rtcinitquaterniondecomposition>`__, -`rtcSetGeometryTransform <#rtcsetgeometrytransform>`__ - -.. raw:: latex - - \pagebreak - -rtcGetGeometryTransform ------------------------ - -NAME -^^^^ - -.. code:: cpp - - rtcGetGeometryTransform - returns the interpolated instance - transformation for the specified time - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcGetGeometryTransform( - RTCGeometry geometry, - float time, - enum RTCFormat format, - void* xfm - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcGetGeometryTransform`` function returns the interpolated local -to world transformation (``xfm`` parameter) of an instance geometry -(``geometry`` parameter) for a particular time (``time`` parameter in -range :math:`[0,1]`) in the specified format (``format`` parameter). - -Possible formats for the returned matrix are: - -- ``RTC_FORMAT_FLOAT3X4_ROW_MAJOR``: The 3×4 float matrix is laid out - in row-major form. - -- ``RTC_FORMAT_FLOAT3X4_COLUMN_MAJOR``: The 3×4 float matrix is laid - out in column-major form. 
- -- ``RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR``: The 3×4 float matrix is laid - out in column-major form as a 4×4 homogeneous matrix with last row - equal to (0, 0, 0, 1). - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`RTC_GEOMETRY_TYPE_INSTANCE <#rtc_geometry_type_instance>`__, -`rtcSetGeometryTransform <#rtcsetgeometrytransform>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryTessellationRate ------------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryTessellationRate - sets the tessellation rate of the - geometry - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcSetGeometryTessellationRate( - RTCGeometry geometry, - float tessellationRate - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryTessellationRate`` function sets the tessellation -rate (``tessellationRate`` argument) for the specified geometry -(``geometry`` argument). The tessellation rate can only be set for flat -curves and subdivision geometries. For curves, the tessellation rate -specifies the number of ray-facing quads per curve segment. For -subdivision surfaces, the tessellation rate specifies the number of -quads along each edge. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`RTC_GEOMETRY_TYPE_CURVE <#rtc_geometry_type_curve>`__, -`RTC_GEOMETRY_TYPE_SUBDIVISION <#rtc_geometry_type_subdivision>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryTopologyCount ---------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryTopologyCount - sets the number of topologies of - a subdivision geometry - -SYNOPSIS -^^^^^^^^ - -.. 
code:: cpp - - #include - - void rtcSetGeometryTopologyCount( - RTCGeometry geometry, - unsigned int topologyCount - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryTopologyCount`` function sets the number of -topologies (``topologyCount`` parameter) for the specified subdivision -geometry (``geometry`` parameter). The number of topologies of a -subdivision geometry must be greater or equal to 1. - -To use multiple topologies, first the number of topologies must be -specified, then the individual topologies can be configured using -``rtcSetGeometrySubdivisionMode`` and by setting an index buffer -(``RTC_BUFFER_TYPE_INDEX``) using the topology ID as the buffer slot. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`RTC_GEOMETRY_TYPE_SUBDIVISION <#rtc_geometry_type_subdivision>`__, -`rtcSetGeometrySubdivisionMode <#rtcsetgeometrysubdivisionmode>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometrySubdivisionMode ------------------------------ - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometrySubdivisionMode - sets the subdivision mode - of a subdivision geometry - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcSetGeometrySubdivisionMode( - RTCGeometry geometry, - unsigned int topologyID, - enum RTCSubdivisionMode mode - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometrySubdivisionMode`` function sets the subdivision mode -(``mode`` parameter) for the topology (``topologyID`` parameter) of the -specified subdivision geometry (``geometry`` parameter). - -The subdivision modes can be used to force linear interpolation for -certain parts of the subdivision mesh: - -- ``RTC_SUBDIVISION_MODE_NO_BOUNDARY``: Boundary patches are ignored. - This way each rendered patch has a full set of control vertices. - -- ``RTC_SUBDIVISION_MODE_SMOOTH_BOUNDARY``: The sequence of boundary - control points are used to generate a smooth B-spline boundary curve - (default mode). 
- -- ``RTC_SUBDIVISION_MODE_PIN_CORNERS``: Corner vertices are pinned to - their location during subdivision. - -- ``RTC_SUBDIVISION_MODE_PIN_BOUNDARY``: All vertices at the border are - pinned to their location during subdivision. This way the boundary is - interpolated linearly. This mode is typically used for texturing to - also map texels at the border of the texture to the mesh. - -- ``RTC_SUBDIVISION_MODE_PIN_ALL``: All vertices are - pinned to their location during subdivision. This way all patches are - linearly interpolated. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`RTC_GEOMETRY_TYPE_SUBDIVISION <#rtc_geometry_type_subdivision>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryVertexAttributeTopology -------------------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryVertexAttributeTopology - binds a vertex - attribute to a topology of the geometry - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcSetGeometryVertexAttributeTopology( - RTCGeometry geometry, - unsigned int vertexAttributeID, - unsigned int topologyID - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryVertexAttributeTopology`` function binds a vertex -attribute buffer slot (``vertexAttributeID`` argument) to a topology -(``topologyID`` argument) for the specified subdivision geometry -(``geometry`` argument). Standard vertex buffers are always bound to the -default topology (topology 0) and cannot be bound differently. A vertex -attribute buffer always uses the topology it is bound to when used in -the ``rtcInterpolate`` and ``rtcInterpolateN`` calls. - -A topology with ID ``i`` consists of a subdivision mode set through -``rtcSetGeometrySubdivisionMode`` and the index buffer bound to the -index buffer slot ``i``.
This index buffer can assign indices for each -face of the subdivision geometry that are different to the indices of -the default topology. These new indices can for example be used to -introduce additional borders into the subdivision mesh to map multiple -textures onto one subdivision geometry. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcSetGeometrySubdivisionMode <#rtcsetgeometrysubdivisionmode>`__, -`rtcInterpolate <#rtcinterpolate>`__, -`rtcInterpolateN <#rtcinterpolaten>`__ - -.. raw:: latex - - \pagebreak - -rtcSetGeometryDisplacementFunction ----------------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcSetGeometryDisplacementFunction - sets the displacement function - for a subdivision geometry - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - struct RTCDisplacementFunctionNArguments - { - void* geometryUserPtr; - RTCGeometry geometry; - unsigned int primID; - unsigned int timeStep; - const float* u; - const float* v; - const float* Ng_x; - const float* Ng_y; - const float* Ng_z; - float* P_x; - float* P_y; - float* P_z; - unsigned int N; - }; - - typedef void (*RTCDisplacementFunctionN)( - const struct RTCDisplacementFunctionNArguments* args - ); - - void rtcSetGeometryDisplacementFunction( - RTCGeometry geometry, - RTCDisplacementFunctionN displacement - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcSetGeometryDisplacementFunction`` function registers a -displacement callback function (``displacement`` argument) for the -specified subdivision geometry (``geometry`` argument). - -Only a single callback function can be registered per geometry, and -further invocations overwrite the previously set callback function. -Passing ``NULL`` as function pointer disables the registered callback -function. 
- -The registered displacement callback function is invoked to displace -points on the subdivision geometry during spatial acceleration structure -construction, during the ``rtcCommitScene`` call. - -The callback function of type ``RTCDisplacementFunctionN`` is invoked -with a number of arguments stored inside the -``RTCDisplacementFunctionNArguments`` structure. The provided user data -pointer of the geometry (``geometryUserPtr`` member) can be used to -point to the application’s representation of the subdivision mesh. A -number ``N`` of points to displace are specified in a structure of array -layout. For each point to displace, the local patch UV coordinates -(``u`` and ``v`` arrays), the normalized geometry normal (``Ng_x``, -``Ng_y``, and ``Ng_z`` arrays), and the position (``P_x``, ``P_y``, and -``P_z`` arrays) are provided. The task of the displacement function is -to use this information and change the position data. - -The geometry handle (``geometry`` member) and primitive ID (``primID`` -member) of the patch to displace are additionally provided as well as -the time step ``timeStep``, which can be important if the displacement -is time-dependent and motion blur is used. - -All passed arrays must be aligned to 64 bytes and properly padded to -make wide vector processing inside the displacement function easily -possible. - -Also see tutorial `Displacement -Geometry `__ for an example of how -to use the displacement mapping functions. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`RTC_GEOMETRY_TYPE_SUBDIVISION <#rtc_geometry_type_subdivision>`__ - -.. raw:: latex - - \pagebreak - -rtcGetGeometryFirstHalfEdge ---------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcGetGeometryFirstHalfEdge - returns the first half edge of a face - -SYNOPSIS -^^^^^^^^ - -.. 
code:: cpp - - #include - - unsigned int rtcGetGeometryFirstHalfEdge( - RTCGeometry geometry, - unsigned int faceID - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcGetGeometryFirstHalfEdge`` function returns the ID of the first -half edge belonging to the specified face (``faceID`` argument). For -instance in the following example the first half edge of face ``f1`` is -``e4``. - -.. image:: images/half_edges.png - -This function can only be used for subdivision geometries. As all -topologies of a subdivision geometry share the same face buffer the -function does not depend on the topology ID. - -Here f0 to f7 are 8 quadrilateral faces with 4 vertices each. The edges -e0 to e23 of these faces are shown with their orientation. For each face -the ID of the edges corresponds to the slots the face occupies in the -index array of the geometry. E.g. as the indices of face f1 start at -location 4 of the index array, the first edge is edge e4, the next edge -e5, etc. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcGetGeometryFirstHalfEdge <#rtcgetgeometryfirsthalfedge>`__, -`rtcGetGeometryFace <#rtcgetgeometryface>`__, -`rtcGetGeometryOppositeHalfEdge <#rtcgetgeometryoppositehalfedge>`__, -`rtcGetGeometryNextHalfEdge <#rtcgetgeometrynexthalfedge>`__, -`rtcGetGeometryPreviousHalfEdge <#rtcgetgeometryprevioushalfedge>`__ - -.. raw:: latex - - \pagebreak - -rtcGetGeometryFace ------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcGetGeometryFace - returns the face of some half edge - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - unsigned int rtcGetGeometryFace( - RTCGeometry geometry, - unsigned int edgeID - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcGetGeometryFace`` function returns the ID of the face the -specified half edge (``edgeID`` argument) belongs to. For instance in -the following example the face ``f1`` is returned for edges ``e4``, -``e5``, ``e6``, and ``e7``. 
- -.. image:: images/half_edges.png - -This function can only be used for subdivision geometries. As all -topologies of a subdivision geometry share the same face buffer the -function does not depend on the topology ID. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcGetGeometryFirstHalfEdge <#rtcgetgeometryfirsthalfedge>`__, -`rtcGetGeometryFace <#rtcgetgeometryface>`__, -`rtcGetGeometryOppositeHalfEdge <#rtcgetgeometryoppositehalfedge>`__, -`rtcGetGeometryNextHalfEdge <#rtcgetgeometrynexthalfedge>`__, -`rtcGetGeometryPreviousHalfEdge <#rtcgetgeometryprevioushalfedge>`__ - -.. raw:: latex - - \pagebreak - -rtcGetGeometryNextHalfEdge --------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcGetGeometryNextHalfEdge - returns the next half edge - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - unsigned int rtcGetGeometryNextHalfEdge( - RTCGeometry geometry, - unsigned int edgeID - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcGetGeometryNextHalfEdge`` function returns the ID of the next -half edge of the specified half edge (``edgeID`` argument). For instance -in the following example the next half edge of ``e10`` is ``e11``. - -.. image:: images/half_edges.png - -This function can only be used for subdivision geometries. As all -topologies of a subdivision geometry share the same face buffer the -function does not depend on the topology ID. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcGetGeometryFirstHalfEdge <#rtcgetgeometryfirsthalfedge>`__, -`rtcGetGeometryFace <#rtcgetgeometryface>`__, -`rtcGetGeometryOppositeHalfEdge <#rtcgetgeometryoppositehalfedge>`__, -`rtcGetGeometryNextHalfEdge <#rtcgetgeometrynexthalfedge>`__, -`rtcGetGeometryPreviousHalfEdge <#rtcgetgeometryprevioushalfedge>`__ - -.. 
raw:: latex - - \pagebreak - -rtcGetGeometryPreviousHalfEdge ------------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcGetGeometryPreviousHalfEdge - returns the previous half edge - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - unsigned int rtcGetGeometryPreviousHalfEdge( - RTCGeometry geometry, - unsigned int edgeID - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcGetGeometryPreviousHalfEdge`` function returns the ID of the -previous half edge of the specified half edge (``edgeID`` argument). For -instance in the following example the previous half edge of ``e6`` is -``e5``. - -.. image:: images/half_edges.png - -This function can only be used for subdivision geometries. As all -topologies of a subdivision geometry share the same face buffer the -function does not depend on the topology ID. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcGetGeometryFirstHalfEdge <#rtcgetgeometryfirsthalfedge>`__, -`rtcGetGeometryFace <#rtcgetgeometryface>`__, -`rtcGetGeometryOppositeHalfEdge <#rtcgetgeometryoppositehalfedge>`__, -`rtcGetGeometryNextHalfEdge <#rtcgetgeometrynexthalfedge>`__, -`rtcGetGeometryPreviousHalfEdge <#rtcgetgeometryprevioushalfedge>`__ - -.. raw:: latex - - \pagebreak - -rtcGetGeometryOppositeHalfEdge ------------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcGetGeometryOppositeHalfEdge - returns the opposite half edge - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - unsigned int rtcGetGeometryOppositeHalfEdge( - RTCGeometry geometry, - unsigned int topologyID, - unsigned int edgeID - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcGetGeometryOppositeHalfEdge`` function returns the ID of the -opposite half edge of the specified half edge (``edgeID`` argument) in -the specified topology (``topologyID`` argument). For instance in the -following example the opposite half edge of ``e6`` is ``e16``. - -.. 
image:: images/half_edges.png - -An opposite half edge does not exist if the specified half edge has -either no neighboring face, or more than 2 neighboring faces. In these -cases the function just returns the same edge ``edgeID`` again. - -This function can only be used for subdivision geometries. The function -depends on the topology as the topologies of a subdivision geometry have -different index buffers assigned. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcGetGeometryFirstHalfEdge <#rtcgetgeometryfirsthalfedge>`__, -`rtcGetGeometryFace <#rtcgetgeometryface>`__, -`rtcGetGeometryOppositeHalfEdge <#rtcgetgeometryoppositehalfedge>`__, -`rtcGetGeometryNextHalfEdge <#rtcgetgeometrynexthalfedge>`__, -`rtcGetGeometryPreviousHalfEdge <#rtcgetgeometryprevioushalfedge>`__ - -.. raw:: latex - - \pagebreak - -rtcInterpolate --------------- - -NAME -^^^^ - -.. code:: cpp - - rtcInterpolate - interpolates vertex attributes - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - struct RTCInterpolateArguments - { - RTCGeometry geometry; - unsigned int primID; - float u; - float v; - enum RTCBufferType bufferType; - unsigned int bufferSlot; - float* P; - float* dPdu; - float* dPdv; - float* ddPdudu; - float* ddPdvdv; - float* ddPdudv; - unsigned int valueCount; - }; - - void rtcInterpolate( - const struct RTCInterpolateArguments* args - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcInterpolate`` function smoothly interpolates per-vertex data -over the geometry. This interpolation is supported for triangle meshes, -quad meshes, curve geometries, and subdivision geometries. Apart from -interpolating the vertex attribute itself, it is also possible to get -the first and second order derivatives of that value. This interpolation -ignores displacements of subdivision surfaces and always interpolates -the underlying base surface. 
- -The ``rtcInterpolate`` call gets passed a number of arguments inside a -structure of type ``RTCInterpolateArguments``. For some geometry -(``geometry`` parameter) this function smoothly interpolates the -per-vertex data stored inside the specified geometry buffer -(``bufferType`` and ``bufferSlot`` parameters) to the u/v location -(``u`` and ``v`` parameters) of the primitive (``primID`` parameter). -The number of floating point values to interpolate and store to the -destination arrays can be specified using the ``valueCount`` parameter. -As interpolation buffer, one can specify vertex buffers -(``RTC_BUFFER_TYPE_VERTEX``) and vertex attribute buffers -(``RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE``) as well. - -The ``rtcInterpolate`` call stores ``valueCount`` number of interpolated -floating point values to the memory location pointed to by ``P``. One -can avoid storing the interpolated value by setting ``P`` to ``NULL``. - -The first order derivative of the interpolation by u and v are stored at -the ``dPdu`` and ``dPdv`` memory locations. One can avoid storing first -order derivatives by setting both ``dPdu`` and ``dPdv`` to ``NULL``. - -The second order derivatives are stored at the ``ddPdudu``, ``ddPdvdv``, -and ``ddPdudv`` memory locations. One can avoid storing second order -derivatives by setting these three pointers to ``NULL``. - -To use ``rtcInterpolate`` for a geometry, all changes to that geometry -must be properly committed using ``rtcCommitGeometry``. - -All input buffers and output arrays must be padded to 16 bytes, as the -implementation uses 16-byte SSE instructions to read and write into -these buffers. - -See tutorial `Interpolation `__ for an -example of using the ``rtcInterpolate`` function. - -EXIT STATUS -^^^^^^^^^^^ - -For performance reasons this function does not do any error checks, thus -will not set any error flags on failure. - -SEE ALSO -^^^^^^^^ - -`rtcInterpolateN <#rtcinterpolaten>`__ - -.. 
raw:: latex - - \pagebreak - -rtcInterpolateN ---------------- - -NAME -^^^^ - -.. code:: cpp - - rtcInterpolateN - performs N interpolations of vertex attribute data - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - struct RTCInterpolateNArguments - { - RTCGeometry geometry; - const void* valid; - const unsigned int* primIDs; - const float* u; - const float* v; - unsigned int N; - enum RTCBufferType bufferType; - unsigned int bufferSlot; - float* P; - float* dPdu; - float* dPdv; - float* ddPdudu; - float* ddPdvdv; - float* ddPdudv; - unsigned int valueCount; - }; - - void rtcInterpolateN( - const struct RTCInterpolateNArguments* args - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcInterpolateN`` is similar to ``rtcInterpolate``, but performs -``N`` many interpolations at once. It additionally gets an array of u/v -coordinates and a valid mask (``valid`` parameter) that specifies which -of these coordinates are valid. The valid mask points to ``N`` integers, -and a value of -1 denotes valid and 0 invalid. If the valid pointer is -``NULL``, all elements are considered valid. The destination arrays are -filled in structure of array (SOA) layout. The value ``N`` must be -divisible by 4. - -To use ``rtcInterpolateN`` for a geometry, all changes to that geometry -must be properly committed using ``rtcCommitGeometry``. - -EXIT STATUS -^^^^^^^^^^^ - -For performance reasons this function does not do any error checks, thus -will not set any error flags on failure. - -SEE ALSO -^^^^^^^^ - -`rtcInterpolate <#rtcinterpolate>`__ - -.. raw:: latex - - \pagebreak - -rtcNewBuffer ------------- - -NAME -^^^^ - -.. code:: cpp - - rtcNewBuffer - creates a new data buffer - -SYNOPSIS -^^^^^^^^ - -..
code:: cpp - - #include - - RTCBuffer rtcNewBuffer( - RTCDevice device, - size_t byteSize - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcNewBuffer`` function creates a new data buffer object of -specified size in bytes (``byteSize`` argument) that is bound to the -specified device (``device`` argument). The buffer object is reference -counted with an initial reference count of 1. The returned buffer object -can be released using the ``rtcReleaseBuffer`` API call. The specified -number of bytes are allocated at buffer construction time and -deallocated when the buffer is destroyed. - -When the buffer will be used as a vertex buffer -(``RTC_BUFFER_TYPE_VERTEX`` and ``RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE``), -the last buffer element must be readable using 16-byte SSE load -instructions, thus padding the last element is required for certain -layouts. E.g. a standard ``float3`` vertex buffer layout should add -storage for at least one more float to the end of the buffer. - -EXIT STATUS -^^^^^^^^^^^ - -On failure ``NULL`` is returned and an error code is set that can be -queried using ``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcRetainBuffer <#rtcretainbuffer>`__, -`rtcReleaseBuffer <#rtcreleasebuffer>`__ - -.. raw:: latex - - \pagebreak - -rtcNewSharedBuffer ------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcNewSharedBuffer - creates a new shared data buffer - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - RTCBuffer rtcNewSharedBuffer( - RTCDevice device, - void* ptr, - size_t byteSize - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcNewSharedBuffer`` function creates a new shared data buffer -object bound to the specified device (``device`` argument). The buffer -object is reference counted with an initial reference count of 1. The -buffer can be released using the ``rtcReleaseBuffer`` function. 
- -At construction time, the pointer to the user-managed buffer data -(``ptr`` argument) including its size in bytes (``byteSize`` argument) -is provided to create the buffer. At buffer construction time no buffer -data is allocated, but the buffer data provided by the application is -used. The buffer data must remain valid for as long as the buffer may be -used, and the user is responsible to free the buffer data when no longer -required. - -When the buffer will be used as a vertex buffer -(``RTC_BUFFER_TYPE_VERTEX`` and ``RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE``), -the last buffer element must be readable using 16-byte SSE load -instructions, thus padding the last element is required for certain -layouts. E.g. a standard ``float3`` vertex buffer layout should add -storage for at least one more float to the end of the buffer. - -The data pointer (``ptr`` argument) must be aligned to 4 bytes; -otherwise the ``rtcNewSharedBuffer`` function will fail. - -EXIT STATUS -^^^^^^^^^^^ - -On failure ``NULL`` is returned and an error code is set that can be -queried using ``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcRetainBuffer <#rtcretainbuffer>`__, -`rtcReleaseBuffer <#rtcreleasebuffer>`__ - -.. raw:: latex - - \pagebreak - -rtcRetainBuffer ---------------- - -NAME -^^^^ - -.. code:: cpp - - rtcRetainBuffer - increments the buffer reference count - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcRetainBuffer(RTCBuffer buffer); - -DESCRIPTION -^^^^^^^^^^^ - -Buffer objects are reference counted. The ``rtcRetainBuffer`` function -increments the reference count of the passed buffer object (``buffer`` -argument). This function together with ``rtcReleaseBuffer`` allows to -use the internal reference counting in a C++ wrapper class to handle the -ownership of the object. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. 
- -SEE ALSO -^^^^^^^^ - -`rtcNewBuffer <#rtcnewbuffer>`__, -`rtcReleaseBuffer <#rtcreleasebuffer>`__ - -.. raw:: latex - - \pagebreak - -rtcReleaseBuffer ----------------- - -NAME -^^^^ - -.. code:: cpp - - rtcReleaseBuffer - decrements the buffer reference count - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcReleaseBuffer(RTCBuffer buffer); - -DESCRIPTION -^^^^^^^^^^^ - -Buffer objects are reference counted. The ``rtcReleaseBuffer`` function -decrements the reference count of the passed buffer object (``buffer`` -argument). When the reference count falls to 0, the buffer gets -destroyed. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewBuffer <#rtcnewbuffer>`__, `rtcRetainBuffer <#rtcretainbuffer>`__ - -.. raw:: latex - - \pagebreak - -rtcGetBufferData ----------------- - -NAME -^^^^ - -.. code:: cpp - - rtcGetBufferData - gets a pointer to the buffer data - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void* rtcGetBufferData(RTCBuffer buffer); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcGetBufferData`` function returns a pointer to the buffer data -of the specified buffer object (``buffer`` argument). - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewBuffer <#rtcnewbuffer>`__ - -.. raw:: latex - - \pagebreak - -RTCRay ------- - -NAME -^^^^ - -.. code:: cpp - - RTCRay - single ray structure - -SYNOPSIS -^^^^^^^^ - -.. 
code:: cpp - - #include - - struct RTC_ALIGN(16) RTCRay - { - float org_x; // x coordinate of ray origin - float org_y; // y coordinate of ray origin - float org_z; // z coordinate of ray origin - float tnear; // start of ray segment - - float dir_x; // x coordinate of ray direction - float dir_y; // y coordinate of ray direction - float dir_z; // z coordinate of ray direction - float time; // time of this ray for motion blur - - float tfar; // end of ray segment (set to hit distance) - unsigned int mask; // ray mask - unsigned int id; // ray ID - unsigned int flags; // ray flags - }; - -DESCRIPTION -^^^^^^^^^^^ - -The ``RTCRay`` structure defines the ray layout for a single ray. The -ray contains the origin (``org_x``, ``org_y``, ``org_z`` members), -direction vector (``dir_x``, ``dir_y``, ``dir_z`` members), and ray -segment (``tnear`` and ``tfar`` members). The ray direction does not -have to be normalized, and only the parameter range specified by the -``tnear``/``tfar`` interval is considered valid. - -The ray segment must be in the range :math:`[0, \infty]`, thus ranges -that start behind the ray origin are not allowed, but ranges can reach -to infinity. For rays inside a ray stream, ``tfar`` < ``tnear`` -identifies an inactive ray. - -The ray further contains a motion blur time in the range :math:`[0, 1]` -(``time`` member), a ray mask (``mask`` member), a ray ID (``id`` -member), and ray flags (``flags`` member). The ray mask can be used to -mask out some geometries for some rays (see ``rtcSetGeometryMask`` for -more details). The ray ID can be used to identify a ray inside a -callback function, even if the order of rays inside a ray packet or -stream has changed. The ray flags are reserved. - -The ``embree3/rtcore_ray.h`` header additionally defines the same ray -structure in structure of array (SOA) layout for API functions accepting -ray packets of size 4 (``RTCRay4`` type), size 8 (``RTCRay8`` type), and -size 16 (``RTCRay16`` type). 
The header additionally defines an -``RTCRayNt`` template for ray packets of an arbitrary compile-time size. - -EXIT STATUS -^^^^^^^^^^^ - -SEE ALSO -^^^^^^^^ - -`RTCHit <#rtchit>`__ - -.. raw:: latex - - \pagebreak - -RTCHit ------- - -NAME -^^^^ - -.. code:: cpp - - RTCHit - single hit structure - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - struct RTCHit - { - float Ng_x; // x coordinate of geometry normal - float Ng_y; // y coordinate of geometry normal - float Ng_z; // z coordinate of geometry normal - - float u; // barycentric u coordinate of hit - float v; // barycentric v coordinate of hit - - unsigned int primID; // primitive ID - unsigned int geomID; // geometry ID - unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID - }; - -DESCRIPTION -^^^^^^^^^^^ - -The ``RTCHit`` type defines the type of a ray/primitive intersection -result. The hit contains the unnormalized geometric normal in object -space at the hit location (``Ng_x``, ``Ng_y``, ``Ng_z`` members), the -barycentric u/v coordinates of the hit (``u`` and ``v`` members), as -well as the primitive ID (``primID`` member), geometry ID (``geomID`` -member), and instance ID stack (``instID`` member) of the hit. The -parametric intersection distance is not stored inside the hit, but -stored inside the ``tfar`` member of the ray. - -The ``embree3/rtcore_ray.h`` header additionally defines the same hit -structure in structure of array (SOA) layout for hit packets of size 4 -(``RTCHit4`` type), size 8 (``RTCHit8`` type), and size 16 (``RTCHit16`` -type). The header additionally defines an ``RTCHitNt`` template for hit -packets of an arbitrary compile-time size. - -EXIT STATUS -^^^^^^^^^^^ - -SEE ALSO -^^^^^^^^ - -`RTCRay <#rtcray>`__, [Multi-Level Instancing] - -.. raw:: latex - - \pagebreak - -RTCRayHit ---------- - -NAME -^^^^ - -.. code:: cpp - - RTCRayHit - combined single ray/hit structure - -SYNOPSIS -^^^^^^^^ - -..
code:: cpp - - #include - - struct RTCORE_ALIGN(16) RTCRayHit - { - struct RTCRay ray; - struct RTCHit hit; - }; - -DESCRIPTION -^^^^^^^^^^^ - -The ``RTCRayHit`` structure is used as input for the -``rtcIntersect``-type functions and stores the ray to intersect and some -hit fields that hold the intersection result afterwards. - -The ``embree3/rtcore_ray.h`` header additionally defines the same -ray/hit structure in structure of array (SOA) layout for API functions -accepting ray packets of size 4 (``RTCRayHit4`` type), size 8 -(``RTCRayHit8`` type), and size 16 (``RTCRayHit16`` type). The header -additionally defines an ``RTCRayHitNt`` template to generate ray/hit -packets of an arbitrary compile-time size. - -EXIT STATUS -^^^^^^^^^^^ - -SEE ALSO -^^^^^^^^ - -`RTCRay <#rtcray>`__, `RTCHit <#rtchit>`__ - -.. raw:: latex - - \pagebreak - -RTCRayN -------- - -NAME -^^^^ - -.. code:: cpp - - RTCRayN - ray packet of runtime size - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - struct RTCRayN; - - float& RTCRayN_org_x(RTCRayN* ray, unsigned int N, unsigned int i); - float& RTCRayN_org_y(RTCRayN* ray, unsigned int N, unsigned int i); - float& RTCRayN_org_z(RTCRayN* ray, unsigned int N, unsigned int i); - float& RTCRayN_tnear(RTCRayN* ray, unsigned int N, unsigned int i); - - float& RTCRayN_dir_x(RTCRayN* ray, unsigned int N, unsigned int i); - float& RTCRayN_dir_y(RTCRayN* ray, unsigned int N, unsigned int i); - float& RTCRayN_dir_z(RTCRayN* ray, unsigned int N, unsigned int i); - float& RTCRayN_time (RTCRayN* ray, unsigned int N, unsigned int i); - - float& RTCRayN_tfar (RTCRayN* ray, unsigned int N, unsigned int i); - unsigned int& RTCRayN_mask (RTCRayN* ray, unsigned int N, unsigned int i); - unsigned int& RTCRayN_id (RTCRayN* ray, unsigned int N, unsigned int i); - unsigned int& RTCRayN_flags(RTCRayN* ray, unsigned int N, unsigned int i); - -DESCRIPTION -^^^^^^^^^^^ - -When the ray packet size is not known at compile time (e.g. 
when Embree -returns a ray packet in the ``RTCFilterFunctionN`` callback function), -Embree uses the ``RTCRayN`` type for ray packets. These ray packets can -only have sizes of 1, 4, 8, or 16. No other packet size will be used. - -You can either implement different special code paths for each of these -possible packet sizes and cast the ray to the appropriate ray packet -type, or implement one general code path that uses the ``RTCRayN_XXX`` -helper functions to access the ray packet components. - -These helper functions get a pointer to the ray packet (``ray`` -argument), the packet size (``N`` argument), and return a reference to -a component (e.g. x-component of origin) of the i-th ray of the -packet (``i`` argument). - -EXIT STATUS -^^^^^^^^^^^ - -SEE ALSO -^^^^^^^^ - -`RTCHitN <#rtchitn>`__ - -.. raw:: latex - - \pagebreak - -RTCHitN -------- - -NAME -^^^^ - -.. code:: cpp - - RTCHitN - hit packet of runtime size - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - struct RTCHitN; - - float& RTCHitN_Ng_x(RTCHitN* hit, unsigned int N, unsigned int i); - float& RTCHitN_Ng_y(RTCHitN* hit, unsigned int N, unsigned int i); - float& RTCHitN_Ng_z(RTCHitN* hit, unsigned int N, unsigned int i); - - float& RTCHitN_u(RTCHitN* hit, unsigned int N, unsigned int i); - float& RTCHitN_v(RTCHitN* hit, unsigned int N, unsigned int i); - - unsigned& RTCHitN_primID(RTCHitN* hit, unsigned int N, unsigned int i); - unsigned& RTCHitN_geomID(RTCHitN* hit, unsigned int N, unsigned int i); - unsigned& RTCHitN_instID(RTCHitN* hit, unsigned int N, unsigned int i, unsigned int level); - -DESCRIPTION -^^^^^^^^^^^ - -When the hit packet size is not known at compile time (e.g. when Embree -returns a hit packet in the ``RTCFilterFunctionN`` callback function), -Embree uses the ``RTCHitN`` type for hit packets. These hit packets can -only have sizes of 1, 4, 8, or 16. No other packet size will be used. 
- -You can either implement different special code paths for each of these -possible packet sizes and cast the hit to the appropriate hit packet -type, or implement one general code path that uses the ``RTCHitN_XXX`` -helper functions to access hit packet components. - -These helper functions get a pointer to the hit packet (``hit`` -argument), the packet size (``N`` argument), and return a reference to -a component (e.g. x component of ``Ng``) of the i-th hit of the -packet (``i`` argument). - -EXIT STATUS -^^^^^^^^^^^ - -SEE ALSO -^^^^^^^^ - -`RTCRayN <#rtcrayn>`__ - -.. raw:: latex - - \pagebreak - -RTCRayHitN ----------- - -NAME -^^^^ - -.. code:: cpp - - RTCRayHitN - combined ray/hit packet of runtime size - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - struct RTCRayHitN; - - struct RTCRayN* RTCRayHitN_RayN(struct RTCRayHitN* rayhit, unsigned int N); - struct RTCHitN* RTCRayHitN_HitN(struct RTCRayHitN* rayhit, unsigned int N); - -DESCRIPTION -^^^^^^^^^^^ - -When the packet size of a ray/hit structure is not known at compile time -(e.g. when Embree returns a ray/hit packet in the -``RTCIntersectFunctionN`` callback function), Embree uses the -``RTCRayHitN`` type for ray/hit packets. These ray/hit packets can only have -sizes of 1, 4, 8, or 16. No other packet size will be used. - -You can either implement different special code paths for each of these -possible packet sizes and cast the ray/hit to the appropriate ray/hit -packet type, or extract the ``RTCRayN`` and ``RTCHitN`` components using -the ``RTCRayHitN_RayN`` and ``RTCRayHitN_HitN`` helper functions and use the -``RTCRayN_XXX`` and ``RTCHitN_XXX`` functions to access the ray and hit -parts of the structure. - -EXIT STATUS -^^^^^^^^^^^ - -SEE ALSO -^^^^^^^^ - -`RTCHitN <#rtchitn>`__ - -.. raw:: latex - - \pagebreak - -rtcInitIntersectContext ------------------------ - -NAME -^^^^ - -.. code:: cpp - - rtcInitIntersectContext - initializes the intersection context - -SYNOPSIS -^^^^^^^^ - -.. 
code:: cpp - - #include - - enum RTCIntersectContextFlags - { - RTC_INTERSECT_CONTEXT_FLAG_NONE, - RTC_INTERSECT_CONTEXT_FLAG_INCOHERENT, - RTC_INTERSECT_CONTEXT_FLAG_COHERENT, - }; - - struct RTCIntersectContext - { - enum RTCIntersectContextFlags flags; - RTCFilterFunctionN filter; - - #if RTC_MAX_INSTANCE_LEVEL_COUNT > 1 - unsigned int instStackSize; - #endif - - unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; - - #if RTC_MIN_WIDTH - float minWidthDistanceFactor; - #endif - }; - - void rtcInitIntersectContext( - struct RTCIntersectContext* context - ); - -DESCRIPTION -^^^^^^^^^^^ - -A per ray-query intersection context (``RTCIntersectContext`` type) is -supported that can be used to configure intersection flags (``flags`` -member), specify a filter callback function (``filter`` member), specify -the chain of IDs of the current instance (``instID`` and -``instStackSize`` members), and to attach arbitrary data to the query -(e.g. per ray data). - -The ``rtcInitIntersectContext`` function initializes the context to -default values and should be called to initialize every intersection -context. This function gets inlined, which minimizes overhead and allows -for compiler optimizations. - -The intersection context flag can be used to tune the behavior of the -traversal algorithm. Using the ``RTC_INTERSECT_CONTEXT_FLAG_INCOHERENT`` -flags uses an optimized traversal algorithm for incoherent rays -(default), while ``RTC_INTERSECT_CONTEXT_FLAG_COHERENT`` uses an -optimized traversal algorithm for coherent rays (e.g. primary camera -rays). - -Best primary ray performance can be obtained by using the ray stream API -and setting the intersect context flag to -``RTC_INTERSECT_CONTEXT_FLAG_COHERENT``. For secondary rays, it is -typically better to use the ``RTC_INTERSECT_CONTEXT_FLAG_INCOHERENT`` -flag, unless the rays are known to be very coherent too (e.g. for -primary transparency rays). - -A filter function can be specified inside the context. 
This filter -function is invoked as a second filter stage after the per-geometry -intersect or occluded filter function is invoked. Only rays that passed -the first filter stage are valid in this second filter stage. Having -such a per ray-query filter function can be useful to implement -modifications of the behavior of the query, such as collecting all hits -or accumulating transparencies. The support for the context filter -function must be enabled for a scene by using the -``RTC_SCENE_FLAG_CONTEXT_FILTER_FUNCTION`` scene flag. In case of -instancing this feature has to get enabled also for each instantiated -scene. - -The minWidthDistanceFactor value controls the target size of the curve -radii when the min-width feature is enabled. Please see the -[rtcSetGeometryMaxRadiusScale] function for more details on the -min-width feature. - -It is guaranteed that the pointer to the intersection context passed to -a ray query is directly passed to the registered callback functions. -This way it is possible to attach arbitrary data to the end of the -intersection context, such as a per-ray payload. - -Please note that the ray pointer is not guaranteed to be passed to the -callback functions, thus reading additional data from the ray pointer -passed to callbacks is not possible. - -EXIT STATUS -^^^^^^^^^^^ - -No error code is set by this function. - -SEE ALSO -^^^^^^^^ - -`rtcIntersect1 <#rtcintersect1>`__, `rtcOccluded1 <#rtcoccluded1>`__ - -.. raw:: latex - - \pagebreak - -rtcIntersect1 -------------- - -NAME -^^^^ - -.. code:: cpp - - rtcIntersect1 - finds the closest hit for a single ray - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcIntersect1( - RTCScene scene, - struct RTCIntersectContext* context, - struct RTCRayHit* rayhit - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcIntersect1`` function finds the closest hit of a single ray -with the scene (``scene`` argument). 
The provided ray/hit structure -(``rayhit`` argument) contains the ray to intersect and some hit output -fields that are filled when a hit is found. - -The user has to initialize the ray origin (``org`` ray member), ray -direction (``dir`` ray member), ray segment (``tnear``, ``tfar`` ray -members), and set the ray flags to ``0`` (``flags`` ray member). If the -scene contains motion blur geometries, also the ray time (``time`` ray -member) must be initialized to a value in the range :math:`[0, 1]`. If -ray masks are enabled at compile time, the ray mask (``mask`` ray -member) must be initialized as well. The ray segment has to be in the -range :math:`[0, \infty]`, thus ranges that start behind the ray origin -are not valid, but ranges can reach to infinity. See Section -`RTCRay <#rtcray>`__ for the ray layout description. - -The geometry ID (``geomID`` hit member) of the hit data must be -initialized to ``RTC_INVALID_GEOMETRY_ID`` (-1). - -Further, an intersection context for the ray query function must be -created and initialized (see ``rtcInitIntersectContext``). - -When no intersection is found, the ray/hit data is not updated. When an -intersection is found, the hit distance is written into the ``tfar`` -member of the ray and all hit data is set, such as unnormalized geometry -normal in object space (``Ng`` hit member), local hit coordinates -(``u``, ``v`` hit member), instance ID stack (``instID`` hit member), -geometry ID (``geomID`` hit member), and primitive ID (``primID`` hit -member). See Section `RTCHit <#rtchit>`__ for the hit layout -description. - -If the instance ID stack has a prefix of values not equal to -``RTC_INVALID_GEOMETRY_ID``, the instance ID on each level corresponds -to the geometry ID of the hit instance of the higher-level scene, the -geometry ID corresponds to the hit geometry inside the hit instanced -scene, and the primitive ID corresponds to the n-th primitive of that -geometry. 
- -If level 0 of the instance ID stack is equal to -``RTC_INVALID_GEOMETRY_ID``, the geometry ID corresponds to the hit -geometry inside the top-level scene, and the primitive ID corresponds to -the n-th primitive of that geometry. - -The implementation makes no guarantees that primitives whose hit -distance is exactly at (or very close to) ``tnear`` or ``tfar`` are hit -or missed. If you want to exclude intersections at ``tnear`` just pass a -slightly enlarged ``tnear``, and if you want to include intersections at -``tfar`` pass a slightly enlarged ``tfar``. - -The intersection context (``context`` argument) can specify flags to -optimize traversal and a filter callback function to be invoked for -every intersection. Further, the pointer to the intersection context is -propagated to callback functions invoked during traversal and can thus -be used to extend the ray with additional data. See Section -``RTCIntersectContext`` for more information. - -The ray pointer passed to callback functions is not guaranteed to be -identical to the original ray provided. To extend the ray with -additional data to be accessed in callback functions, use the -intersection context. - -The ray/hit structure must be aligned to 16 bytes. - -EXIT STATUS -^^^^^^^^^^^ - -For performance reasons this function does not do any error checks, thus -will not set any error flags on failure. - -SEE ALSO -^^^^^^^^ - -`rtcOccluded1 <#rtcoccluded1>`__, `RTCRayHit <#rtcrayhit>`__, -`RTCRay <#rtcray>`__, `RTCHit <#rtchit>`__ - -.. raw:: latex - - \pagebreak - -rtcOccluded1 ------------- - -NAME -^^^^ - -.. code:: cpp - - rtcOccluded1 - finds any hit for a single ray - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcOccluded1( - RTCScene scene, - struct RTCIntersectContext* context, - struct RTCRay* ray - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcOccluded1`` function checks for a single ray (``ray`` argument) -whether there is any hit with the scene (``scene`` argument). 
- -The user must initialize the ray origin (``org`` ray member), ray -direction (``dir`` ray member), ray segment (``tnear``, ``tfar`` ray -members), and must set the ray flags to ``0`` (``flags`` ray member). If -the scene contains motion blur geometries, also the ray time (``time`` -ray member) must be initialized to a value in the range :math:`[0, 1]`. -If ray masks are enabled at compile time, the ray mask (``mask`` ray -member) must be initialized as well. The ray segment must be in the -range :math:`[0, \infty]`, thus ranges that start behind the ray origin -are not valid, but ranges can reach to infinity. See Section -`RTCRay <#rtcray>`__ for the ray layout description. - -When no intersection is found, the ray data is not updated. In case a -hit was found, the ``tfar`` component of the ray is set to ``-inf``. - -The implementation makes no guarantees that primitives whose hit -distance is exactly at (or very close to) ``tnear`` or ``tfar`` are hit -or missed. If you want to exclude intersections at ``tnear`` just pass a -slightly enlarged ``tnear``, and if you want to include intersections at -``tfar`` pass a slightly enlarged ``tfar``. - -The intersection context (``context`` argument) can specify flags to -optimize traversal and a filter callback function to be invoked for -every intersection. Further, the pointer to the intersection context is -propagated to callback functions invoked during traversal and can thus -be used to extend the ray with additional data. See Section -``RTCIntersectContext`` for more information. - -The ray pointer passed to callback functions is not guaranteed to be -identical to the original ray provided. To extend the ray with -additional data to be accessed in callback functions, use the -intersection context. - -The ray must be aligned to 16 bytes. - -EXIT STATUS -^^^^^^^^^^^ - -For performance reasons this function does not do any error checks, thus -will not set any error flags on failure. 
- -SEE ALSO -^^^^^^^^ - -`rtcIntersect1 <#rtcintersect1>`__, `RTCRay <#rtcray>`__ - -.. raw:: latex - - \pagebreak - -rtcIntersect4/8/16 ------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcIntersect4/8/16 - finds the closest hits for a ray packet - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcIntersect4( - const int* valid, - RTCScene scene, - struct RTCIntersectContext* context, - struct RTCRayHit4* rayhit - ); - - void rtcIntersect8( - const int* valid, - RTCScene scene, - struct RTCIntersectContext* context, - struct RTCRayHit8* rayhit - ); - - void rtcIntersect16( - const int* valid, - RTCScene scene, - struct RTCIntersectContext* context, - struct RTCRayHit16* rayhit - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcIntersect4/8/16`` functions find the closest hits for a ray -packet of size 4, 8, or 16 (``rayhit`` argument) with the scene -(``scene`` argument). The ray/hit input contains a ray packet and hit -packet. See Section `rtcIntersect1 <#rtcintersect1>`__ for a description -of how to set up and trace rays. - -A ray valid mask must be provided (``valid`` argument) which stores one -32-bit integer (``-1`` means valid and ``0`` invalid) per ray in the -packet. Only active rays are processed, and hit data of inactive rays is -not changed. - -The intersection context (``context`` argument) can specify flags to -optimize traversal and a filter callback function to be invoked for -every intersection. Further, the pointer to the intersection context is -propagated to callback functions invoked during traversal and can thus -be used to extend the ray with additional data. See Section -``RTCIntersectContext`` for more information. - -The ray pointer passed to callback functions is not guaranteed to be -identical to the original ray provided. To extend the ray with -additional data to be accessed in callback functions, use the -intersection context. 
- -The implementation of these functions is guaranteed to invoke callback -functions always with the same ray packet size and ordering of rays as -specified initially. - -For ``rtcIntersect4`` the ray packet must be aligned to 16 bytes, for -``rtcIntersect8`` the alignment must be 32 bytes, and for -``rtcIntersect16`` the alignment must be 64 bytes. - -The ``rtcIntersect4``, ``rtcIntersect8`` and ``rtcIntersect16`` -functions may change the ray packet size and ray order when calling back -into intersect filter functions or user geometry callbacks. Under some -conditions the application can assume packets to stay intact, which can be -determined by querying the -``RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED``, -``RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED``, -``RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED`` properties through the -``rtcGetDeviceProperty`` function. See -`rtcGetDeviceProperty <#rtcgetdeviceproperty>`__ for more information. - -EXIT STATUS -^^^^^^^^^^^ - -For performance reasons this function does not do any error checks, thus -will not set any error flags on failure. - -SEE ALSO -^^^^^^^^ - -`rtcOccluded4/8/16 <#rtcoccluded4816>`__ - -.. raw:: latex - - \pagebreak - -rtcOccluded4/8/16 ------------------ - -NAME -^^^^ - -.. code:: cpp - - rtcOccluded4/8/16 - finds any hits for a ray packet - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcOccluded4( - const int* valid, - RTCScene scene, - struct RTCIntersectContext* context, - struct RTCRay4* ray - ); - - void rtcOccluded8( - const int* valid, - RTCScene scene, - struct RTCIntersectContext* context, - struct RTCRay8* ray - ); - - void rtcOccluded16( - const int* valid, - RTCScene scene, - struct RTCIntersectContext* context, - struct RTCRay16* ray - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcOccluded4/8/16`` functions check for each active ray of the -ray packet of size 4, 8, or 16 (``ray`` argument) whether there is any -hit with the scene (``scene`` argument). 
See Section -`rtcOccluded1 <#rtcoccluded1>`__ for a description of how to set up and -trace occlusion rays. - -A ray valid mask must be provided (``valid`` argument) which stores one -32-bit integer (``-1`` means valid and ``0`` invalid) per ray in the -packet. Only active rays are processed, and hit data of inactive rays is -not changed. - -The intersection context (``context`` argument) can specify flags to -optimize traversal and a filter callback function to be invoked for -every intersection. Further, the pointer to the intersection context is -propagated to callback functions invoked during traversal and can thus -be used to extend the ray with additional data. See Section -``RTCIntersectContext`` for more information. - -The ray pointer passed to callback functions is not guaranteed to be -identical to the original ray provided. To extend the ray with -additional data to be accessed in callback functions, use the -intersection context. - -The implementation of these functions is guaranteed to invoke callback -functions always with the same ray packet size and ordering of rays as -specified initially. - -For ``rtcOccluded4`` the ray packet must be aligned to 16 bytes, for -``rtcOccluded8`` the alignment must be 32 bytes, and for -``rtcOccluded16`` the alignment must be 64 bytes. - -The ``rtcOccluded4``, ``rtcOccluded8`` and ``rtcOccluded16`` functions -may change the ray packet size and ray order when calling back into -intersect filter functions or user geometry callbacks. Under some -conditions the application can assume packets to stay intact, which can be -determined by querying the -``RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED``, -``RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED``, -``RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED`` properties through the -``rtcGetDeviceProperty`` function. See -`rtcGetDeviceProperty <#rtcgetdeviceproperty>`__ for more information. 
- -EXIT STATUS -^^^^^^^^^^^ - -For performance reasons this function does not do any error checks, thus -will not set any error flags on failure. - -SEE ALSO -^^^^^^^^ - -`rtcIntersect4/8/16 <#rtcintersect4816>`__ - -.. raw:: latex - - \pagebreak - -rtcIntersect1M --------------- - -NAME -^^^^ - -.. code:: cpp - - rtcIntersect1M - finds the closest hits for a stream of M single - rays - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcIntersect1M( - RTCScene scene, - struct RTCIntersectContext* context, - struct RTCRayHit* rayhit, - unsigned int M, - size_t byteStride - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcIntersect1M`` function finds the closest hits for a stream of -``M`` single rays (``rayhit`` argument) with the scene (``scene`` -argument). The ``rayhit`` argument points to an array of ray and hit -data with specified byte stride (``byteStride`` argument) between the -ray/hit structures. See Section `rtcIntersect1 <#rtcintersect1>`__ for a -description of how to set up and trace rays. - -The intersection context (``context`` argument) can specify flags to -optimize traversal and a filter callback function to be invoked for -every intersection. Further, the pointer to the intersection context is -propagated to callback functions invoked during traversal and can thus -be used to extend the ray with additional data. See Section -``RTCIntersectContext`` for more information. - -The implementation of the stream ray query functions may re-order rays -arbitrarily and re-pack rays into ray packets of different size. For -this reason, callback functions may be invoked with an arbitrary packet -size (of size 1, 4, 8, or 16) and different ordering as specified -initially. For this reason, one may have to use the ``rayID`` component -of the ray to identify the original ray, e.g. to access a per-ray -payload. - -A ray in a ray stream is considered inactive if its ``tnear`` value is -larger than its ``tfar`` value. 
- -The stream size ``M`` can be an arbitrary positive integer including 0. -Each ray must be aligned to 16 bytes. - -EXIT STATUS -^^^^^^^^^^^ - -For performance reasons this function does not do any error checks, thus -will not set any error flags on failure. - -SEE ALSO -^^^^^^^^ - -`rtcOccluded1M <#rtcoccluded1m>`__ - -.. raw:: latex - - \pagebreak - -rtcOccluded1M -------------- - -NAME -^^^^ - -.. code:: cpp - - rtcOccluded1M - finds any hits for a stream of M single rays - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcOccluded1M( - RTCScene scene, - struct RTCIntersectContext* context, - struct RTCRay* ray, - unsigned int M, - size_t byteStride - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcOccluded1M`` function checks whether there are any hits for a -stream of ``M`` single rays (``ray`` argument) with the scene (``scene`` -argument). The ``ray`` argument points to an array of rays with -specified byte stride (``byteStride`` argument) between the rays. See -Section `rtcOccluded1 <#rtcoccluded1>`__ for a description of how to set -up and trace occlusion rays. - -The intersection context (``context`` argument) can specify flags to -optimize traversal and a filter callback function to be invoked for -every intersection. Further, the pointer to the intersection context is -propagated to callback functions invoked during traversal and can thus -be used to extend the ray with additional data. See Section -``RTCIntersectContext`` for more information. - -The implementation of the stream ray query functions may re-order rays -arbitrarily and re-pack rays into ray packets of different size. For -this reason, callback functions may be invoked with an arbitrary packet -size (of size 1, 4, 8, or 16) and different ordering as specified -initially. For this reason, one may have to use the ``rayID`` component -of the ray to identify the original ray, e.g. to access a per-ray -payload. 
- -A ray in a ray stream is considered inactive if its ``tnear`` value is -larger than its ``tfar`` value. - -The stream size ``M`` can be an arbitrary positive integer including 0. -Each ray must be aligned to 16 bytes. - -EXIT STATUS -^^^^^^^^^^^ - -For performance reasons this function does not do any error checks, thus -will not set any error flags on failure. - -SEE ALSO -^^^^^^^^ - -`rtcIntersect1M <#rtcintersect1m>`__ - -.. raw:: latex - - \pagebreak - -rtcIntersect1Mp ---------------- - -NAME -^^^^ - -.. code:: cpp - - rtcIntersect1Mp - finds the closest hits for a stream of M pointers - to single rays - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcIntersect1Mp( - RTCScene scene, - struct RTCIntersectContext* context, - struct RTCRayHit** rayhit, - unsigned int M - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcIntersect1Mp`` function finds the closest hits for a stream of -``M`` single rays (``rayhit`` argument) with the scene (``scene`` -argument). The ``rayhit`` argument points to an array of pointers to the -individual ray/hit structures. See Section -`rtcIntersect1 <#rtcintersect1>`__ for a description of how to set up -and trace a ray. - -The intersection context (``context`` argument) can specify flags to -optimize traversal and a filter callback function to be invoked for -every intersection. Further, the pointer to the intersection context is -propagated to callback functions invoked during traversal and can thus -be used to extend the ray with additional data. See Section -``RTCIntersectContext`` for more information. - -The implementation of the stream ray query functions may re-order rays -arbitrarily and re-pack rays into ray packets of different size. For -this reason, callback functions may be invoked with an arbitrary packet -size (of size 1, 4, 8, or 16) and different ordering as specified -initially. For this reason, one may have to use the ``rayID`` component -of the ray to identify the original ray, e.g. 
to access a per-ray -payload. - -A ray in a ray stream is considered inactive if its ``tnear`` value is -larger than its ``tfar`` value. - -The stream size ``M`` can be an arbitrary positive integer including 0. -Each ray must be aligned to 16 bytes. - -EXIT STATUS -^^^^^^^^^^^ - -For performance reasons this function does not do any error checks, thus -will not set any error flags on failure. - -SEE ALSO -^^^^^^^^ - -`rtcOccluded1Mp <#rtcoccluded1mp>`__ - -.. raw:: latex - - \pagebreak - -rtcOccluded1Mp --------------- - -NAME -^^^^ - -.. code:: cpp - - rtcOccluded1Mp - finds any hits for a stream of M pointers to - single rays - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcOccluded1Mp( - RTCScene scene, - struct RTCIntersectContext* context, - struct RTCRay** ray, - unsigned int M - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcOccluded1Mp`` function checks whether there are any hits for a -stream of ``M`` single rays (``ray`` argument) with the scene (``scene`` -argument). The ``ray`` argument points to an array of pointers to rays. See -Section `rtcOccluded1 <#rtcoccluded1>`__ for a description of how to set -up and trace occlusion rays. - -The intersection context (``context`` argument) can specify flags to -optimize traversal and a filter callback function to be invoked for -every intersection. Further, the pointer to the intersection context is -propagated to callback functions invoked during traversal and can thus -be used to extend the ray with additional data. See Section -``RTCIntersectContext`` for more information. - -The implementation of the stream ray query functions may re-order rays -arbitrarily and re-pack rays into ray packets of different size. For -this reason, callback functions may be invoked with an arbitrary packet -size (of size 1, 4, 8, or 16) and different ordering as specified -initially. For this reason, one may have to use the ``rayID`` component -of the ray to identify the original ray, e.g. to access a per-ray -payload. 
- -A ray in a ray stream is considered inactive if its ``tnear`` value is -larger than its ``tfar`` value. - -The stream size ``M`` can be an arbitrary positive integer including 0. -Each ray must be aligned to 16 bytes. - -EXIT STATUS -^^^^^^^^^^^ - -For performance reasons this function does not do any error checks, thus -will not set any error flags on failure. - -SEE ALSO -^^^^^^^^ - -`rtcIntersect1Mp <#rtcintersect1mp>`__ - -.. raw:: latex - - \pagebreak - -rtcIntersectNM --------------- - -NAME -^^^^ - -.. code:: cpp - - rtcIntersectNM - finds the closest hits for a stream of M - ray packets of size N - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcIntersectNM( - RTCScene scene, - struct RTCIntersectContext* context, - struct RTCRayHitN* rayhit, - unsigned int N, - unsigned int M, - size_t byteStride - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcIntersectNM`` function finds the closest hits for a stream of -``M`` ray packets (``rayhit`` argument) of size ``N`` with the scene -(``scene`` argument). The ``rayhit`` argument points to an array of ray -and hit packets with specified byte stride (``byteStride`` argument) -between the ray/hit packets. See Section -`rtcIntersect1 <#rtcintersect1>`__ for a description of how to set up -and trace rays. - -The intersection context (``context`` argument) can specify flags to -optimize traversal and a filter callback function to be invoked for -every intersection. Further, the pointer to the intersection context is -propagated to callback functions invoked during traversal and can thus -be used to extend the ray with additional data. See Section -``RTCIntersectContext`` for more information. - -The implementation of the stream ray query functions may re-order rays -arbitrarily and re-pack rays into ray packets of different size. For -this reason, callback functions may be invoked with an arbitrary packet -size (of size 1, 4, 8, or 16) and different ordering as specified -initially. 
For this reason, one may have to use the ``rayID`` component -of the ray to identify the original ray, e.g. to access a per-ray -payload. - -A ray in a ray stream is considered inactive if its ``tnear`` value is -larger than its ``tfar`` value. - -The packet size ``N`` must be larger than 0, and the stream size ``M`` -can be an arbitrary positive integer including 0. Each ray must be -aligned to 16 bytes. - -EXIT STATUS -^^^^^^^^^^^ - -For performance reasons this function does not do any error checks, thus -will not set any error flags on failure. - -SEE ALSO -^^^^^^^^ - -`rtcOccludedNM <#rtcoccludednm>`__ - -.. raw:: latex - - \pagebreak - -rtcOccludedNM -------------- - -NAME -^^^^ - -.. code:: cpp - - rtcOccludedNM - finds any hits for a stream of M ray packets of - size N - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcOccludedNM( - RTCScene scene, - struct RTCIntersectContext* context, - struct RTCRayN* ray, - unsigned int N, - unsigned int M, - size_t byteStride - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcOccludedNM`` function checks whether there are any hits for a -stream of ``M`` ray packets (``ray`` argument) of size ``N`` with the -scene (``scene`` argument). The ``ray`` argument points to an array of -ray packets with specified byte stride (``byteStride`` argument) between -the ray packets. See Section `rtcOccluded1 <#rtcoccluded1>`__ for a -description of how to set up and trace occlusion rays. - -The intersection context (``context`` argument) can specify flags to -optimize traversal and a filter callback function to be invoked for -every intersection. Further, the pointer to the intersection context is -propagated to callback functions invoked during traversal and can thus -be used to extend the ray with additional data. See Section -``RTCIntersectContext`` for more information. - -The implementation of the stream ray query functions may re-order rays -arbitrarily and re-pack rays into ray packets of different size. 
For -this reason, callback functions may be invoked with an arbitrary packet -size (of size 1, 4, 8, or 16) and different ordering as specified -initially. For this reason, one may have to use the ``rayID`` component -of the ray to identify the original ray, e.g. to access a per-ray -payload. - -A ray in a ray stream is considered inactive if its ``tnear`` value is -larger than its ``tfar`` value. - -The packet size ``N`` must be larger than 0, and the stream size ``M`` -can be an arbitrary positive integer including 0. Each ray must be -aligned to 16 bytes. - -EXIT STATUS -^^^^^^^^^^^ - -For performance reasons this function does not do any error checks, thus -will not set any error flags on failure. - -SEE ALSO -^^^^^^^^ - -`rtcIntersectNM <#rtcintersectnm>`__ - -.. raw:: latex - - \pagebreak - -rtcIntersectNp --------------- - -NAME -^^^^ - -.. code:: cpp - - rtcIntersectNp - finds the closest hits for a SOA ray stream of - size N - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcIntersectNp( - RTCScene scene, - struct RTCIntersectContext* context, - struct RTCRayHitNp* rayhit, - unsigned int N - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcIntersectNp`` function finds the closest hits for a SOA ray -stream (``rayhit`` argument) of size ``N`` (basically a large ray packet) -with the scene (``scene`` argument). The ``rayhit`` argument points to -two structures of pointers with one pointer for each ray and hit -component. Each of these pointers points to an array with the ray or hit -component data for each ray or hit. This way the individual components -of the SOA ray stream do not need to be stored sequentially in memory, -which makes it possible to have large varying size ray packets in SOA -layout. See Section `rtcIntersect1 <#rtcintersect1>`__ for a description -of how to set up and trace rays. 
- -The intersection context (``context`` argument) can specify flags to -optimize traversal and a filter callback function to be invoked for -every intersection. Further, the pointer to the intersection context is -propagated to callback functions invoked during traversal and can thus -be used to extend the ray with additional data. See Section -``RTCIntersectContext`` for more information. - -The implementation of the stream ray query functions may re-order rays -arbitrarily and re-pack rays into ray packets of different size. For -this reason, callback functions may be invoked with an arbitrary packet -size (of size 1, 4, 8, or 16) and different ordering as specified -initially. For this reason, one may have to use the ``rayID`` component -of the ray to identify the original ray, e.g. to access a per-ray -payload. - -A ray in a ray stream is considered inactive if its ``tnear`` value is -larger than its ``tfar`` value. - -The stream size ``N`` can be an arbitrary positive integer including 0. -Each ray component array must be aligned to 16 bytes. - -EXIT STATUS -^^^^^^^^^^^ - -For performance reasons this function does not do any error checks, thus -will not set any error flags on failure. - -SEE ALSO -^^^^^^^^ - -`rtcOccludedNp <#rtcoccludednp>`__ - -.. raw:: latex - - \pagebreak - -rtcOccludedNp -------------- - -NAME -^^^^ - -.. code:: cpp - - rtcOccludedNp - finds any hits for a SOA ray stream of size N - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcOccludedNp( - RTCScene scene, - struct RTCIntersectContext* context, - struct RTCRayNp* ray, - unsigned int N - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcOccludedNp`` function checks whether there are any hits for a -SOA ray stream (``ray`` argument) of size ``N`` (basically a large ray -packet) with the scene (``scene`` argument). The ``ray`` argument points -to a structure of pointers with one pointer for each ray component. 
Each -of these pointers points to an array with the ray component data for -each ray. This way the individual components of the SOA ray stream do -not need to be stored sequentially in memory, which makes it possible to -have large varying size ray packets in SOA layout. See Section -`rtcOccluded1 <#rtcoccluded1>`__ for a description of how to set up and -trace occlusion rays. - -The intersection context (``context`` argument) can specify flags to -optimize traversal and a filter callback function to be invoked for -every intersection. Further, the pointer to the intersection context is -propagated to callback functions invoked during traversal and can thus -be used to extend the ray with additional data. See Section -``RTCIntersectContext`` for more information. - -The implementation of the stream ray query functions may re-order rays -arbitrarily and re-pack rays into ray packets of different size. For -this reason, callback functions may be invoked with an arbitrary packet -size (of size 1, 4, 8, or 16) and different ordering as specified -initially. For this reason, one may have to use the ``rayID`` component -of the ray to identify the original ray, e.g. to access a per-ray -payload. - -A ray in a ray stream is considered inactive if its ``tnear`` value is -larger than its ``tfar`` value. - -The stream size ``N`` can be an arbitrary positive integer including 0. -Each ray component array must be aligned to 16 bytes. - -EXIT STATUS -^^^^^^^^^^^ - -For performance reasons this function does not do any error checks, thus -will not set any error flags on failure. - -SEE ALSO -^^^^^^^^ - -`rtcIntersectNp <#rtcintersectnp>`__ - -.. raw:: latex - - \pagebreak - -rtcInitPointQueryContext ------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcInitPointQueryContext - initializes the context information (e.g. - stack of (multilevel-)instance transformations) for point queries - -SYNOPSIS -^^^^^^^^ - -.. 
code:: cpp - - #include - - struct RTC_ALIGN(16) RTCPointQueryContext - { - // accumulated 4x4 column major matrices from world to instance space. - float world2inst[RTC_MAX_INSTANCE_LEVEL_COUNT][16]; - - // accumulated 4x4 column major matrices from instance to world space. - float inst2world[RTC_MAX_INSTANCE_LEVEL_COUNT][16]; - - // instance ids. - unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; - - // number of instances currently on the stack. - unsigned int instStackSize; - }; - - void rtcInitPointQueryContext( - struct RTCPointQueryContext* context - ); - -DESCRIPTION -^^^^^^^^^^^ - -A stack (``RTCPointQueryContext`` type) which stores the IDs and -instance transformations during a BVH traversal for a point query. The -transformations are assumed to be affine transformations (3×3 matrix -plus translation) and therefore the last column is ignored (see -`RTC_GEOMETRY_TYPE_INSTANCE <#rtc_geometry_type_instance>`__ for -details). - -The ``rtcInitPointQueryContext`` function initializes the context to default -values and should be called for initialization. - -The context will be passed as an argument to the point query callback -function (see -`rtcSetGeometryPointQueryFunction <#rtcsetgeometrypointqueryfunction>`__) -and should be used to pass instance information down the instancing -chain for user defined instancing (see tutorial [ClosestPoint] for a -reference implementation of point queries with user defined instancing). - -The context is a necessary argument to -`rtcPointQuery <#rtcpointquery>`__ and Embree internally uses the -topmost instance transformation of the stack to transform the point query -into instance space. - -EXIT STATUS -^^^^^^^^^^^ - -No error code is set by this function. - -SEE ALSO -^^^^^^^^ - -`rtcPointQuery <#rtcpointquery>`__, -`rtcSetGeometryPointQueryFunction <#rtcsetgeometrypointqueryfunction>`__ - -.. raw:: latex - - \pagebreak - -rtcPointQuery -------------- - -NAME -^^^^ - -..
code:: cpp - - rtcPointQuery - traverses the BVH with a point query object - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - struct RTC_ALIGN(16) RTCPointQuery - { - // location of the query - float x; - float y; - float z; - - // radius and time of the query - float radius; - float time; - }; - - void rtcPointQuery( - RTCScene scene, - struct RTCPointQuery* query, - struct RTCPointQueryContext* context, - struct RTCPointQueryFunction* queryFunc, - void* userPtr - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcPointQuery`` function traverses the BVH using a -``RTCPointQuery`` object (``query`` argument) and calls a user defined -callback function (e.g ``queryFunc`` argument) for each primitive of the -scene (``scene`` argument) that intersects the query domain. - -The user has to initialize the query location (``x``, ``y`` and ``z`` -member) and query radius in the range :math:`[0, \infty]`. If the scene -contains motion blur geometries, also the query time (``time`` member) -must be initialized to a value in the range :math:`[0, 1]`. - -Further, a ``RTCPointQueryContext`` (``context`` argument) must be -created and initialized. It contains ID and transformation information -of the instancing hierarchy if (multilevel-)instancing is used. See -`rtcInitPointQueryContext <#rtcinitpointquerycontext>`__ for further -information. - -For every primitive that intersects the query domain, the callback -function (``queryFunc`` argument) is called, in which distance -computations to the primitive can be implemented. The user will be -provided with the primID and geomID of the according primitive, however, -the geometry information (e.g. triangle index and vertex data) has to be -determined manually. The ``userPtr`` argument can be used to input -geometry data of the scene or output results of the point query -(e.g. closest point currently found on surface geometry (see tutorial -[ClosestPoint])). 
- -The parameter ``queryFunc`` is optional and can be NULL, in which case -the callback function is not invoked. However, a callback function can -still get attached to a specific ``RTCGeometry`` object using -`rtcSetGeometryPointQueryFunction <#rtcsetgeometrypointqueryfunction>`__. -If a callback function is attached to a geometry and (a potentially -different) callback function is passed as an argument to -``rtcPointQuery``, both functions are called for the primitives of the -according geometries. - -The query radius can be decreased inside the callback function, which -allows to efficiently cull parts of the scene during BVH traversal. -Increasing the query radius or modifying time or location of the query -will result in undefined behaviour. - -The callback function will be called for all primitives in a leaf node -of the BVH even if the primitive is outside the query domain, since -Embree does not gather geometry information of primitives internally. - -Point queries can be used with (multilevel)-instancing. However, care -has to be taken when the instance transformation contains anisotropic -scaling or shearing. In these cases distance computations have to be -performed in world space to ensure correctness and the ellipsoidal query -domain (in instance space) will be approximated with its axis aligned -bounding box internally. Therefore, the callback function might be -invoked even for primitives in inner BVH nodes that do not intersect the -query domain. See -`rtcSetGeometryPointQueryFunction <#rtcsetgeometrypointqueryfunction>`__ -for details. - -The point query structure must be aligned to 16 bytes. - -SUPPORTED PRIMITIVES -^^^^^^^^^^^^^^^^^^^^ - -Currently, all primitive types are supported by the point query API -except for points (see -`RTC_GEOMETRY_TYPE_POINT <#rtc_geometry_type_point>`__), curves (see -`RTC_GEOMETRY_TYPE_CURVE <#rtc_geometry_type_curve>`__) and subdivision -surfaces (see [RTC_GEOMETRY_SUBDIVISION]).
- -EXIT STATUS -^^^^^^^^^^^ - -For performance reasons this function does not do any error checks, thus -will not set any error flags on failure. - -SEE ALSO -^^^^^^^^ - -`rtcSetGeometryPointQueryFunction <#rtcsetgeometrypointqueryfunction>`__, -`rtcInitPointQueryContext <#rtcinitpointquerycontext>`__ - -.. raw:: latex - - \pagebreak - -rtcCollide ----------- - -NAME -^^^^ - -.. code:: cpp - - rtcCollide - intersects one BVH with another - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - struct RTCCollision { - unsigned int geomID0, primID0; - unsigned int geomID1, primID1; - }; - - typedef void (*RTCCollideFunc) ( - void* userPtr, - RTCCollision* collisions, - size_t num_collisions); - - void rtcCollide ( - RTCScene hscene0, - RTCScene hscene1, - RTCCollideFunc callback, - void* userPtr - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcCollide`` function intersects the BVH of ``hscene0`` with the -BVH of ``hscene1`` and calls a user defined callback function -(``callback`` argument) for each pair of intersecting primitives between -the two scenes. A user defined data pointer (``userPtr`` argument) can -also be passed in. - -For every pair of primitives that may intersect each other, the callback -function (``callback`` argument) is called. The user will be provided -with the primIDs and geomIDs of multiple potentially intersecting -primitive pairs. Currently, only scenes entirely composed of user -geometries are supported, thus the user is expected to implement a -primitive/primitive intersection to filter out false positives in the -callback function. The ``userPtr`` argument can be used to input -geometry data of the scene or output results of the intersection query. - -SUPPORTED PRIMITIVES -^^^^^^^^^^^^^^^^^^^^ - -Currently, the only supported type is the user geometry type (see -`RTC_GEOMETRY_TYPE_USER <#rtc_geometry_type_user>`__). - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``.
- -SEE ALSO -^^^^^^^^ - -.. raw:: latex - - \pagebreak - -rtcNewBVH ---------- - -NAME -^^^^ - -.. code:: cpp - - rtcNewBVH - creates a new BVH object - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - RTCBVH rtcNewBVH(RTCDevice device); - -DESCRIPTION -^^^^^^^^^^^ - -This function creates a new BVH object and returns a handle to this BVH. -The BVH object is reference counted with an initial reference count of -1. The handle can be released using the ``rtcReleaseBVH`` API call. - -The BVH object can be used to build a BVH in a user-specified format -over user-specified primitives. See the documentation of the -``rtcBuildBVH`` call for more details. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcRetainBVH <#rtcretainbvh>`__, `rtcReleaseBVH <#rtcreleasebvh>`__, -`rtcBuildBVH <#rtcbuildbvh>`__ - -.. raw:: latex - - \pagebreak - -rtcRetainBVH ------------- - -NAME -^^^^ - -.. code:: cpp - - rtcRetainBVH - increments the BVH reference count - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcRetainBVH(RTCBVH bvh); - -DESCRIPTION -^^^^^^^^^^^ - -BVH objects are reference counted. The ``rtcRetainBVH`` function -increments the reference count of the passed BVH object (``bvh`` -argument). This function together with ``rtcReleaseBVH`` allows to use -the internal reference counting in a C++ wrapper class to handle the -ownership of the object. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewBVH <#rtcnewbvh>`__, `rtcReleaseBVH <#rtcreleasebvh>`__ - -.. raw:: latex - - \pagebreak - -rtcReleaseBVH -------------- - -NAME -^^^^ - -.. code:: cpp - - rtcReleaseBVH - decrements the BVH reference count - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - void rtcReleaseBVH(RTCBVH bvh); - -DESCRIPTION -^^^^^^^^^^^ - -BVH objects are reference counted. 
The ``rtcReleaseBVH`` function -decrements the reference count of the passed BVH object (``bvh`` -argument). When the reference count falls to 0, the BVH gets destroyed. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewBVH <#rtcnewbvh>`__, `rtcRetainBVH <#rtcretainbvh>`__ - -.. raw:: latex - - \pagebreak - -rtcBuildBVH ------------ - -NAME -^^^^ - -.. code:: cpp - - rtcBuildBVH - builds a BVH - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - #include - - struct RTC_ALIGN(32) RTCBuildPrimitive - { - float lower_x, lower_y, lower_z; - unsigned int geomID; - float upper_x, upper_y, upper_z; - unsigned int primID; - }; - - typedef void* (*RTCCreateNodeFunction) ( - RTCThreadLocalAllocator allocator, - unsigned int childCount, - void* userPtr - ); - - typedef void (*RTCSetNodeChildrenFunction) ( - void* nodePtr, - void** children, - unsigned int childCount, - void* userPtr - ); - - typedef void (*RTCSetNodeBoundsFunction) ( - void* nodePtr, - const struct RTCBounds** bounds, - unsigned int childCount, - void* userPtr - ); - - typedef void* (*RTCCreateLeafFunction) ( - RTCThreadLocalAllocator allocator, - const struct RTCBuildPrimitive* primitives, - size_t primitiveCount, - void* userPtr - ); - - typedef void (*RTCSplitPrimitiveFunction) ( - const struct RTCBuildPrimitive* primitive, - unsigned int dimension, - float position, - struct RTCBounds* leftBounds, - struct RTCBounds* rightBounds, - void* userPtr - ); - - typedef bool (*RTCProgressMonitorFunction)( - void* userPtr, double n - ); - - enum RTCBuildFlags - { - RTC_BUILD_FLAG_NONE, - RTC_BUILD_FLAG_DYNAMIC - }; - - struct RTCBuildArguments - { - size_t byteSize; - - enum RTCBuildQuality buildQuality; - enum RTCBuildFlags buildFlags; - unsigned int maxBranchingFactor; - unsigned int maxDepth; - unsigned int sahBlockSize; - unsigned int minLeafSize; - unsigned int maxLeafSize; - float traversalCost; - float intersectionCost; - - 
RTCBVH bvh; - struct RTCBuildPrimitive* primitives; - size_t primitiveCount; - size_t primitiveArrayCapacity; - - RTCCreateNodeFunction createNode; - RTCSetNodeChildrenFunction setNodeChildren; - RTCSetNodeBoundsFunction setNodeBounds; - RTCCreateLeafFunction createLeaf; - RTCSplitPrimitiveFunction splitPrimitive; - RTCProgressMonitorFunction buildProgress; - void* userPtr; - }; - - struct RTCBuildArguments rtcDefaultBuildArguments(); - - void* rtcBuildBVH( - const struct RTCBuildArguments* args - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcBuildBVH`` function can be used to build a BVH in a -user-defined format over arbitrary primitives. All arguments to the -function are provided through the ``RTCBuildArguments`` structure. The -first member of that structure must be set to the size of the structure -in bytes (``byteSize`` member) which allows future extensions of the -structure. It is recommended to initialize the build arguments structure -using the ``rtcDefaultBuildArguments`` function. - -The ``rtcBuildBVH`` function gets passed the BVH to build (``bvh`` -member), the array of primitives (``primitives`` member), the capacity -of that array (``primitiveArrayCapacity`` member), the number of -primitives stored inside the array (``primitiveCount`` member), callback -function pointers, and a user-defined pointer (``userPtr`` member) that -is passed to all callback functions when invoked. The ``primitives`` -array can be freed by the application after the BVH is built. All -callback functions are typically called from multiple threads, thus -their implementation must be thread-safe. - -Four callback functions must be registered, which are invoked during -build to create BVH nodes (``createNode`` member), to set the pointers -to all children (``setNodeChildren`` member), to set the bounding boxes -of all children (``setNodeBounds`` member), and to create a leaf node -(``createLeaf`` member).
- -The function pointer to the primitive split function (``splitPrimitive`` -member) may be ``NULL``, however, then no spatial splitting in high -quality mode is possible. The function pointer used to report the build -progress (``buildProgress`` member) is optional and may also be -``NULL``. - -Further, some build settings are passed to configure the BVH build. -Using the build quality settings (``buildQuality`` member), one can -select between a faster, low quality build which is good for dynamic -scenes, and a standard quality build for static scenes. One can also -specify the desired maximum branching factor of the BVH -(``maxBranchingFactor`` member), the maximum depth the BVH should have -(``maxDepth`` member), the block size for the SAH heuristic -(``sahBlockSize`` member), the minimum and maximum leaf size -(``minLeafSize`` and ``maxLeafSize`` members), and the estimated costs of -one traversal step and one primitive intersection (``traversalCost`` and -``intersectionCost`` members). When enabling the -``RTC_BUILD_FLAG_DYNAMIC`` build flag (``buildFlags`` member), re-build -performance for dynamic scenes is improved at the cost of higher memory -requirements. - -To spatially split primitives in high quality mode, the builder needs -extra space at the end of the build primitive array to store split -primitives. The total capacity of the build primitive array is passed -using the ``primitiveArrayCapacity`` member, and should be about twice -the number of primitives when using spatial splits. - -The ``RTCCreateNodeFunction`` and ``RTCCreateLeafFunction`` callbacks are passed -a thread local allocator object that should be used for fast allocation -of nodes using the ``rtcThreadLocalAlloc`` function. We strongly -recommend using this allocation mechanism, as alternative approaches -like standard ``malloc`` can be over 10× slower. The allocator object -passed to the create callbacks may be used only inside the current -thread.
Memory allocated using ``rtcThreadLocalAlloc`` is automatically -freed when the ``RTCBVH`` object is deleted. If you use your own memory -allocation scheme you have to free the memory yourself when the -``RTCBVH`` object is no longer used. - -The ``RTCCreateNodeFunction`` callback additionally gets the number of -children for this node in the range from 2 to ``maxBranchingFactor`` -(``childCount`` argument). - -The ``RTCSetNodeChildrenFunction`` callback function gets a pointer to the node -as input (``nodePtr`` argument), an array of pointers to the children -(``children`` argument), and the size of this array (``childCount`` -argument). - -The ``RTCSetNodeBoundsFunction`` callback function gets a pointer to the -node as input (``nodePtr`` argument), an array of pointers to the -bounding boxes of the children (``bounds`` argument), and the size of -this array (``childCount`` argument). - -The ``RTCCreateLeafFunction`` callback additionally gets an array of -primitives as input (``primitives`` argument), and the size of this -array (``primitiveCount`` argument). The callback should read the -``geomID`` and ``primID`` members from the passed primitives to -construct the leaf. - -The ``RTCSplitPrimitiveFunction`` callback is invoked in high quality mode -to split a primitive (``primitive`` argument) at the specified position -(``position`` argument) and dimension (``dimension`` argument). The -callback should return bounds of the clipped left and right parts of the -primitive (``leftBounds`` and ``rightBounds`` arguments). - -The ``RTCProgressMonitorFunction`` callback function is called with the -estimated completion rate ``n`` in the range :math:`[0,1]`. Returning -``true`` from the callback lets the build continue; returning ``false`` -cancels the build. - -EXIT STATUS -^^^^^^^^^^^ - -On failure an error code is set that can be queried using -``rtcGetDeviceError``. - -SEE ALSO -^^^^^^^^ - -`rtcNewBVH <#rtcnewbvh>`__ - -..
raw:: latex - - \pagebreak - -RTCQuaternionDecomposition --------------------------- - -NAME -^^^^ - -.. code:: cpp - - RTCQuaternionDecomposition - structure that represents a quaternion - decomposition of an affine transformation - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - struct RTCQuaternionDecomposition - { - float scale_x, scale_y, scale_z; - float skew_xy, skew_xz, skew_yz; - float shift_x, shift_y, shift_z; - float quaternion_r, quaternion_i, quaternion_j, quaternion_k; - float translation_x, translation_y, translation_z; - }; - -DESCRIPTION -^^^^^^^^^^^ - -The struct ``RTCQuaternionDecomposition`` represents an affine -transformation decomposed into three parts. An upper triangular -scaling/skew/shift matrix - -.. math:: - - - S = \left( \begin{array}{cccc} - scale_x & skew_{xy} & skew_{xz} & shift_x \\ - 0 & scale_y & skew_{yz} & shift_y \\ - 0 & 0 & scale_z & shift_z \\ - 0 & 0 & 0 & 1 \\ - \end{array} \right), - -a translation matrix - -.. math:: - - - T = \left( \begin{array}{cccc} - 1 & 0 & 0 & translation_x \\ - 0 & 1 & 0 & translation_y \\ - 0 & 0 & 1 & translation_z \\ - 0 & 0 & 0 & 1 \\ - \end{array} \right), - -and a rotation matrix :math:`R`, represented as a quaternion - -:math:`quaternion_r + quaternion_i \ \mathbf{i} + quaternion_j \ \mathbf{j} + quaternion_k \ \mathbf{k}` - -where :math:`\mathbf{i}`, :math:`\mathbf{j}`, and :math:`\mathbf{k}` are the -imaginary quaternion units. The passed quaternion will be normalized -internally. - -The affine transformation matrix corresponding to a -``RTCQuaternionDecomposition`` is :math:`TRS` and a point -:math:`p = (p_x, p_y, p_z, 1)^T` will be transformed as - -.. math:: p' = T \ R \ S \ p.
- -The functions ``rtcInitQuaternionDecomposition``, -``rtcQuaternionDecompositionSetQuaternion``, -``rtcQuaternionDecompositionSetScale``, -``rtcQuaternionDecompositionSetSkew``, -``rtcQuaternionDecompositionSetShift``, and -``rtcQuaternionDecompositionSetTranslation`` allow to set the fields of -the structure more conveniently. - -EXIT STATUS -^^^^^^^^^^^ - -No error code is set by this function. - -SEE ALSO -^^^^^^^^ - -`rtcSetGeometryTransformQuaternion <#rtcsetgeometrytransformquaternion>`__, -`rtcInitQuaternionDecomposition <#rtcinitquaterniondecomposition>`__ - -.. raw:: latex - - \pagebreak - -rtcInitQuaternionDecomposition ------------------------------- - -NAME -^^^^ - -.. code:: cpp - - rtcInitQuaternionDecomposition - initializes quaternion decomposition - -SYNOPSIS -^^^^^^^^ - -.. code:: cpp - - void rtcInitQuaternionDecomposition( - struct RTCQuaternionDecomposition* qd - ); - -DESCRIPTION -^^^^^^^^^^^ - -The ``rtcInitQuaternionDecomposition`` function initializes a -``RTCQuaternionDecomposition`` structure to represent an identity -transformation. - -EXIT STATUS -^^^^^^^^^^^ - -No error code is set by this function. - -SEE ALSO -^^^^^^^^ - -`rtcSetGeometryTransformQuaternion <#rtcsetgeometrytransformquaternion>`__, -`RTCQuaternionDecomposition <#rtcquaterniondecomposition>`__ - -.. raw:: latex - - \pagebreak - -.. raw:: latex - - \pagebreak diff --git a/source/elements/oneART/source/embree.rst b/source/elements/oneART/source/embree.rst deleted file mode 100644 index b99bc8b35..000000000 --- a/source/elements/oneART/source/embree.rst +++ /dev/null @@ -1,46 +0,0 @@ -.. SPDX-FileCopyrightText: 2021 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -.. _embree-section: - -====== -Embree -====== - -Embree is a collection of high-performance ray tracing kernels. 
The -Embree target users are graphics application engineers who want to -improve the performance of their photo-realistic rendering applications -by leveraging Embree’s performance-optimized ray tracing -kernels. Embree supports runtime code selection to choose the -traversal and build algorithms that best match the instruction set -of your CPU. - -Embree supports applications written with the Intel® SPMD Program -Compiler (ISPC, https://ispc.github.io/) by also providing an ISPC -interface to the core ray tracing algorithms. This makes it possible -to write a renderer in ISPC that automatically vectorizes and -leverages SSE, AVX, AVX2, and AVX-512 instructions. ISPC also supports -runtime code selection, thus ISPC will select the best code path for -your application. - -Embree contains algorithms optimized for incoherent workloads -(e.g. Monte Carlo ray tracing algorithms) and coherent workloads -(e.g. primary visibility and hard shadow rays). - -The single-ray traversal kernels of Embree provide high performance -for incoherent workloads and are very easy to integrate into existing -rendering applications. Using the stream kernels, even higher -performance for incoherent rays is possible, but integration might -require significant code changes to the application to use the stream -paradigm. In general for coherent workloads, the stream mode with -the coherent flag set gives the best performance. - -Embree also supports dynamic scenes by implementing high-performance -two-level spatial index structure construction algorithms. - -.. toctree:: - :maxdepth: 1 - - embree-spec - diff --git a/source/elements/oneART/source/future-consider.rst b/source/elements/oneART/source/future-consider.rst deleted file mode 100644 index 2a3590857..000000000 --- a/source/elements/oneART/source/future-consider.rst +++ /dev/null @@ -1,10 +0,0 @@ -.. SPDX-FileCopyrightText: 2019-2021 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -..
_future-section: - -===================== -Future Considerations -===================== - diff --git a/source/elements/oneART/source/images/ColoredWindow.jpg b/source/elements/oneART/source/images/ColoredWindow.jpg deleted file mode 100644 index c3b56acf9..000000000 Binary files a/source/elements/oneART/source/images/ColoredWindow.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/camera_architectural.jpg b/source/elements/oneART/source/images/camera_architectural.jpg deleted file mode 100644 index 6bbea7d78..000000000 Binary files a/source/elements/oneART/source/images/camera_architectural.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/camera_orthographic.jpg b/source/elements/oneART/source/images/camera_orthographic.jpg deleted file mode 100644 index 4b53d5faa..000000000 Binary files a/source/elements/oneART/source/images/camera_orthographic.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/camera_panoramic.jpg b/source/elements/oneART/source/images/camera_panoramic.jpg deleted file mode 100644 index 39962e611..000000000 Binary files a/source/elements/oneART/source/images/camera_panoramic.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/camera_perspective.jpg b/source/elements/oneART/source/images/camera_perspective.jpg deleted file mode 100644 index 91fc1564a..000000000 Binary files a/source/elements/oneART/source/images/camera_perspective.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/camera_stereo.jpg b/source/elements/oneART/source/images/camera_stereo.jpg deleted file mode 100644 index 98412fe14..000000000 Binary files a/source/elements/oneART/source/images/camera_stereo.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/closest_point.jpg b/source/elements/oneART/source/images/closest_point.jpg deleted file mode 100644 index 5047d1e88..000000000 Binary files 
a/source/elements/oneART/source/images/closest_point.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/collide.jpg b/source/elements/oneART/source/images/collide.jpg deleted file mode 100644 index f3d793252..000000000 Binary files a/source/elements/oneART/source/images/collide.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/curve_geometry.jpg b/source/elements/oneART/source/images/curve_geometry.jpg deleted file mode 100644 index b121d2122..000000000 Binary files a/source/elements/oneART/source/images/curve_geometry.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/diffuse_rooms.png b/source/elements/oneART/source/images/diffuse_rooms.png deleted file mode 100644 index bf9dbea77..000000000 Binary files a/source/elements/oneART/source/images/diffuse_rooms.png and /dev/null differ diff --git a/source/elements/oneART/source/images/displacement_geometry.jpg b/source/elements/oneART/source/images/displacement_geometry.jpg deleted file mode 100644 index dd7fa8e7c..000000000 Binary files a/source/elements/oneART/source/images/displacement_geometry.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/dynamic_scene.jpg b/source/elements/oneART/source/images/dynamic_scene.jpg deleted file mode 100644 index 79f4235a2..000000000 Binary files a/source/elements/oneART/source/images/dynamic_scene.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/exampleViewer.jpg b/source/elements/oneART/source/images/exampleViewer.jpg deleted file mode 100644 index 4b29d70ad..000000000 Binary files a/source/elements/oneART/source/images/exampleViewer.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/gitter_badge.svg b/source/elements/oneART/source/images/gitter_badge.svg deleted file mode 100644 index 7064d7f43..000000000 --- a/source/elements/oneART/source/images/gitter_badge.svg +++ /dev/null @@ -1 +0,0 @@ -chatchaton gitteron gitter \ No newline 
at end of file diff --git a/source/elements/oneART/source/images/grid_geometry.jpg b/source/elements/oneART/source/images/grid_geometry.jpg deleted file mode 100644 index 713053b48..000000000 Binary files a/source/elements/oneART/source/images/grid_geometry.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/hair_geometry.jpg b/source/elements/oneART/source/images/hair_geometry.jpg deleted file mode 100644 index 45d020a83..000000000 Binary files a/source/elements/oneART/source/images/hair_geometry.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/half_edges.fig b/source/elements/oneART/source/images/half_edges.fig deleted file mode 100644 index 364c78d66..000000000 --- a/source/elements/oneART/source/images/half_edges.fig +++ /dev/null @@ -1,111 +0,0 @@ -#FIG 3.2 Produced by xfig version 3.2.6 -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 2775 7200 375 7200 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 2775 9675 2775 7275 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 375 7275 375 9675 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 375 9750 2775 9750 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 5475 7275 3075 7275 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 5475 9750 5475 7350 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 3075 7350 3075 9750 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 3075 9825 5475 9825 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 8325 7275 5925 7275 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 8325 9750 8325 7350 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 5925 7350 5925 9750 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 5925 9825 8325 9825 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 
60.00 120.00 - 8325 4350 5925 4350 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 8325 6825 8325 4425 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 5925 4425 5925 6825 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 5925 6900 8325 6900 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 2850 4275 450 4275 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 2850 6750 2850 4350 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 450 4350 450 6750 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 450 6825 2850 6825 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 5550 4350 3150 4350 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 5550 6825 5550 4425 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 3150 4425 3150 6825 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 3150 6900 5550 6900 -4 0 0 50 -1 14 20 0.0000 4 210 390 1275 9600 e0\001 -4 0 0 50 -1 14 20 0.0000 4 210 390 2325 8625 e1\001 -4 0 0 50 -1 14 20 0.0000 4 210 390 450 8475 e3\001 -4 0 0 50 -1 14 20 0.0000 4 210 390 1575 7500 e2\001 -4 0 0 50 -1 14 20 0.0000 4 210 390 1425 8550 f0\001 -4 0 0 50 -1 14 20 0.0000 4 210 390 3975 9675 e4\001 -4 0 0 50 -1 14 20 0.0000 4 210 390 5025 8700 e5\001 -4 0 0 50 -1 14 20 0.0000 4 210 390 3150 8550 e7\001 -4 0 0 50 -1 14 20 0.0000 4 210 390 4275 7575 e6\001 -4 0 0 50 -1 14 20 0.0000 4 210 390 6825 9675 e8\001 -4 0 0 50 -1 14 20 0.0000 4 210 390 7875 8700 e9\001 -4 0 0 50 -1 14 20 0.0000 4 210 585 6000 8550 e11\001 -4 0 0 50 -1 14 20 0.0000 4 210 585 7125 7575 e10\001 -4 0 0 50 -1 14 20 0.0000 4 210 390 6975 8625 f2\001 -4 0 0 50 -1 14 20 0.0000 4 210 585 6825 6750 e20\001 -4 0 0 50 -1 14 20 0.0000 4 210 585 6000 5625 e23\001 -4 0 0 50 -1 14 20 0.0000 4 210 585 7125 4650 e22\001 -4 0 0 50 -1 14 20 0.0000 4 210 390 6975 5700 f5\001 -4 0 0 50 -1 14 20 0.0000 4 210 585 1350 6675 e12\001 -4 0 0 50 
-1 14 20 0.0000 4 210 585 525 5550 e15\001 -4 0 0 50 -1 14 20 0.0000 4 210 585 1650 4575 e14\001 -4 0 0 50 -1 14 20 0.0000 4 210 390 1500 5625 f3\001 -4 0 0 50 -1 14 20 0.0000 4 210 585 4050 6750 e16\001 -4 0 0 50 -1 14 20 0.0000 4 210 585 3225 5625 e19\001 -4 0 0 50 -1 14 20 0.0000 4 210 585 4350 4650 e18\001 -4 0 0 50 -1 14 20 0.0000 4 210 390 4200 5700 f4\001 -4 0 0 50 -1 14 20 0.0000 4 210 390 4125 8625 f1\001 -4 0 0 50 -1 14 20 0.0000 4 210 585 2175 5700 e13\001 -4 0 0 50 -1 14 20 0.0000 4 210 585 4875 5775 e17\001 -4 0 0 50 -1 14 20 0.0000 4 210 585 7725 5775 e21\001 diff --git a/source/elements/oneART/source/images/half_edges.pdf b/source/elements/oneART/source/images/half_edges.pdf deleted file mode 100644 index 70395eb87..000000000 Binary files a/source/elements/oneART/source/images/half_edges.pdf and /dev/null differ diff --git a/source/elements/oneART/source/images/half_edges.png b/source/elements/oneART/source/images/half_edges.png deleted file mode 100644 index 747c8dc95..000000000 Binary files a/source/elements/oneART/source/images/half_edges.png and /dev/null differ diff --git a/source/elements/oneART/source/images/hdri_light.png b/source/elements/oneART/source/images/hdri_light.png deleted file mode 100644 index 78e45eb91..000000000 Binary files a/source/elements/oneART/source/images/hdri_light.png and /dev/null differ diff --git a/source/elements/oneART/source/images/icon.png b/source/elements/oneART/source/images/icon.png deleted file mode 100644 index f7d90e0a0..000000000 Binary files a/source/elements/oneART/source/images/icon.png and /dev/null differ diff --git a/source/elements/oneART/source/images/icon192.png b/source/elements/oneART/source/images/icon192.png deleted file mode 100644 index ebb38b4af..000000000 Binary files a/source/elements/oneART/source/images/icon192.png and /dev/null differ diff --git a/source/elements/oneART/source/images/icon32.ico b/source/elements/oneART/source/images/icon32.ico deleted file mode 100644 index 
0d5bee5f8..000000000 Binary files a/source/elements/oneART/source/images/icon32.ico and /dev/null differ diff --git a/source/elements/oneART/source/images/instanced_geometry.jpg b/source/elements/oneART/source/images/instanced_geometry.jpg deleted file mode 100644 index 1da5ffb69..000000000 Binary files a/source/elements/oneART/source/images/instanced_geometry.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/interpolation.jpg b/source/elements/oneART/source/images/interpolation.jpg deleted file mode 100644 index 66ca90eb0..000000000 Binary files a/source/elements/oneART/source/images/interpolation.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/intersection_filter.jpg b/source/elements/oneART/source/images/intersection_filter.jpg deleted file mode 100644 index ce58e0e10..000000000 Binary files a/source/elements/oneART/source/images/intersection_filter.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/material_Alloy.jpg b/source/elements/oneART/source/images/material_Alloy.jpg deleted file mode 100644 index 080b9fb76..000000000 Binary files a/source/elements/oneART/source/images/material_Alloy.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/material_CarPaint.jpg b/source/elements/oneART/source/images/material_CarPaint.jpg deleted file mode 100644 index e896290a8..000000000 Binary files a/source/elements/oneART/source/images/material_CarPaint.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/material_Glass.jpg b/source/elements/oneART/source/images/material_Glass.jpg deleted file mode 100644 index d0f9ee894..000000000 Binary files a/source/elements/oneART/source/images/material_Glass.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/material_Luminous.jpg b/source/elements/oneART/source/images/material_Luminous.jpg deleted file mode 100644 index fdb8b50c2..000000000 Binary files 
a/source/elements/oneART/source/images/material_Luminous.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/material_Metal.jpg b/source/elements/oneART/source/images/material_Metal.jpg deleted file mode 100644 index 334e99a46..000000000 Binary files a/source/elements/oneART/source/images/material_Metal.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/material_MetallicPaint.jpg b/source/elements/oneART/source/images/material_MetallicPaint.jpg deleted file mode 100644 index eb9bf9e31..000000000 Binary files a/source/elements/oneART/source/images/material_MetallicPaint.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/material_OBJ.jpg b/source/elements/oneART/source/images/material_OBJ.jpg deleted file mode 100644 index 6de826229..000000000 Binary files a/source/elements/oneART/source/images/material_OBJ.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/material_Principled.jpg b/source/elements/oneART/source/images/material_Principled.jpg deleted file mode 100644 index 7c1d6d2bf..000000000 Binary files a/source/elements/oneART/source/images/material_Principled.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/material_ThinGlass.jpg b/source/elements/oneART/source/images/material_ThinGlass.jpg deleted file mode 100644 index 6d7f84c8a..000000000 Binary files a/source/elements/oneART/source/images/material_ThinGlass.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/mazda_64spp_input.jpg b/source/elements/oneART/source/images/mazda_64spp_input.jpg deleted file mode 100644 index e71bb1888..000000000 Binary files a/source/elements/oneART/source/images/mazda_64spp_input.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/mazda_64spp_oidn.jpg b/source/elements/oneART/source/images/mazda_64spp_oidn.jpg deleted file mode 100644 index 87dd154b8..000000000 Binary files 
a/source/elements/oneART/source/images/mazda_64spp_oidn.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/mazda_firsthit_512spp_albedo.jpg b/source/elements/oneART/source/images/mazda_firsthit_512spp_albedo.jpg deleted file mode 100644 index 6795d3db6..000000000 Binary files a/source/elements/oneART/source/images/mazda_firsthit_512spp_albedo.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/mazda_firsthit_512spp_normal.jpg b/source/elements/oneART/source/images/mazda_firsthit_512spp_normal.jpg deleted file mode 100644 index a9dbd7f5e..000000000 Binary files a/source/elements/oneART/source/images/mazda_firsthit_512spp_normal.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/mazda_nondeltahit_512spp_albedo.jpg b/source/elements/oneART/source/images/mazda_nondeltahit_512spp_albedo.jpg deleted file mode 100644 index cb1374275..000000000 Binary files a/source/elements/oneART/source/images/mazda_nondeltahit_512spp_albedo.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/mazda_nondeltahit_512spp_normal.jpg b/source/elements/oneART/source/images/mazda_nondeltahit_512spp_normal.jpg deleted file mode 100644 index f20a972cd..000000000 Binary files a/source/elements/oneART/source/images/mazda_nondeltahit_512spp_normal.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/motion_blur_geometry.jpg b/source/elements/oneART/source/images/motion_blur_geometry.jpg deleted file mode 100644 index 6262b87ad..000000000 Binary files a/source/elements/oneART/source/images/motion_blur_geometry.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/multi_level_instancing.jpg b/source/elements/oneART/source/images/multi_level_instancing.jpg deleted file mode 100644 index 0ccfad09a..000000000 Binary files a/source/elements/oneART/source/images/multi_level_instancing.jpg and /dev/null differ diff --git 
a/source/elements/oneART/source/images/normalmap_frustum.png b/source/elements/oneART/source/images/normalmap_frustum.png deleted file mode 100644 index af33e9c16..000000000 Binary files a/source/elements/oneART/source/images/normalmap_frustum.png and /dev/null differ diff --git a/source/elements/oneART/source/images/ospExamples.png b/source/elements/oneART/source/images/ospExamples.png deleted file mode 100644 index a6dff8081..000000000 Binary files a/source/elements/oneART/source/images/ospExamples.png and /dev/null differ diff --git a/source/elements/oneART/source/images/pathtracer.jpg b/source/elements/oneART/source/images/pathtracer.jpg deleted file mode 100644 index 9f2683da3..000000000 Binary files a/source/elements/oneART/source/images/pathtracer.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/point_geometry.jpg b/source/elements/oneART/source/images/point_geometry.jpg deleted file mode 100644 index 0482f80a8..000000000 Binary files a/source/elements/oneART/source/images/point_geometry.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/quad_light.png b/source/elements/oneART/source/images/quad_light.png deleted file mode 100644 index 8769c61c6..000000000 Binary files a/source/elements/oneART/source/images/quad_light.png and /dev/null differ diff --git a/source/elements/oneART/source/images/quad_uv.fig b/source/elements/oneART/source/images/quad_uv.fig deleted file mode 100644 index f4aacedbe..000000000 --- a/source/elements/oneART/source/images/quad_uv.fig +++ /dev/null @@ -1,29 +0,0 @@ -#FIG 3.2 Produced by xfig version 3.2.5b -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 1425 8175 9075 6975 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 1425 8175 3300 3450 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 2 - 8475 2700 9075 6975 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 2 - 3300 3450 8475 2700 -2 1 0 2 0 7 50 -1 -1 0.000 
0 0 7 0 1 2 - 1 1 8.00 60.00 120.00 - 5625 2625 5775 5400 -4 0 0 50 -1 14 20 0.0000 4 255 390 1350 8475 p0\001 -4 0 0 50 -1 14 20 0.0000 4 150 195 5175 7875 u\001 -4 0 0 50 -1 14 20 0.0000 4 150 195 2550 5700 v\001 -4 0 0 50 -1 14 20 0.0000 4 255 390 3150 3225 p3\001 -4 0 0 50 -1 14 20 0.0000 4 255 390 8625 2625 p2\001 -4 0 0 50 -1 14 20 0.0000 4 255 390 9225 7125 p1\001 -4 0 0 50 -1 14 20 0.0000 4 240 390 6150 4425 Ng\001 diff --git a/source/elements/oneART/source/images/quad_uv.pdf b/source/elements/oneART/source/images/quad_uv.pdf deleted file mode 100644 index 1f6d18b07..000000000 Binary files a/source/elements/oneART/source/images/quad_uv.pdf and /dev/null differ diff --git a/source/elements/oneART/source/images/quad_uv.png b/source/elements/oneART/source/images/quad_uv.png deleted file mode 100644 index 08b668531..000000000 Binary files a/source/elements/oneART/source/images/quad_uv.png and /dev/null differ diff --git a/source/elements/oneART/source/images/quaternion_motion_blur.jpg b/source/elements/oneART/source/images/quaternion_motion_blur.jpg deleted file mode 100644 index af36db36e..000000000 Binary files a/source/elements/oneART/source/images/quaternion_motion_blur.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/renderSunSky.png b/source/elements/oneART/source/images/renderSunSky.png deleted file mode 100644 index 846bb7a22..000000000 Binary files a/source/elements/oneART/source/images/renderSunSky.png and /dev/null differ diff --git a/source/elements/oneART/source/images/spot_coords.png b/source/elements/oneART/source/images/spot_coords.png deleted file mode 100644 index 24ea5c49c..000000000 Binary files a/source/elements/oneART/source/images/spot_coords.png and /dev/null differ diff --git a/source/elements/oneART/source/images/spot_light.png b/source/elements/oneART/source/images/spot_light.png deleted file mode 100644 index 439192f2f..000000000 Binary files a/source/elements/oneART/source/images/spot_light.png and 
/dev/null differ diff --git a/source/elements/oneART/source/images/structured_spherical_coords.png b/source/elements/oneART/source/images/structured_spherical_coords.png deleted file mode 100644 index 8004dc834..000000000 Binary files a/source/elements/oneART/source/images/structured_spherical_coords.png and /dev/null differ diff --git a/source/elements/oneART/source/images/structured_spherical_coords.svg b/source/elements/oneART/source/images/structured_spherical_coords.svg deleted file mode 100644 index 278e4143f..000000000 --- a/source/elements/oneART/source/images/structured_spherical_coords.svg +++ /dev/null @@ -1,137 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - x - y - z - - (r, θ, φ) - - - - - φ - θ - r - - - - diff --git a/source/elements/oneART/source/images/subdivision_geometry.jpg b/source/elements/oneART/source/images/subdivision_geometry.jpg deleted file mode 100644 index 61bb54a22..000000000 Binary files a/source/elements/oneART/source/images/subdivision_geometry.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/triangle_geometry.jpg b/source/elements/oneART/source/images/triangle_geometry.jpg deleted file mode 100644 index 3115c6088..000000000 Binary files a/source/elements/oneART/source/images/triangle_geometry.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/triangle_uv.fig b/source/elements/oneART/source/images/triangle_uv.fig deleted file mode 100644 index f98092209..000000000 --- a/source/elements/oneART/source/images/triangle_uv.fig +++ /dev/null @@ -1,26 +0,0 @@ -#FIG 3.2 Produced by xfig version 3.2.5b -Landscape -Center -Inches -Letter -100.00 -Single --2 -1200 2 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 1425 8175 9525 6900 -2 1 0 2 0 7 50 -1 -1 0.000 
0 0 7 1 0 2 - 1 1 8.00 60.00 120.00 - 1425 8175 3450 3075 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 0 2 - 3450 3075 9525 6900 -2 1 0 2 0 7 50 -1 -1 0.000 0 0 7 0 1 2 - 1 1 8.00 60.00 120.00 - 4275 4200 4275 5850 -4 0 0 50 -1 14 20 0.0000 4 255 390 1350 8475 p0\001 -4 0 0 50 -1 14 20 0.0000 4 255 390 3375 2850 p2\001 -4 0 0 50 -1 14 20 0.0000 4 255 390 9450 7200 p1\001 -4 0 0 50 -1 14 20 0.0000 4 150 195 5175 7875 u\001 -4 0 0 50 -1 14 20 0.0000 4 150 195 2550 5700 v\001 -4 0 0 50 -1 14 20 0.0000 4 240 390 4425 5025 Ng\001 diff --git a/source/elements/oneART/source/images/triangle_uv.pdf b/source/elements/oneART/source/images/triangle_uv.pdf deleted file mode 100644 index 33a903eb9..000000000 Binary files a/source/elements/oneART/source/images/triangle_uv.pdf and /dev/null differ diff --git a/source/elements/oneART/source/images/triangle_uv.png b/source/elements/oneART/source/images/triangle_uv.png deleted file mode 100644 index 7b158f0a0..000000000 Binary files a/source/elements/oneART/source/images/triangle_uv.png and /dev/null differ diff --git a/source/elements/oneART/source/images/tutorial_accumulatedframe.png b/source/elements/oneART/source/images/tutorial_accumulatedframe.png deleted file mode 100644 index 3cb6cb95e..000000000 Binary files a/source/elements/oneART/source/images/tutorial_accumulatedframe.png and /dev/null differ diff --git a/source/elements/oneART/source/images/tutorial_firstframe.png b/source/elements/oneART/source/images/tutorial_firstframe.png deleted file mode 100644 index 202b16a6b..000000000 Binary files a/source/elements/oneART/source/images/tutorial_firstframe.png and /dev/null differ diff --git a/source/elements/oneART/source/images/user_geometry.jpg b/source/elements/oneART/source/images/user_geometry.jpg deleted file mode 100644 index b5d51d18c..000000000 Binary files a/source/elements/oneART/source/images/user_geometry.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/vdb_structure.png 
b/source/elements/oneART/source/images/vdb_structure.png deleted file mode 100644 index d1d3f4fe7..000000000 Binary files a/source/elements/oneART/source/images/vdb_structure.png and /dev/null differ diff --git a/source/elements/oneART/source/images/viewer.jpg b/source/elements/oneART/source/images/viewer.jpg deleted file mode 100644 index fa32ffa81..000000000 Binary files a/source/elements/oneART/source/images/viewer.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/viewer_stream.jpg b/source/elements/oneART/source/images/viewer_stream.jpg deleted file mode 100644 index 9d50b9d8a..000000000 Binary files a/source/elements/oneART/source/images/viewer_stream.jpg and /dev/null differ diff --git a/source/elements/oneART/source/images/voronoi.jpg b/source/elements/oneART/source/images/voronoi.jpg deleted file mode 100644 index 154cfcbc0..000000000 Binary files a/source/elements/oneART/source/images/voronoi.jpg and /dev/null differ diff --git a/source/elements/oneART/source/index.rst b/source/elements/oneART/source/index.rst deleted file mode 100644 index c69f14ff1..000000000 --- a/source/elements/oneART/source/index.rst +++ /dev/null @@ -1,19 +0,0 @@ -.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -.. _oneart-section: - -=========== -Ray Tracing -=========== - -.. include:: overview.inc.rst - -.. toctree:: - :maxdepth: 1 - - component-libraries - appendices - versions - diff --git a/source/elements/oneART/source/ispc.rst b/source/elements/oneART/source/ispc.rst deleted file mode 100644 index 77b88bd5d..000000000 --- a/source/elements/oneART/source/ispc.rst +++ /dev/null @@ -1,29 +0,0 @@ -.. SPDX-FileCopyrightText: 2019-2021 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -.. 
_ispc-section: - -=================================== -ISPC Implicit SPMD Program Compiler -=================================== - -ISPC is a compiler for a variant of the C programming language, with -extensions for "single program, multiple data" (SPMD) programming. -Under the SPMD model, the programmer writes a program that generally -appears to be a regular serial program, though the execution model is -actually that a number of program instances execute in parallel on -the hardware. - -ISPC compiles a C-based SPMD programming language to run on the -SIMD units of CPUs and GPUs; it frequently provides a 3x or more -speedup on architectures with 4-wide vector SSE units and 5x-6x on -architectures with 8-wide AVX vector units, without any of the difficulty -of writing intrinsics code. Parallelization across multiple cores is -also supported by ispc, making it possible to write programs that -achieve performance improvement that scales by both number of -cores and vector unit size. - -More information can be found at the `ISPC Implicit SPMD Program Compiler website`_. - -.. _`ISPC Implicit SPMD Program Compiler website`: https://ispc.github.io diff --git a/source/elements/oneART/source/nested-index.rst b/source/elements/oneART/source/nested-index.rst deleted file mode 100644 index 3e540eee7..000000000 --- a/source/elements/oneART/source/nested-index.rst +++ /dev/null @@ -1,20 +0,0 @@ -.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -.. _oneart-section: - -=========== -Ray Tracing -=========== - -Overview --------- - -.. include:: overview.inc.rst - -.. toctree:: - :maxdepth: 1 - - component-libraries - appendices diff --git a/source/elements/oneART/source/oidn-intro.rst b/source/elements/oneART/source/oidn-intro.rst deleted file mode 100644 index 405e6f0ee..000000000 --- a/source/elements/oneART/source/oidn-intro.rst +++ /dev/null @@ -1,34 +0,0 @@ -.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation -.. -.. 
SPDX-License-Identifier: CC-BY-4.0 - -============ -Introduction -============ - -Open Image Denoise is an open, high-quality, -efficient, and easy-to-use denoising library that allows one to -significantly reduce rendering times in ray tracing based rendering -applications. It filters out the Monte Carlo noise inherent to -stochastic ray tracing methods like path tracing, reducing the amount -of necessary samples per pixel by even multiple orders of magnitude -(depending on the desired closeness to the ground truth). A simple but -flexible C/C++ API ensures that the library can be easily integrated -into most existing or new rendering solutions. - -At the heart of the Open Image Denoise library is a collection of -efficient deep learning based denoising filters, which were trained to -handle a wide range of samples per pixel (spp), from 1 spp to almost -fully converged. Thus it is suitable for both preview and final-frame -rendering. The filters can denoise images either using only the noisy -color (beauty) buffer, or, to preserve as much detail as possible, can -optionally utilize auxiliary feature buffers as well (e.g. albedo, -normal). Such buffers are supported by most renderers as arbitrary -output variables (AOVs) or can be usually implemented with little -effort. - -Although the library ships with a set of pre-trained filter models, it -is not mandatory to use these. To optimize a filter for a specific -renderer, sample count, content type, scene, etc., it is possible to -train the model using the included training toolkit and user-provided -image datasets. diff --git a/source/elements/oneART/source/oidn-spec.rst b/source/elements/oneART/source/oidn-spec.rst deleted file mode 100644 index 6c00bc67c..000000000 --- a/source/elements/oneART/source/oidn-spec.rst +++ /dev/null @@ -1,758 +0,0 @@ -Open Image Denoise API -====================== - -Open Image Denoise provides a C99 API (also compatible with C++) and a C++11 wrapper API as well. 
For simplicity, this document mostly refers to the C99 version of the API. - -The API is designed in an object-oriented manner, e.g. it contains device objects (``OIDNDevice`` type), buffer objects (``OIDNBuffer`` type), and filter objects (``OIDNFilter`` type). All objects are reference-counted, and handles can be released by calling the appropriate release function (e.g. ``oidnReleaseDevice``) or retained by incrementing the reference count (e.g. ``oidnRetainDevice``). - -An important aspect of objects is that setting their parameters do not have an immediate effect (with a few exceptions). Instead, objects with updated parameters are in an unusable state until the parameters get explicitly committed to a given object. The commit semantic allows for batching up multiple small changes, and specifies exactly when changes to objects will occur. - -All API calls are thread-safe, but operations that use the same device will be serialized, so the amount of API calls from different threads should be minimized. - -Examples --------- - -To have a quick overview of the C99 and C++11 APIs, see the following simple example code snippets. - -Basic denoising (C99 API) -~~~~~~~~~~~~~~~~~~~~~~~~~ - -:: - - #include - ... 
- // Create an Intel Open Image Denoise device - OIDNDevice device = oidnNewDevice(OIDN_DEVICE_TYPE_DEFAULT); - oidnCommitDevice(device); - - // Create a filter for denoising a beauty (color) image using optional auxiliary images too - OIDNFilter filter = oidnNewFilter(device, "RT"); // generic ray tracing filter - oidnSetSharedFilterImage(filter, "color", colorPtr, - OIDN_FORMAT_FLOAT3, width, height, 0, 0, 0); // beauty - oidnSetSharedFilterImage(filter, "albedo", albedoPtr, - OIDN_FORMAT_FLOAT3, width, height, 0, 0, 0); // auxiliary - oidnSetSharedFilterImage(filter, "normal", normalPtr, - OIDN_FORMAT_FLOAT3, width, height, 0, 0, 0); // auxiliary - oidnSetSharedFilterImage(filter, "output", outputPtr, - OIDN_FORMAT_FLOAT3, width, height, 0, 0, 0); // denoised beauty - oidnSetFilter1b(filter, "hdr", true); // beauty image is HDR - oidnCommitFilter(filter); - - // Filter the image - oidnExecuteFilter(filter); - - // Check for errors - const char* errorMessage; - if (oidnGetDeviceError(device, &errorMessage) != OIDN_ERROR_NONE) - printf("Error: %s\n", errorMessage); - - // Cleanup - oidnReleaseFilter(filter); - oidnReleaseDevice(device); - -Basic denoising (C++11 API) -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:: - - #include - ... 
- // Create an Intel Open Image Denoise device - oidn::DeviceRef device = oidn::newDevice(); - device.commit(); - - // Create a filter for denoising a beauty (color) image using optional auxiliary images too - oidn::FilterRef filter = device.newFilter("RT"); // generic ray tracing filter - filter.setImage("color", colorPtr, oidn::Format::Float3, width, height); // beauty - filter.setImage("albedo", albedoPtr, oidn::Format::Float3, width, height); // auxiliary - filter.setImage("normal", normalPtr, oidn::Format::Float3, width, height); // auxiliary - filter.setImage("output", outputPtr, oidn::Format::Float3, width, height); // denoised beauty - filter.set("hdr", true); // beauty image is HDR - filter.commit(); - - // Filter the image - filter.execute(); - - // Check for errors - const char* errorMessage; - if (device.getError(errorMessage) != oidn::Error::None) - std::cout << "Error: " << errorMessage << std::endl; - -Denoising with prefiltering (C++11 API) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:: - - // Create a filter for denoising a beauty (color) image using prefiltered auxiliary images too - oidn::FilterRef filter = device.newFilter("RT"); // generic ray tracing filter - filter.setImage("color", colorPtr, oidn::Format::Float3, width, height); // beauty - filter.setImage("albedo", albedoPtr, oidn::Format::Float3, width, height); // auxiliary - filter.setImage("normal", normalPtr, oidn::Format::Float3, width, height); // auxiliary - filter.setImage("output", outputPtr, oidn::Format::Float3, width, height); // denoised beauty - filter.set("hdr", true); // beauty image is HDR - filter.set("cleanAux", true); // auxiliary images will be prefiltered - filter.commit(); - - // Create a separate filter for denoising an auxiliary albedo image (in-place) - oidn::FilterRef albedoFilter = device.newFilter("RT"); // same filter type as for beauty - albedoFilter.setImage("albedo", albedoPtr, oidn::Format::Float3, width, height); - albedoFilter.setImage("output", 
albedoPtr, oidn::Format::Float3, width, height); - albedoFilter.commit(); - - // Create a separate filter for denoising an auxiliary normal image (in-place) - oidn::FilterRef normalFilter = device.newFilter("RT"); // same filter type as for beauty - normalFilter.setImage("normal", normalPtr, oidn::Format::Float3, width, height); - normalFilter.setImage("output", normalPtr, oidn::Format::Float3, width, height); - normalFilter.commit(); - - // Prefilter the auxiliary images - albedoFilter.execute(); - normalFilter.execute(); - - // Filter the beauty image - filter.execute(); - -Device ------- - -Intel Open Image Denoise supports a device concept, which allows different components of the application to use the Open Image Denoise API without interfering with each other. An application first needs to create a device with - -:: - - OIDNDevice oidnNewDevice(OIDNDeviceType type); - -where the ``type`` enumeration maps to a specific device implementation, which can be one of the following: - -.. table:: Supported device types, i.e., valid constants of type ``OIDNDeviceType``. 
- - +------------------------------+-------------------------------------------------------+ - | Name | Description | - +==============================+=======================================================+ - | ``OIDN_DEVICE_TYPE_DEFAULT`` | select the approximately fastest device | - +------------------------------+-------------------------------------------------------+ - | ``OIDN_DEVICE_TYPE_CPU`` | CPU device (requires SSE4.1 support or Apple Silicon) | - +------------------------------+-------------------------------------------------------+ - -Once a device is created, you can call - -:: - - void oidnSetDevice1b(OIDNDevice device, const char* name, bool value); - void oidnSetDevice1i(OIDNDevice device, const char* name, int value); - bool oidnGetDevice1b(OIDNDevice device, const char* name); - int oidnGetDevice1i(OIDNDevice device, const char* name); - -to set and get parameter values on the device. Note that some parameters are constants, thus trying to set them is an error. See the tables below for the parameters supported by devices. - -.. table:: Parameters supported by all devices. 
- - +---------------+------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +===============+==================+=========+=============================================================================================================================================+ - | ``const int`` | ``version`` | | combined version number (major.minor.patch) with two decimal digits per component | - +---------------+------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------+ - | ``const int`` | ``versionMajor`` | | major version number | - +---------------+------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------+ - | ``const int`` | ``versionMinor`` | | minor version number | - +---------------+------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------+ - | ``const int`` | ``versionPatch`` | | patch version number | - +---------------+------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------+ - | ``int`` | ``verbose`` | | 0 verbosity level of the console output between 0–4; when set to 0, no output is printed, when set to a higher level more output is printed | - +---------------+------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------+ - -.. table:: Additional parameters supported only by CPU devices. 
- - +----------+-----------------+---------+-----------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +==========+=================+=========+===================================================================================================================================+ - | ``int`` | ``numThreads`` | 0 | maximum number of threads which the library should use; 0 will set it automatically to get the best performance | - +----------+-----------------+---------+-----------------------------------------------------------------------------------------------------------------------------------+ - | ``bool`` | ``setAffinity`` | true | enables thread affinitization (pinning software threads to hardware threads) if it is necessary for achieving optimal performance | - +----------+-----------------+---------+-----------------------------------------------------------------------------------------------------------------------------------+ - -Note that the CPU device heavily relies on setting the thread affinities to achieve optimal performance, so it is highly recommended to leave this option enabled. However, this may interfere with the application if that also sets the thread affinities, potentially causing performance degradation. In such cases, the recommended solution is to either disable setting the affinities in the application or in Intel Open Image Denoise, or to always set/reset the affinities before/after each parallel region in the application (e.g., if using TBB, with ``tbb::task_arena`` and ``tbb::task_scheduler_observer``). - -Once parameters are set on the created device, the device must be committed with - -:: - - void oidnCommitDevice(OIDNDevice device); - -This device can then be used to construct further objects, such as buffers and filters. Note that a device can be committed only once during its lifetime. 
Before the application exits, it should release all devices by invoking - -:: - - void oidnReleaseDevice(OIDNDevice device); - -Note that Intel Open Image Denoise uses reference counting for all object types, so this function decreases the reference count of the device, and if the count reaches 0 the device will automatically get deleted. It is also possible to increase the reference count by calling - -:: - - void oidnRetainDevice(OIDNDevice device); - -An application typically creates only a single device. If required differently, it should only use a small number of devices at any given time. - -Error Handling -~~~~~~~~~~~~~~ - -Each user thread has its own error code per device. If an error occurs when calling an API function, this error code is set to the occurred error if it stores no previous error. The currently stored error can be queried by the application via - -:: - - OIDNError oidnGetDeviceError(OIDNDevice device, const char** outMessage); - -where ``outMessage`` can be a pointer to a C string which will be set to a more descriptive error message, or it can be ``NULL``. This function also clears the error code, which assures that the returned error code is always the first error occurred since the last invocation of ``oidnGetDeviceError`` on the current thread. Note that the optionally returned error message string is valid only until the next invocation of the function. - -Alternatively, the application can also register a callback function of type - -:: - - typedef void (*OIDNErrorFunction)(void* userPtr, OIDNError code, const char* message); - -via - -:: - - void oidnSetDeviceErrorFunction(OIDNDevice device, OIDNErrorFunction func, void* userPtr); - -to get notified when errors occur. Only a single callback function can be registered per device, and further invocations overwrite the previously set callback function, which do *not* require also calling the ``oidnCommitDevice`` function. 
Passing ``NULL`` as function pointer disables the registered callback function. When the registered callback function is invoked, it gets passed the user-defined payload (``userPtr`` argument as specified at registration time), the error code (``code`` argument) of the occurred error, as well as a string (``message`` argument) that further describes the error. The error code is always set even if an error callback function is registered. It is recommended to always set an error callback function, to detect all errors. - -When the device construction fails, ``oidnNewDevice`` returns ``NULL`` as device. To detect the error code of such a failed device construction, pass ``NULL`` as device to the ``oidnGetDeviceError`` function. For all other invocations of ``oidnGetDeviceError``, a proper device handle must be specified. - -The following errors are currently used by Intel Open Image Denoise: - -.. table:: Possible error codes, i.e., valid constants of type ``OIDNError``. - - +-------------------------------------+--------------------------------------------+ - | Name | Description | - +=====================================+============================================+ - | ``OIDN_ERROR_NONE`` | no error occurred | - +-------------------------------------+--------------------------------------------+ - | ``OIDN_ERROR_UNKNOWN`` | an unknown error occurred | - +-------------------------------------+--------------------------------------------+ - | ``OIDN_ERROR_INVALID_ARGUMENT`` | an invalid argument was specified | - +-------------------------------------+--------------------------------------------+ - | ``OIDN_ERROR_INVALID_OPERATION`` | the operation is not allowed | - +-------------------------------------+--------------------------------------------+ - | ``OIDN_ERROR_OUT_OF_MEMORY`` | not enough memory to execute the operation | - +-------------------------------------+--------------------------------------------+ - | ``OIDN_ERROR_UNSUPPORTED_HARDWARE`` | the 
hardware (e.g., CPU) is not supported | - +-------------------------------------+--------------------------------------------+ - | ``OIDN_ERROR_CANCELLED`` | the operation was cancelled by the user | - +-------------------------------------+--------------------------------------------+ - -Buffer ------- - -Large data like images can be passed to Intel Open Image Denoise either via pointers to memory allocated and managed by the user (this is the recommended, often easier and more efficient approach, if supported by the device) or by creating buffer objects (supported by all devices). To create a new data buffer with memory allocated and owned by the device, holding ``byteSize`` number of bytes, use - -:: - - OIDNBuffer oidnNewBuffer(OIDNDevice device, size_t byteSize); - -The created buffer is bound to the specified device (``device`` argument). The specified number of bytes are allocated at buffer construction time and deallocated when the buffer is destroyed. - -It is also possible to create a “shared” data buffer with memory allocated and managed by the user with - -:: - - OIDNBuffer oidnNewSharedBuffer(OIDNDevice device, void* ptr, size_t byteSize); - -where ``ptr`` points to the user-managed memory and ``byteSize`` is its size in bytes. At buffer construction time no buffer data is allocated, but the buffer data provided by the user is used. The buffer data must remain valid for as long as the buffer may be used, and the user is responsible to free the buffer data when no longer required. 
- -Similar to device objects, buffer objects are also reference-counted and can be retained and released by calling the following functions: - -:: - - void oidnRetainBuffer(OIDNBuffer buffer); - void oidnReleaseBuffer(OIDNBuffer buffer); - -The size of the buffer in bytes can be queried using - -:: - - size_t oidnGetBufferSize(OIDNBuffer buffer); - -Accessing the data stored in a buffer object is possible by mapping it into the address space of the application using - -:: - - void* oidnMapBuffer(OIDNBuffer buffer, OIDNAccess access, size_t byteOffset, size_t byteSize); - -where ``access`` is the desired access mode of the mapped memory, ``byteOffset`` is the offset to the beginning of the mapped memory region in bytes, and ``byteSize`` is the number of bytes to map. The function returns a pointer to the mapped buffer data. If the specified ``byteSize`` is 0, the maximum available amount of memory will be mapped. The ``access`` argument must be one of the access modes in the following table: - -.. table:: Access modes for memory regions mapped with ``oidnMapBuffer``, i.e., valid constants of type ``OIDNAccess``. 
- - +-------------------------------+---------------------------------------------------------------+ - | Name | Description | - +===============================+===============================================================+ - | ``OIDN_ACCESS_READ`` | read-only access | - +-------------------------------+---------------------------------------------------------------+ - | ``OIDN_ACCESS_WRITE`` | write-only access | - +-------------------------------+---------------------------------------------------------------+ - | ``OIDN_ACCESS_READ_WRITE`` | read and write access | - +-------------------------------+---------------------------------------------------------------+ - | ``OIDN_ACCESS_WRITE_DISCARD`` | write-only access but the previous contents will be discarded | - +-------------------------------+---------------------------------------------------------------+ - -After accessing the mapped data in the buffer, the memory region must be unmapped with - -:: - - void oidnUnmapBuffer(OIDNBuffer buffer, void* mappedPtr); - -where ``mappedPtr`` must be a pointer returned by a call to ``oidnMapBuffer`` for the specified buffer. Any change to the mapped data is guaranteed to take effect only after unmapping the memory region. - -It is also possible to get a pointer directly to the buffer data but this might be valid only on the device the buffer was created on: - -:: - - void* oidnGetBufferData(OIDNBuffer buffer); - -Data Format -~~~~~~~~~~~ - -Buffers store opaque data and thus have no information about the type and format of the data. Other objects, e.g. filters, typically require specifying the format of the data stored in buffers or shared via pointers. This can be done using the ``OIDNFormat`` enumeration type: - -.. table:: Supported data formats, i.e., valid constants of type ``OIDNFormat``. 
- - ========================== ========================================== - Name Description - ========================== ========================================== - ``OIDN_FORMAT_UNDEFINED`` undefined format - ``OIDN_FORMAT_FLOAT`` 32-bit floating-point scalar - ``OIDN_FORMAT_FLOAT[234]`` 32-bit floating-point [234]-element vector - ``OIDN_FORMAT_HALF`` 16-bit floating-point scalar - ``OIDN_FORMAT_HALF[234]`` 16-bit floating-point [234]-element vector - ========================== ========================================== - -Filter ------- - -Filters are the main objects in Intel Open Image Denoise that are responsible for the actual denoising. The library ships with a collection of filters which are optimized for different types of images and use cases. To create a filter object, call - -:: - - OIDNFilter oidnNewFilter(OIDNDevice device, const char* type); - -where ``type`` is the name of the filter type to create. The supported filter types are documented later in this section. Once created, filter objects can be retained and released with - -:: - - void oidnRetainFilter(OIDNFilter filter); - void oidnReleaseFilter(OIDNFilter filter); - -After creating a filter, it needs to be set up by specifying the input and output images, and potentially setting other parameter values as well. 
- -To bind images to the filter, you can use one of the following functions: - -:: - - void oidnSetFilterImage(OIDNFilter filter, const char* name, - OIDNBuffer buffer, OIDNFormat format, - size_t width, size_t height, - size_t byteOffset, - size_t bytePixelStride, size_t byteRowStride); - - void oidnSetSharedFilterImage(OIDNFilter filter, const char* name, - void* ptr, OIDNFormat format, - size_t width, size_t height, - size_t byteOffset, - size_t bytePixelStride, size_t byteRowStride); - -It is possible to specify either a data buffer object (``buffer`` argument) with the ``oidnSetFilterImage`` function, or directly a pointer to shared user-managed data (``ptr`` argument) with the ``oidnSetSharedFilterImage`` function. - -In both cases, you must also specify the name of the image parameter to set (``name`` argument, e.g. ``"color"``, ``"output"``), the pixel format (``format`` argument), the width and height of the image in number of pixels (``width`` and ``height`` arguments), the starting offset of the image data (``byteOffset`` argument), the pixel stride (``bytePixelStride`` argument) and the row stride (``byteRowStride`` argument), in number of bytes. - -The row stride must be an integer multiple of the pixel stride. If the pixels and/or rows are stored contiguously (tightly packed without any gaps), you can set ``bytePixelStride`` and/or ``byteRowStride`` to 0 to let the library compute the actual strides automatically, as a convenience. - -Images support only the ``OIDN_FORMAT_FLOAT3`` and ``OIDN_FORMAT_HALF3`` pixel formats. Custom image layouts with extra channels (e.g. alpha channel) or other data are supported as well by specifying a non-zero pixel stride. This way, expensive image layout conversion and copying can be avoided but the extra data will be ignored by the filter. 
- -To unbind a previously set image from the filter, call - -:: - - void oidnRemoveFilterImage(OIDNFilter filter, const char* name); - -Some special data used by filters are opaque/untyped (e.g. trained model weights blobs), which can be specified with the ``oidnSetSharedFilterData`` function: - -:: - - void oidnSetSharedFilterData(OIDNFilter filter, const char* name, - void* ptr, size_t byteSize); - -Modifying the contents of an opaque data parameter after binding it to a filter is allowed but the filter needs to be notified that the data has been updated by calling - -:: - - void oidnUpdateFilterData(OIDNFilter filter, const char* name); - -Unbinding opaque data from the filter can be performed with - -:: - - void oidnRemoveFilterData(OIDNFilter filter, const char* name); - -Filters may have parameters other than buffers as well, which you can set and get using the following functions: - -:: - - void oidnSetFilter1b(OIDNFilter filter, const char* name, bool value); - void oidnSetFilter1i(OIDNFilter filter, const char* name, int value); - void oidnSetFilter1f(OIDNFilter filter, const char* name, float value); - bool oidnGetFilter1b(OIDNFilter filter, const char* name); - int oidnGetFilter1i(OIDNFilter filter, const char* name); - float oidnGetFilter1f(OIDNFilter filter, const char* name); - -Filters support a progress monitor callback mechanism that can be used to report progress of filter operations and to cancel them as well. Calling ``oidnSetFilterProgressMonitorFunction`` registers a progress monitor callback function (``func`` argument) with payload (``userPtr`` argument) for the specified filter (``filter`` argument): - -:: - - typedef bool (*OIDNProgressMonitorFunction)(void* userPtr, double n); - - void oidnSetFilterProgressMonitorFunction(OIDNFilter filter, - OIDNProgressMonitorFunction func, - void* userPtr); - -Only a single callback function can be registered per filter, and further invocations overwrite the previously set callback function. 
Passing ``NULL`` as function pointer disables the registered callback function. Once registered, Intel Open Image Denoise will invoke the callback function multiple times during filter operations, by passing the payload as set at registration time (``userPtr`` argument), and a ``double`` in the range [0, 1] which estimates the progress of the operation (``n`` argument). When returning ``true`` from the callback function, Intel Open Image Denoise will continue the filter operation normally. When returning ``false``, the library will cancel the filter operation with the ``OIDN_ERROR_CANCELLED`` error code. - -After setting all necessary parameters for the filter, the changes must be committed by calling - -:: - - void oidnCommitFilter(OIDNFilter filter); - -The parameters can be updated after committing the filter, but it must be re-committed for any new changes to take effect. Committing major changes to the filter (e.g. setting new image parameters, changing the image resolution) can be expensive, and thus should not be done frequently (e.g. per frame). - -Finally, an image can be filtered by executing the filter with - -:: - - void oidnExecuteFilter(OIDNFilter filter); - -which will read the input image data from the specified buffers and produce the denoised output image. - -In the following we describe the different filters that are currently implemented in Intel Open Image Denoise. - -RT -~~ - -The ``RT`` (**r**\ ay **t**\ racing) filter is a generic ray tracing denoising filter which is suitable for denoising images rendered with Monte Carlo ray tracing methods like unidirectional and bidirectional path tracing. It supports depth of field and motion blur as well, but it is *not* temporally stable. The filter is based on a convolutional neural network (CNN), and it aims to provide a good balance between denoising performance and quality. 
The filter comes with a set of pre-trained CNN models that work well with a wide range of ray tracing based renderers and noise levels. - -[Example noisy beauty image rendered using unidirectional path tracing (4 samples per pixel). *Scene by Evermotion.*][imgMazdaColor] - -[Example output beauty image denoised using prefiltered auxiliary feature images (albedo and normal) too.][imgMazdaDenoised] - -For denoising *beauty* images, it accepts either a low dynamic range (LDR) or high dynamic range (HDR) image (``color``) as the main input image. In addition to this, it also accepts *auxiliary feature* images, ``albedo`` and ``normal``, which are optional inputs that usually improve the denoising quality significantly, preserving more details. - -It is possible to denoise auxiliary images as well, in which case only the respective auxiliary image has to be specified as input, instead of the beauty image. This can be done as a *prefiltering* step to further improve the quality of the denoised beauty image. - -The ``RT`` filter has certain limitations regarding the supported input images. Most notably, it cannot denoise images that were not rendered with ray tracing. Another important limitation is related to anti-aliasing filters. Most renderers use a high-quality pixel reconstruction filter instead of a trivial box filter to minimize aliasing artifacts (e.g. Gaussian, Blackman-Harris). The ``RT`` filter does support such pixel filters but only if implemented with importance sampling. Weighted pixel sampling (sometimes called *splatting*) introduces correlation between neighboring pixels, which causes the denoising to fail (the noise will not be filtered), thus it is not supported. - -The filter can be created by passing ``"RT"`` to the ``oidnNewFilter`` function as the filter type. The filter supports the parameters listed in the table below. All specified images must have the same dimensions. The output image can be one of the input images (i.e. 
in-place denoising is supported). See section `Examples <#examples>`__ for simple code snippets that demonstrate the usage of the filter. - -.. table:: Parameters supported by the ``RT`` filter. - - +---------------+-----------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +===============+=================+============+==================================================================================================================================================================================================================================================================================================================================================================================================+ - | ``Image`` | ``color`` | *optional* | input beauty image (3 channels, LDR values in [0, 1] or HDR values in [0, +∞), values being interpreted such that, after scaling with the ``inputScale`` parameter, a value of 1 corresponds to a luminance level of 100 cd/m²) | - +---------------+-----------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``Image`` | ``albedo`` | *optional* | input auxiliary image containing the albedo per pixel (3 channels, values in [0, 1]) | - 
+---------------+-----------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``Image`` | ``normal`` | *optional* | input auxiliary image containing the shading normal per pixel (3 channels, world-space or view-space vectors with arbitrary length, values in [-1, 1]) | - +---------------+-----------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``Image`` | ``output`` | | output image (3 channels); can be one of the input images | - +---------------+-----------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``bool`` | ``hdr`` | false | whether the main input image is HDR | - 
+---------------+-----------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``bool`` | ``srgb`` | false | whether the main input image is encoded with the sRGB (or 2.2 gamma) curve (LDR only) or is linear; the output will be encoded with the same curve | - +---------------+-----------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``float`` | ``inputScale`` | NaN | scales values in the main input image before filtering, without scaling the output too, which can be used to map color or auxiliary feature values to the expected range, e.g. 
for mapping HDR values to physical units (which affects the quality of the output but *not* the range of the output values); if set to NaN, the scale is computed implicitly for HDR images or set to 1 otherwise | - +---------------+-----------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``bool`` | ``cleanAux`` | false | whether the auxiliary feature (albedo, normal) images are noise-free; recommended for highest quality but should *not* be enabled for noisy auxiliary images to avoid residual noise | - +---------------+-----------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``Data`` | ``weights`` | *optional* | trained model weights blob | - +---------------+-----------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``int`` | ``maxMemoryMB`` | 3000 | approximate maximum scratch memory to use in megabytes (actual memory usage may be higher); limiting memory usage may cause slower denoising 
due to internally splitting the image into overlapping tiles | - +---------------+-----------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``const int`` | ``alignment`` | | when manually denoising in tiles, the tile size and offsets should be multiples of this amount of pixels to avoid artifacts; when denoising HDR images ``inputScale`` *must* be set by the user to avoid seam artifacts | - +---------------+-----------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``const int`` | ``overlap`` | | when manually denoising in tiles, the tiles should overlap by this amount of pixels | - +---------------+-----------------+------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Using auxiliary feature images like albedo and normal helps preserving fine details and textures in the image thus can significantly improve denoising quality. These images should typically contain feature values for the first hit (i.e. 
the surface which is directly visible) per pixel. This works well for most surfaces but does not provide any benefits for reflections and objects visible through transparent surfaces (compared to just using the color as input). However, this issue can be usually fixed by storing feature values for a subsequent hit (i.e. the reflection and/or refraction) instead of the first hit. For example, it usually works well to follow perfect specular (*delta*) paths and store features for the first diffuse or glossy surface hit instead (e.g. for perfect specular dielectrics and mirrors). This can greatly improve the quality of reflections and transmission. We will describe this approach in more detail in the following subsections. - -The auxiliary feature images should be as noise-free as possible. It is not a strict requirement but too much noise in the feature images may cause residual noise in the output. Ideally, these should be completely noise-free. If this is the case, this should be hinted to the filter using the ``cleanAux`` parameter to ensure the highest possible image quality. But this parameter should be used with care: if enabled, any noise present in the auxiliary images will end up in the denoised image as well, as residual noise. Thus, ``cleanAux`` should be enabled only if the auxiliary images are guaranteed to be noise-free. - -Usually it is difficult to provide clean feature images, and some residual noise might be present in the output even with ``cleanAux`` being disabled. To eliminate this noise and to even improve the sharpness of texture details, the auxiliary images should be first denoised in a prefiltering step, as mentioned earlier. Then, these denoised auxiliary images could be used for denoising the beauty image. Since these are now noise-free, the ``cleanAux`` parameter should be enabled. See section `Denoising with prefiltering (C++11 API) <#denoising-with-prefiltering-c11-api>`__ for a simple code example. 
Prefiltering makes denoising much more expensive but if there are multiple color AOVs to denoise, the prefiltered auxiliary images can be reused for denoising multiple AOVs, amortizing the cost of the prefiltering step. - -Thus, for final frame denoising, where the best possible image quality is required, it is recommended to prefilter the auxiliary features if they are noisy and enable the ``cleanAux`` parameter. Denoising with noisy auxiliary features should be reserved for previews and interactive rendering. - -All auxiliary images should use the same pixel reconstruction filter as the beauty image. Using a properly anti-aliased beauty image but aliased albedo or normal images will likely introduce artifacts around edges. - -Albedo -^^^^^^ - -The albedo image is the feature image that usually provides the biggest quality improvement. It should contain the approximate color of the surfaces independent of illumination and viewing angle. - -[Example albedo image obtained using the first hit. Note that the albedos of all transparent surfaces are 1.][imgMazdaAlbedoFirstHit] - -[Example albedo image obtained using the first diffuse or glossy (non-delta) hit. Note that the albedos of perfect specular (delta) transparent surfaces are computed as the Fresnel blend of the reflected and transmitted albedos.][imgMazdaAlbedoNonDeltaHit] - -For simple matte surfaces this means using the diffuse color/texture as the albedo. For other, more complex surfaces it is not always obvious what is the best way to compute the albedo, but the denoising filter is flexible to a certain extent and works well with differently computed albedos. Thus it is not necessary to compute the strict, exact albedo values, but the values must always be between 0 and 1. - -For metallic surfaces the albedo should be either the reflectivity at normal incidence (e.g. 
from the artist friendly metallic Fresnel model) or the average reflectivity; or if these are constant (not textured) or unknown, the albedo can be simply 1 as well. - -The albedo for dielectric surfaces (e.g. glass) should be either 1 or, if the surface is perfect specular (i.e. has a delta BSDF), the Fresnel blend of the reflected and transmitted albedos. The latter usually works better but only if it does not introduce too much noise or the albedo is prefiltered. If noise is an issue, we recommend to split the path into a reflected and a transmitted path at the first hit, and perhaps fall back to an albedo of 1 for subsequent dielectric hits. The reflected albedo in itself can be used for mirror-like surfaces as well. - -The albedo for layered surfaces can be computed as the weighted sum of the albedos of the individual layers. Non-absorbing clear coat layers can be simply ignored (or the albedo of the perfect specular reflection can be used as well) but absorption should be taken into account. - -Normal -^^^^^^ - -The normal image should contain the shading normals of the surfaces either in world-space or view-space. It is recommended to include normal maps to preserve as much detail as possible. - -[Example normal image obtained using the first hit (the values are actually in [−1, 1] but were mapped to [0, 1] for illustration purposes).][imgMazdaNormalFirstHit] - -[Example normal image obtained using the first diffuse or glossy (non-delta) hit. Note that the normals of perfect specular (delta) transparent surfaces are computed as the Fresnel blend of the reflected and transmitted normals.][imgMazdaNormalNonDeltaHit] - -Just like any other input image, the normal image should be anti-aliased (i.e. by accumulating the normalized normals per pixel). The final accumulated normals do not have to be normalized but must be in the [-1, 1] range (i.e. normals mapped to [0, 1] are *not* acceptable and must be remapped to [−1, 1]). 
- -Similar to the albedo, the normal can be stored for either the first or a subsequent hit (if the first hit has a perfect specular/delta BSDF). - -Weights -^^^^^^^ - -Instead of using the built-in trained models for filtering, it is also possible to specify user-trained models at runtime. This can be achieved by passing the model *weights* blob corresponding to the specified set of features and other filter parameters, produced by the included training tool. See Section [Training] for details. - -RTLightmap -~~~~~~~~~~ - -The ``RTLightmap`` filter is a variant of the ``RT`` filter optimized for denoising HDR and normalized directional (e.g. spherical harmonics) lightmaps. It does not support LDR images. - -The filter can be created by passing ``"RTLightmap"`` to the ``oidnNewFilter`` function as the filter type. The filter supports the following parameters: - -.. table:: Parameters supported by the ``RTLightmap`` filter. - - +---------------+-----------------+------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +===============+=================+============+=================================================================================================================================================================================================================================================================================================================================================================+ - | ``Image`` | ``color`` | | input beauty image (3 channels, HDR values in [0, +∞), interpreted such that, after scaling with the ``inputScale`` parameter, a value of 1 corresponds to aluminance level of 100 
cd/m²; directional values in [-1, 1]) | - +---------------+-----------------+------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``Image`` | ``output`` | | output image (3 channels); can be one of the input images | - +---------------+-----------------+------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``bool`` | ``directional`` | false | whether the input contains normalized coefficients (in [-1, 1]) of a directional lightmap (e.g. normalized L1 or higher spherical harmonics band with the L0 band divided out); if the range of the coefficients is different from [-1, 1], the ``inputScale`` parameter can be used to adjust the range without changing the stored values | - +---------------+-----------------+------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``float`` | ``inputScale`` | NaN | scales input color values before filtering, without scaling the output too, which can be used to map color values to the expected range, e.g. 
for mapping HDR values to physical units (which affects the quality of the output but *not* the range of the output values); if set to NaN, the scale is computed implicitly for HDR images or set to 1 otherwise | - +---------------+-----------------+------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``Data`` | ``weights`` | *optional* | trained model weights blob | - +---------------+-----------------+------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``int`` | ``maxMemoryMB`` | 3000 | approximate maximum scratch memory to use in megabytes (actual memory usage may be higher); limiting memory usage may cause slower denoising due to internally splitting the image into overlapping tiles | - +---------------+-----------------+------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``const int`` | ``alignment`` | | when manually denoising in tiles, the tile size and offsets should be multiples of this amount of pixels to avoid artifacts; when denoising HDR images ``inputScale`` *must* be set by the user to avoid seam artifacts | - 
+---------------+-----------------+------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``const int`` | ``overlap`` | | when manually denoising in tiles, the tiles should overlap by this amount of pixels | - +---------------+-----------------+------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Training -======== - -The Intel Open Image Denoise source distribution includes a Python-based neural network training toolkit (located in the ``training`` directory), which can be used to train the denoising filter models with image datasets provided by the user. This is an advanced feature of the library which usage requires some background knowledge of machine learning and basic familiarity with deep learning frameworks and toolkits (e.g. PyTorch or TensorFlow, TensorBoard). - -The training toolkit consists of the following command-line scripts: - -- ``preprocess.py``: Preprocesses training and validation datasets. - -- ``train.py``: Trains a model using preprocessed datasets. - -- ``infer.py``: Performs inference on a dataset using the specified training result. - -- ``export.py``: Exports a training result to the runtime model weights format. - -- ``find_lr.py``: Tool for finding the optimal minimum and maximum learning rates. - -- ``visualize.py``: Invokes TensorBoard for visualizing statistics of a training result. 
- -- ``split_exr.py``: Splits a multi-channel EXR image into multiple feature images. - -- ``convert_image.py``: Converts a feature image to a different image format. - -- ``compare_image.py``: Compares two feature images using the specified quality metrics. - -Prerequisites -------------- - -Before you can run the training toolkit you need the following prerequisites: - -- Linux (other operating systems are currently not supported) - -- Python 3.7 or later - -- `PyTorch `__ 1.8 or later - -- `NumPy `__ 1.19 or later - -- `OpenImageIO `__ 2.1 or later - -- `TensorBoard `__ 2.4 or later (*optional*) - -Devices -------- - -Most scripts in the training toolkit support selecting what kind of device (e.g. CPU, GPU) to use for the computations (``--device`` or ``-d`` option). If multiple devices of the same kind are available (e.g. multiple GPUs), the user can specify which one of these to use (``--device_id`` or ``-k`` option). Additionally, some scripts, like ``train.py``, support data-parallel execution on multiple devices for faster performance (``--num_devices`` or ``-n`` option). - -Datasets --------- - -A dataset should consist of a collection of noisy and corresponding noise-free reference images. It is possible to have more than one noisy version of the same image in the dataset, e.g. rendered at different samples per pixel and/or using different seeds. - -The training toolkit expects to have all datasets (e.g. training, validation) in the same parent directory (e.g. ``data``). Each dataset is stored in its own subdirectory (e.g. ``train``, ``valid``), which can have an arbitrary name. - -The images must be stored in `OpenEXR `__ format (``.exr`` files), and the filenames must have a specific format but the files can be stored in an arbitrary directory structure inside the dataset directory. The only restriction is that all versions of an image (noisy images and the reference image) must be located in the same subdirectory. Each feature of an image (e.g. 
color, albedo) must be stored in a separate image file, i.e. multi-channel EXR image files are not supported. If you have multi-channel EXRs, you can split them into separate images per feature using the included ``split_exr.py`` tool. - -An image filename must consist of a base name, a suffix with the number of samples per pixel or whether it is the reference image (e.g. ``_0128spp``, ``_ref``), the feature type extension (e.g. ``.hdr``, ``.alb``), and the image format extension (``.exr``). The exact filename format as a regular expression is the following: - -.. code:: regexp - - .+_([0-9]+(spp)?|ref|reference|gt|target)\.(hdr|ldr|sh1[xyz]|alb|nrm)\.exr - -The number of samples per pixel should be padded with leading zeros to have a fixed number of digits. If the reference image is not explicitly named as such (i.e. has the number of samples instead), the image with the most samples per pixel will be considered the reference. - -The following image features are supported: - -.. table:: Image features supported by the training toolkit. 
- - +---------+-------------------------------------------+--------------+---------------------------------------------+ - | Feature | Description | Channels | File extension | - +=========+===========================================+==============+=============================================+ - | ``hdr`` | color (HDR) | 3 | ``.hdr.exr`` | - +---------+-------------------------------------------+--------------+---------------------------------------------+ - | ``ldr`` | color (LDR) | 3 | ``.ldr.exr`` | - +---------+-------------------------------------------+--------------+---------------------------------------------+ - | ``sh1`` | color (normalized L1 spherical harmonics) | 3 × 3 images | ``.sh1x.exr``, ``.sh1y.exr``, ``.sh1z.exr`` | - +---------+-------------------------------------------+--------------+---------------------------------------------+ - | ``alb`` | albedo | 3 | ``.alb.exr`` | - +---------+-------------------------------------------+--------------+---------------------------------------------+ - | ``nrm`` | normal | 3 | ``.nrm.exr`` | - +---------+-------------------------------------------+--------------+---------------------------------------------+ - -The following directory tree demonstrates an example root dataset directory (``data``) containing one dataset (``rt_train``) with HDR color and albedo feature images: - -:: - - data - `-- rt_train - |-- scene1 - | |-- view1_0001.alb.exr - | |-- view1_0001.hdr.exr - | |-- view1_0004.alb.exr - | |-- view1_0004.hdr.exr - | |-- view1_8192.alb.exr - | |-- view1_8192.hdr.exr - | |-- view2_0001.alb.exr - | |-- view2_0001.hdr.exr - | |-- view2_8192.alb.exr - | `-- view2_8192.hdr.exr - |-- scene2_000008spp.alb.exr - |-- scene2_000008spp.hdr.exr - |-- scene2_000064spp.alb.exr - |-- scene2_000064spp.hdr.exr - |-- scene2_reference.alb.exr - `-- scene2_reference.hdr.exr - -Preprocessing (preprocess.py) ------------------------------ - -Training and validation datasets can be used only after preprocessing them 
using the ``preprocess.py`` script. This will convert the specified training (``--train_data`` or ``-t`` option) and validation datasets (``--valid_data`` or ``-v`` option) located in the root dataset directory (``--data_dir`` or ``-D`` option) to a format that can be loaded more efficiently during training. All preprocessed datasets will be stored in a root preprocessed dataset directory (``--preproc_dir`` or ``-P`` option). - -The preprocessing script requires the set of image features to include in the preprocessed dataset as command-line arguments. Only these specified features will be available for training but it is not required to use all of them at the same time. Thus, a single preprocessed dataset can be reused for training multiple models with different combinations of the preprocessed features. - -By default, all input features are assumed to be noisy, including the auxiliary features (e.g. albedo, normal), each having versions at different samples per pixel. However, it is also possible to train with noise-free auxiliary features, in which case the reference auxiliary features are used instead of the various noisy ones (``--clean_aux`` option). - -Preprocessing also depends on the filter that will be trained (e.g. determines which HDR/LDR transfer function has to be used), which should be also specified (``--filter`` or ``-f`` option). The alternative is to manually specify the transfer function (``--transfer`` or ``-x`` option) and other filter-specific parameters, which could be useful for training custom filters. - -For example, to preprocess the training and validation datasets (``rt_train`` and ``rt_valid``) with HDR color, albedo, and normal image features, for training the ``RT`` filter, the following command can be used: - -:: - - ./preprocess.py hdr alb nrm --filter RT --train_data rt_train --valid_data rt_valid - -It is possible to preprocess the same dataset multiple times, with possibly different combinations of features and options. 
The training script will use the most suitable and most recent preprocessed version depending on the training parameters. - -For more details about using the preprocessing script, including other options, please have a look at the help message: - -:: - - ./preprocess.py -h - -Training (train.py) -------------------- - -The filters require separate trained models for each supported combination of input features. Thus, depending on which combinations of features the user wants to support for a particular filter, one or more models have to be trained. - -After preprocessing the datasets, it is possible to start training a model using the ``train.py`` script. Similar to the preprocessing script, the input features must be specified (could be a subset of the preprocessed features), and the dataset names, directory paths, and the filter can be also passed. - -The tool will produce a training *result*, the name of which can be either specified (``--result`` or ``-r`` option) or automatically generated (by default). Each result is stored in its own subdirectory, and these are located in a common parent directory (``--results_dir`` or ``-R`` option). If a training result already exists, the tool will resume training that result from the latest checkpoint. - -The default training hyperparameters should work reasonably well in general, but some adjustments might be necessary for certain datasets to attain optimal performance, most importantly: the number of epochs (``--num_epochs`` or ``-e`` option), the global mini-batch size (``--batch_size`` or ``-b`` option), and the learning rate. The training tool uses a one-cycle learning rate schedule with cosine annealing, which can be configured by setting the base learning rate (``--learning_rate`` or ``--lr`` option), the maximum learning rate (``--max_learning_rate`` or ``--max_lr`` option), and the percentage of the cycle spent increasing the learning rate (``--learning_rate_warmup`` or ``--lr_warmup`` option). 
- -Example usage: - -:: - - ./train.py hdr alb --filter RT --train_data rt_train --valid_data rt_valid --result rt_hdr_alb - -For finding the optimal learning rate range, we recommend using the included ``find_lr.py`` script, which trains one epoch using an increasing learning rate and logs the resulting losses in a comma-separated values (CSV) file. Plotting the loss curve can show when the model starts to learn (the base learning rate) and when it starts to diverge (the maximum learning rate). - -The model is evaluated with the validation dataset at regular intervals (``--num_valid_epochs`` option), and checkpoints are also regularly created (``--num_save_epochs`` option) to save training progress. Also, some statistics are logged (e.g. training and validation losses, learning rate) per epoch, which can be later visualized with TensorBoard by running the ``visualize.py`` script, e.g.: - -:: - - ./visualize.py --result rt_hdr_alb - -Training is performed with mixed precision (FP16 and FP32) by default, if it is supported by the hardware, which makes training faster and use less memory. However, in some rare cases this might cause some convergence issues. The training precision can be manually set to FP32 if necessary (``--precision`` or ``-p`` option). - -Inference (infer.py) --------------------- - -A training result can be tested by performing inference on an image dataset (``--input_data`` or ``-i`` option) using the ``infer.py`` script. The dataset does *not* have to be preprocessed. In addition to the result to use, it is possible to specify which checkpoint to load as well (``-e`` or ``--num_epochs`` option). By default the latest checkpoint is loaded. - -The tool saves the output images in a separate directory (``--output_dir`` or ``-O`` option) in the requested formats (``--format`` or ``-F`` option). It also evaluates a set of image quality metrics (``--metric`` or ``-M`` option), e.g. PSNR, SSIM, for images that have reference images available.
All metrics are computed in tonemapped non-linear sRGB space. Thus, HDR images are first tonemapped (with Naughty Dog’s Filmic Tonemapper from John Hable’s *Uncharted 2: HDR Lighting* presentation) and converted to sRGB before evaluating the metrics. - -Example usage: - -:: - - ./infer.py --result rt_hdr_alb --input_data rt_test --format exr png --metric ssim - -The inference tool supports prefiltering of auxiliary features as well, which can be performed by specifying the list of training results for each feature to prefilter (``--aux_results`` or ``-a`` option). This is primarily useful for evaluating the quality of models trained with clean auxiliary features. - -Exporting Results (export.py) ------------------------------ - -The training result produced by the ``train.py`` script cannot be immediately used by the main library. It has to be first exported to the runtime model weights format, a *Tensor Archive* (TZA) file. Running the ``export.py`` script for a training result (and optionally a checkpoint epoch) will create a binary ``.tza`` file in the directory of the result, which can be either used at runtime through the API or it can be included in the library build by replacing one of the built-in weights files. - -Example usage: - -:: - - ./export.py --result rt_hdr_alb - -Image Conversion and Comparison -------------------------------- - -In addition to the already mentioned ``split_exr.py`` script, the toolkit contains a few other image utilities as well. - -``convert_image.py`` converts a feature image to a different image format (and/or a different feature, e.g. HDR color to LDR), performing tonemapping and other transforms as well if needed. For HDR images the exposure can be adjusted by passing a linear exposure scale (``--exposure`` or ``-E`` option). 
Example usage: - -:: - - ./convert_image.py view1_0004.hdr.exr view1_0004.png --exposure 2.5 - -The ``compare_image.py`` script compares two feature images (preferably having the dataset filename format to correctly detect the feature) using the specified image quality metrics, similar to the ``infer.py`` tool. Example usage: - -:: - - ./compare_image.py view1_0004.hdr.exr view1_8192.hdr.exr --exposure 2.5 --metric mse ssim - diff --git a/source/elements/oneART/source/oidn.rst b/source/elements/oneART/source/oidn.rst deleted file mode 100644 index ab4b63c4e..000000000 --- a/source/elements/oneART/source/oidn.rst +++ /dev/null @@ -1,21 +0,0 @@ -.. SPDX-FileCopyrightText: 2021 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -.. _oidn-section: - -================== -Open Image Denoise -================== - -Open Image Denoise is an open, high-quality, -efficient, and easy-to-use denoising library that allows one to -significantly reduce rendering times in ray tracing based rendering -applications. - - -.. toctree:: - :maxdepth: 1 - - oidn-intro - oidn-spec diff --git a/source/elements/oneART/source/openvkl-intro.rst b/source/elements/oneART/source/openvkl-intro.rst deleted file mode 100644 index 80f4eb89f..000000000 --- a/source/elements/oneART/source/openvkl-intro.rst +++ /dev/null @@ -1,23 +0,0 @@ -.. SPDX-FileCopyrightText: 2021 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -============ -Introduction -============ - -Open Volume Kernel Library (Open VKL) is a collection of -high-performance volume computation kernels. The target users of Open -VKL are graphics application engineers who want to improve the -performance of their volume rendering applications by leveraging Open -VKL’s performance-optimized kernels, which include volume traversal -and sampling functionality for a variety of volumetric data -formats. 
- -Open VKL provides a C API, and also supports applications written with -the Intel® SPMD Program Compiler (ISPC) by also providing an ISPC -interface to the core volume algorithms. This makes it possible to -write a renderer in ISPC that automatically vectorizes and leverages -SSE, AVX, AVX2, and AVX-512 instructions. ISPC also supports runtime -code selection, thus ISPC will select the best code path for your -application. diff --git a/source/elements/oneART/source/openvkl-spec.rst b/source/elements/oneART/source/openvkl-spec.rst deleted file mode 100644 index bd116a691..000000000 --- a/source/elements/oneART/source/openvkl-spec.rst +++ /dev/null @@ -1,1379 +0,0 @@ -Open VKL API -============ - -To access the Open VKL API you first need to include the Open VKL header. For C99 or C++: - -:: - - #include <openvkl/openvkl.h> - -For the Intel® Implicit SPMD Program Compiler (Intel® ISPC): - -:: - - #include <openvkl/openvkl.isph> - -This documentation will discuss the C99/C++ API. The ISPC version has the same functionality and flavor. Looking at the headers, the ``vklTutorialISPC`` example, and this documentation should be enough to figure it out. - -Device initialization and shutdown ----------------------------------- - -To use the API, one of the implemented backends must be loaded. Currently the only one that exists is the CPU device. To load the module that implements the CPU device: - -:: - - vklLoadModule("cpu_device"); - -The device then needs to be instantiated: - -:: - - VKLDevice device = vklNewDevice("cpu"); - -By default, the CPU device selects the maximum supported SIMD width (and associated ISA) for the system. Optionally, a specific width may be requested using the ``cpu_4``, ``cpu_8``, or ``cpu_16`` device names. Note that the system must support the given width (SSE4.1 for 4-wide, AVX for 8-wide, and AVX512 for 16-wide).
- -Once a device is created, you can call - -:: - - void vklDeviceSetInt(VKLDevice, const char *name, int val); - void vklDeviceSetString(VKLDevice, const char *name, const char *val); - -to set parameters on the device. The following parameters are understood by all devices: - -.. table:: Parameters shared by all devices. - - +--------+----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Description | - +========+================+=======================================================================================================================================================================+ - | int | logLevel | logging level; valid values are ``VKL_LOG_DEBUG``, ``VKL_LOG_INFO``, ``VKL_LOG_WARNING``, ``VKL_LOG_ERROR`` and ``VKL_LOG_NONE`` | - +--------+----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | string | logOutput | convenience for setting where log messages go; valid values are ``cout``, ``cerr`` and ``none`` | - +--------+----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | string | errorOutput | convenience for setting where error messages go; valid values are ``cout``, ``cerr`` and ``none`` | - +--------+----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | int | numThreads | number of threads which Open VKL can use | - 
+--------+----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | int | flushDenormals | sets the ``Flush to Zero`` and ``Denormals are Zero`` mode of the MXCSR control and status register (default: 1); see Performance Recommendations section for details | - +--------+----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Once parameters are set, the device must be committed with - -:: - - vklCommitDevice(device); - -The newly committed device is then ready to use. Users may change parameters on a device after initialization. In this case the device would need to be re-committed. - -All Open VKL objects are associated with a device. A device handle must be explicitly provided when creating volume and data objects, via ``vklNewVolume()`` and ``vklNewData()`` respectively. Other object types are automatically associated with a device via transitive dependency on a volume. - -Open VKL provides vector-wide versions for several APIs. To determine the native vector width for a given device, call: - -:: - - int width = vklGetNativeSIMDWidth(VKLDevice device); - -When the application is finished with an Open VKL device or shutting down, release the device via: - -:: - - vklReleaseDevice(VKLDevice device); - -Environment variables -~~~~~~~~~~~~~~~~~~~~~ - -The generic device parameters can be overridden via environment variables for easy changes to Open VKL’s behavior without needing to change the application (variables are prefixed by convention with “``OPENVKL_``”): - -.. table:: Environment variables understood by all devices. 
- - +-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Variable | Description | - +=========================+=======================================================================================================================================================================+ - | OPENVKL_LOG_LEVEL | logging level; valid values are ``debug``, ``info``, ``warning``, ``error`` and ``none`` | - +-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | OPENVKL_LOG_OUTPUT | convenience for setting where log messages go; valid values are ``cout``, ``cerr`` and ``none`` | - +-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | OPENVKL_ERROR_OUTPUT | convenience for setting where error messages go; valid values are ``cout``, ``cerr`` and ``none`` | - +-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | OPENVKL_THREADS | number of threads which Open VKL can use | - +-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | OPENVKL_FLUSH_DENORMALS | sets the ``Flush to Zero`` and ``Denormals are Zero`` mode of the MXCSR control and status register (default: 1); see Performance Recommendations section for details | - 
+-------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Note that these environment variables take precedence over values set through the ``vklDeviceSet*()`` functions. - -Additionally, the CPU device’s default SIMD width can be overriden at run time with the ``OPENVKL_CPU_DEVICE_DEFAULT_WIDTH`` environment variable. Legal values are 4, 8, or 16. This setting is only applicable when the generic ``cpu`` device is instantiated; if a specific width is requested via the ``cpu_[4,8,16]`` device names then the environment variable is ignored. - -Error handling and log messages -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The following errors are currently used by Open VKL: - -.. table:: Possible error codes, i.e., valid named constants of type ``VKLError``. - - +-----------------------+-------------------------------------------------------+ - | Name | Description | - +=======================+=======================================================+ - | VKL_NO_ERROR | no error occurred | - +-----------------------+-------------------------------------------------------+ - | VKL_UNKNOWN_ERROR | an unknown error occurred | - +-----------------------+-------------------------------------------------------+ - | VKL_INVALID_ARGUMENT | an invalid argument was specified | - +-----------------------+-------------------------------------------------------+ - | VKL_INVALID_OPERATION | the operation is not allowed for the specified object | - +-----------------------+-------------------------------------------------------+ - | VKL_OUT_OF_MEMORY | there is not enough memory to execute the command | - +-----------------------+-------------------------------------------------------+ - | VKL_UNSUPPORTED_CPU | the CPU is not supported (minimum ISA is SSE4.1) | - +-----------------------+-------------------------------------------------------+ - -These 
error codes are either directly returned by some API functions, or are recorded to be later queried by the application via - -:: - - VKLError vklDeviceGetLastErrorCode(VKLDevice); - -A more descriptive error message can be queried by calling - -:: - - const char* vklDeviceGetLastErrorMsg(VKLDevice); - -Alternatively, the application can also register a callback function of type - -:: - - typedef void (*VKLErrorCallback)(void *, VKLError, const char* message); - -via - -:: - - void vklDeviceSetErrorCallback(VKLDevice, VKLErrorCallback, void *); - -to get notified when errors occur. Applications may be interested in messages which Open VKL emits, whether for debugging or logging events. Applications can register a callback function of type - -:: - - typedef void (*VKLLogCallback)(void *, const char* message); - -via - -:: - - void vklDeviceSetLogCallback(VKLDevice, VKLLogCallback, void *); - -which Open VKL will use to emit log messages. Applications can clear either callback by passing ``nullptr`` instead of an actual function pointer. By default, Open VKL uses ``cout`` and ``cerr`` to emit log and error messages, respectively. The last parameter to ``vklDeviceSetErrorCallback`` and ``vklDeviceSetLogCallback`` is a user data pointer. Open VKL passes this pointer to the callback functions as the first parameter. Note that in addition to setting the above callbacks, this behavior can be changed via the device parameters and environment variables described previously. - -Basic data types ----------------- - -Open VKL defines 3-component vectors of integer and float types: - -:: - - typedef struct - { - int x, y, z; - } vkl_vec3i; - - typedef struct - { - float x, y, z; - } vkl_vec3f; - -Vector versions of these are also defined in structure-of-array format for 4, 8, and 16 wide types.
- -:: - - typedef struct - { - float x[WIDTH]; - float y[WIDTH]; - float z[WIDTH]; - } vkl_vvec3f##WIDTH; - - typedef struct - { - float lower[WIDTH], upper[WIDTH]; - } vkl_vrange1f##WIDTH; - -1-D range and 3-D ranges are defined as ranges and boxes, with no vector versions: - -:: - - typedef struct - { - float lower, upper; - } vkl_range1f; - - typedef struct - { - vkl_vec3f lower, upper; - } vkl_box3f; - -Object model ------------- - -Objects in Open VKL are exposed to the APIs as handles with internal reference counting for lifetime determination. Objects are created with a particular type’s ``vklNew...`` API entry point. For example, ``vklNewData`` and ``vklNewVolume``. - -In general, modifiable parameters to objects are modified using ``vklSet...`` functions based on the type of the parameter being set. The parameter name is passed as a string. Below are all variants of ``vklSet...``. - -:: - - void vklSetBool(VKLObject object, const char *name, int b); - void vklSetFloat(VKLObject object, const char *name, float x); - void vklSetVec3f(VKLObject object, const char *name, float x, float y, float z); - void vklSetInt(VKLObject object, const char *name, int x); - void vklSetVec3i(VKLObject object, const char *name, int x, int y, int z); - void vklSetData(VKLObject object, const char *name, VKLData data); - void vklSetString(VKLObject object, const char *name, const char *s); - void vklSetVoidPtr(VKLObject object, const char *name, void *v); - -After parameters have been set, ``vklCommit`` must be called on the object to make them take effect. - -Open VKL uses reference counting to manage the lifetime of all objects. Therefore one cannot explicitly “delete” any object. Instead, one can indicate the application does not need or will not access the given object anymore by calling - -:: - - void vklRelease(VKLObject); - -This decreases the object’s reference count. If the count reaches ``0`` the object will automatically be deleted.
- -Managed data ------------- - -Large data is passed to Open VKL via a ``VKLData`` handle created with ``vklNewData``: - -:: - - VKLData vklNewData(VKLDevice device, - size_t numItems, - VKLDataType dataType, - const void *source, - VKLDataCreationFlags dataCreationFlags, - size_t byteStride); - -Data objects can be created as Open VKL owned (``dataCreationFlags = VKL_DATA_DEFAULT``), in which case the library will make a copy of the data for its own use, or shared (``dataCreationFlags = VKL_DATA_SHARED_BUFFER``), in which case the library will try to use the passed pointer directly. The library is allowed to copy data when a volume is committed. - -The distance between consecutive elements in ``source`` is given in bytes with ``byteStride``. If the provided ``byteStride`` is zero, then it will be determined automatically as ``sizeof(type)``. Open VKL owned data will be compacted into a naturally-strided array on copy, regardless of the original ``byteStride``. - -As with other object types, when data objects are no longer needed they should be released via ``vklRelease``. - -The enum type ``VKLDataType`` describes the different element types that can be represented in Open VKL. The types accepted vary per volume; see the volume section for specifics. Valid constants are listed in the table below. - -.. table:: Valid named constants for ``VKLDataType``.
- - +----------------------------+----------------------------------------------------------------------------------------------+ - | Type/Name | Description | - +============================+==============================================================================================+ - | VKL_DEVICE | API device object reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_DATA | data reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_OBJECT | generic object reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_VOLUME | volume object reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_STRING | C-style zero-terminated character string | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_CHAR, VKL_VEC[234]C | 8 bit signed character scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_UCHAR, VKL_VEC[234]UC | 8 bit unsigned character scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_SHORT, VKL_VEC[234]S | 16 bit signed integer scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_USHORT, VKL_VEC[234]US | 16 bit unsigned integer scalar and [234]-element vector | -
+----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_INT, VKL_VEC[234]I | 32 bit signed integer scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_UINT, VKL_VEC[234]UI | 32 bit unsigned integer scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_LONG, VKL_VEC[234]L | 64 bit signed integer scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_ULONG, VKL_VEC[234]UL | 64 bit unsigned integer scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_HALF, VKL_VEC[234]H | 16 bit half precision floating-point scalar and [234]-element vector (IEEE 754 ``binary16``) | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_FLOAT, VKL_VEC[234]F | 32 bit single precision floating-point scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_DOUBLE, VKL_VEC[234]D | 64 bit double precision floating-point scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_BOX[1234]I | 32 bit integer box (lower + upper bounds) | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_BOX[1234]F | 32 bit single precision floating-point box (lower + upper bounds) | - 
+----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_LINEAR[23]F | 32 bit single precision floating-point linear transform ([23] vectors) | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_AFFINE[23]F | 32 bit single precision floating-point affine transform (linear transform plus translation) | - +----------------------------+----------------------------------------------------------------------------------------------+ - | VKL_VOID_PTR | raw memory address | - +----------------------------+----------------------------------------------------------------------------------------------+ - -Observers ---------- - -Volumes and samplers in Open VKL may provide observers to communicate data back to the application. Observers may be created with - -:: - - VKLObserver vklNewSamplerObserver(VKLSampler sampler, - const char *type); - - VKLObserver vklNewVolumeObserver(VKLVolume volume, - const char *type); - -The object passed to ``vklNew*Observer`` must already be committed. Valid observer type strings are defined by volume implementations (see section ‘Volume types’ below). - -``vklNew*Observer`` returns ``NULL`` on failure. - -To access the underlying data, an observer must first be mapped using - -:: - - const void * vklMapObserver(VKLObserver observer); - -If this fails, the function returns ``NULL``. ``vklMapObserver`` may fail on observers that are already mapped. On success, the application may query the underlying type, element size in bytes, and the number of elements in the buffer using - -:: - - VKLDataType vklGetObserverElementType(VKLObserver observer); - size_t vklGetObserverElementSize(VKLObserver observer); - size_t vklGetObserverNumElements(VKLObserver observer); - -On failure, these functions return ``VKL_UNKNOWN`` and ``0``, respectively. 
Possible data types are defined by the volume that provides the observer, as are the semantics of the observation. See section ‘Volume types’ for details. - -The pointer returned by ``vklMapObserver`` may be cast to the type corresponding to the value returned by ``vklGetObserverElementType`` to access the observation. For example, if ``vklGetObserverElementType`` returns ``VKL_FLOAT``, then the pointer returned by ``vklMapObserver`` may be cast to ``const float *`` to access up to ``vklGetObserverNumElements`` consecutive values of type ``float``. - -Once the application has finished processing the observation, it should unmap the observer using - -:: - - void vklUnmapObserver(VKLObserver observer); - -so that the observer may be mapped again. - -When an observer is no longer needed, it should be released using ``vklRelease``. - -The observer API is not thread safe, and these functions should not be called concurrently on the same object. - -Volume types ------------- - -Open VKL currently supports structured volumes on regular and spherical grids; unstructured volumes with tetrahedral, wedge, pyramid, and hexahedral primitive types; adaptive mesh refinement (AMR) volumes; sparse VDB volumes; and particle volumes.
Volumes are created with ``vklNewVolume`` with a device and appropriate type string: - -:: - - VKLVolume vklNewVolume(VKLDevice device, const char *type); - -In addition to the usual ``vklSet...()`` and ``vklCommit()`` APIs, the volume bounding box can be queried: - -:: - - vkl_box3f vklGetBoundingBox(VKLVolume volume); - -The number of attributes in a volume can also be queried: - -:: - - unsigned int vklGetNumAttributes(VKLVolume volume); - -Finally, the value range of the volume for a given attribute can be queried: - -:: - - vkl_range1f vklGetValueRange(VKLVolume volume, unsigned int attributeIndex); - -Structured Volumes -~~~~~~~~~~~~~~~~~~ - -Structured volumes only need to store the values of the samples, because their addresses in memory can be easily computed from a 3D position. The dimensions for all structured volume types are in units of vertices, not cells. For example, a volume with dimensions :math:`(x, y, z)` will have :math:`(x-1, y-1, z-1)` cells in each dimension. Voxel data provided is assumed vertex-centered, so :math:`x*y*z` values must be provided. - -Structured Regular Volumes -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -A common type of structured volumes are regular grids, which are created by passing a type string of ``"structuredRegular"`` to ``vklNewVolume``. The parameters understood by structured regular volumes are summarized in the table below. - -.. table:: Configuration parameters for structured regular (``"structuredRegular"``) volumes. 
- - +-------------------+----------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +===================+==================================+==================================+========================================================================================================================================================================================================================================+ - | vec3i | dimensions | | number of voxels in each dimension :math:`(x, y, z)` | - +-------------------+----------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | VKLData VKLData[] | data | | VKLData object(s) of voxel data, supported types are: | - +-------------------+----------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``VKL_UCHAR`` | - +-------------------+----------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``VKL_SHORT`` | - 
+-------------------+----------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``VKL_USHORT`` | - +-------------------+----------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``VKL_HALF`` | - +-------------------+----------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``VKL_FLOAT`` | - +-------------------+----------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``VKL_DOUBLE`` | - +-------------------+----------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | Multiple attributes are supported through passing an array of VKLData objects. 
| - +-------------------+----------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec3f | gridOrigin | :math:`(0, 0, 0)` | origin of the grid in object space | - +-------------------+----------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec3f | gridSpacing | :math:`(1, 1, 1)` | size of the grid cells in object space | - +-------------------+----------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | uint32 | temporalFormat | ``VKL_TEMPORAL_FORMAT_CONSTANT`` | The temporal format for this volume. Use ``VKLTemporalFormat`` for named constants. Structured regular volumes support ``VKL_TEMPORAL_FORMAT_CONSTANT``, ``VKL_TEMPORAL_FORMAT_STRUCTURED``, and ``VKL_TEMPORAL_FORMAT_UNSTRUCTURED``. | - +-------------------+----------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | int | temporallyStructuredNumTimesteps | | For temporally structured variation, number of timesteps per voxel. Only valid if ``temporalFormat`` is ``VKL_TEMPORAL_FORMAT_STRUCTURED``. 
| - +-------------------+----------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | uint32[] uint64[] | temporallyUnstructuredIndices | | For temporally unstructured variation, indices to ``data`` time series beginning per voxel. Only valid if ``temporalFormat`` is ``VKL_TEMPORAL_FORMAT_UNSTRUCTURED``. | - +-------------------+----------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float[] | temporallyUnstructuredTimes | | For temporally unstructured variation, time values corresponding to values in ``data``. Only valid if ``temporalFormat`` is ``VKL_TEMPORAL_FORMAT_UNSTRUCTURED``. | - +-------------------+----------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float[] | background | ``VKL_BACKGROUND_UNDEFINED`` | For each attribute, the value that is returned when sampling an undefined region outside the volume domain. 
| - +-------------------+----------------------------------+----------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Structured regular volumes support temporally structured and temporally unstructured temporal variation. See section ‘Temporal Variation’ for more detail. - -The following additional parameters can be set both on ``"structuredRegular"`` volumes and their sampler objects. Sampler object parameters default to volume parameters. - -.. table:: Configuration parameters for structured regular (``"structuredRegular"``) volumes and their sampler objects. - - +--------+----------------+--------------------------+-------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +========+================+==========================+===================================================================================================================+ - | int | filter | ``VKL_FILTER_TRILINEAR`` | The filter used for reconstructing the field. Use ``VKLFilter`` for named constants. | - +--------+----------------+--------------------------+-------------------------------------------------------------------------------------------------------------------+ - | int | gradientFilter | ``filter`` | The filter used for reconstructing the field during gradient computations. Use ``VKLFilter`` for named constants. 
| - +--------+----------------+--------------------------+-------------------------------------------------------------------------------------------------------------------+ - -Reconstruction filters -'''''''''''''''''''''' - -Structured regular volumes support the filter types ``VKL_FILTER_NEAREST``, ``VKL_FILTER_TRILINEAR``, and ``VKL_FILTER_TRICUBIC`` for both ``filter`` and ``gradientFilter``. - -Note that when ``gradientFilter`` is set to ``VKL_FILTER_NEAREST``, gradients are always :math:`(0, 0, 0)`. - -Structured Spherical Volumes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Structured spherical volumes are also supported, which are created by passing a type string of ``"structuredSpherical"`` to ``vklNewVolume``. The grid dimensions and parameters are defined in terms of radial distance (:math:`r`), inclination angle (:math:`\theta`), and azimuthal angle (:math:`\phi`), conforming with the ISO convention for spherical coordinate systems. The coordinate system and parameters understood by structured spherical volumes are summarized below. - -.. figure:: images/structured_spherical_coords.png - :alt: Structured spherical volume coordinate system: radial distance (:math:`r`), inclination angle (:math:`\theta`), and azimuthal angle (:math:`\phi`). - - Structured spherical volume coordinate system: radial distance (:math:`r`), inclination angle (:math:`\theta`), and azimuthal angle (:math:`\phi`). - -.. table:: Configuration parameters for structured spherical (``"structuredSpherical"``) volumes. 
- - +-------------------+-------------+------------------------------+-------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +===================+=============+==============================+=============================================================================================================+ - | vec3i | dimensions | | number of voxels in each dimension :math:`(r, \theta, \phi)` | - +-------------------+-------------+------------------------------+-------------------------------------------------------------------------------------------------------------+ - | VKLData VKLData[] | data | | VKLData object(s) of voxel data, supported types are: | - +-------------------+-------------+------------------------------+-------------------------------------------------------------------------------------------------------------+ - | | | | ``VKL_UCHAR`` | - +-------------------+-------------+------------------------------+-------------------------------------------------------------------------------------------------------------+ - | | | | ``VKL_SHORT`` | - +-------------------+-------------+------------------------------+-------------------------------------------------------------------------------------------------------------+ - | | | | ``VKL_USHORT`` | - +-------------------+-------------+------------------------------+-------------------------------------------------------------------------------------------------------------+ - | | | | ``VKL_HALF`` | - +-------------------+-------------+------------------------------+-------------------------------------------------------------------------------------------------------------+ - | | | | ``VKL_FLOAT`` | - +-------------------+-------------+------------------------------+-------------------------------------------------------------------------------------------------------------+ - | | | | ``VKL_DOUBLE`` | - 
+-------------------+-------------+------------------------------+-------------------------------------------------------------------------------------------------------------+ - | | | | Multiple attributes are supported through passing an array of VKLData objects. | - +-------------------+-------------+------------------------------+-------------------------------------------------------------------------------------------------------------+ - | vec3f | gridOrigin | :math:`(0, 0, 0)` | origin of the grid in units of :math:`(r, \theta, \phi)`; angles in degrees | - +-------------------+-------------+------------------------------+-------------------------------------------------------------------------------------------------------------+ - | vec3f | gridSpacing | :math:`(1, 1, 1)` | size of the grid cells in units of :math:`(r, \theta, \phi)`; angles in degrees | - +-------------------+-------------+------------------------------+-------------------------------------------------------------------------------------------------------------+ - | float[] | background | ``VKL_BACKGROUND_UNDEFINED`` | For each attribute, the value that is returned when sampling an undefined region outside the volume domain. | - +-------------------+-------------+------------------------------+-------------------------------------------------------------------------------------------------------------+ - -These grid parameters support flexible specification of spheres, hemispheres, spherical shells, spherical wedges, and so forth. The grid extents (computed as :math:`[gridOrigin, gridOrigin + (dimensions - 1) * gridSpacing]`) however must be constrained such that: - -- :math:`r \geq 0` -- :math:`0 \leq \theta \leq 180` -- :math:`0 \leq \phi \leq 360` - -The following additional parameters can be set both on ``"structuredSpherical"`` volumes and their sampler objects. Sampler object parameters default to volume parameters. - -.. 
table:: Configuration parameters for structured spherical (``"structuredSpherical"``) volumes and their sampler objects. - - +--------+----------------+--------------------------+-------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +========+================+==========================+===================================================================================================================+ - | int | filter | ``VKL_FILTER_TRILINEAR`` | The filter used for reconstructing the field. Use ``VKLFilter`` for named constants. | - +--------+----------------+--------------------------+-------------------------------------------------------------------------------------------------------------------+ - | int | gradientFilter | ``filter`` | The filter used for reconstructing the field during gradient computations. Use ``VKLFilter`` for named constants. | - +--------+----------------+--------------------------+-------------------------------------------------------------------------------------------------------------------+ - -Adaptive Mesh Refinement (AMR) Volumes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Open VKL currently supports block-structured (Berger-Colella) AMR volumes. Volumes are specified as a list of blocks, which exist at levels of refinement in potentially overlapping regions. Blocks exist in a tree structure, with coarser refinement level blocks containing finer blocks. The cell width is equal for all blocks at the same refinement level, though blocks at a coarser level have a larger cell width than finer levels. - -There can be any number of refinement levels and any number of blocks at any level of refinement. - -Blocks are defined by three parameters: their bounds, the refinement level in which they reside, and the scalar data contained within each block. - -Note that cell widths are defined *per refinement level*, not per block. 
- -AMR volumes are created by passing the type string ``"amr"`` to ``vklNewVolume``, and have the following parameters: - -.. table:: Configuration parameters for AMR (``"amr"``) volumes. - - +-----------+--------------+------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +===========+==============+==============================+======================================================================================================================================+ - | float[] | cellWidth | | [data] array of each level’s cell width | - +-----------+--------------+------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+ - | box3i[] | block.bounds | | [data] array of each block’s bounds (in voxels) | - +-----------+--------------+------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+ - | int[] | block.level | | [data] array of each block’s refinement level | - +-----------+--------------+------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+ - | VKLData[] | block.data | | [data] array of each block’s VKLData object containing the actual scalar voxel data. Currently only ``VKL_FLOAT`` data is supported. 
| - +-----------+--------------+------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+ - | vec3f | gridOrigin | :math:`(0, 0, 0)` | origin of the grid in object space | - +-----------+--------------+------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+ - | vec3f | gridSpacing | :math:`(1, 1, 1)` | size of the grid cells in object space | - +-----------+--------------+------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+ - | float | background | ``VKL_BACKGROUND_UNDEFINED`` | The value that is returned when sampling an undefined region outside the volume domain. | - +-----------+--------------+------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+ - -Note that the ``gridOrigin`` and ``gridSpacing`` parameters act just like the structured volume equivalent, but they only modify the root (coarsest level) of refinement. - -The following additional parameters can be set both on ``"amr"`` volumes and their sampler objects. Sampler object parameters default to volume parameters. - -.. table:: Configuration parameters for AMR (``"amr"``) volumes and their sampler objects. - - +------------------+-----------+---------------------+----------------------------------------------------------+ - | Type | Name | Default | Description | - +==================+===========+=====================+==========================================================+ - | ``VKLAMRMethod`` | method | ``VKL_AMR_CURRENT`` | ``VKLAMRMethod`` sampling method.
Supported methods are: | - +------------------+-----------+---------------------+----------------------------------------------------------+ - | | | | ``VKL_AMR_CURRENT`` | - +------------------+-----------+---------------------+----------------------------------------------------------+ - | | | | ``VKL_AMR_FINEST`` | - +------------------+-----------+---------------------+----------------------------------------------------------+ - | | | | ``VKL_AMR_OCTANT`` | - +------------------+-----------+---------------------+----------------------------------------------------------+ - -Open VKL’s AMR implementation was designed to cover Berger-Colella [1] and Chombo [2] AMR data. The ``method`` parameter above determines the interpolation method used when sampling the volume. - -- ``VKL_AMR_CURRENT`` finds the finest refinement level at that cell and interpolates through this “current” level -- ``VKL_AMR_FINEST`` will interpolate at the closest existing cell in the volume-wide finest refinement level regardless of the sample cell’s level -- ``VKL_AMR_OCTANT`` interpolates through all available refinement levels at that cell. This method avoids discontinuities at refinement level boundaries at the cost of performance - -Gradients are computed using finite differences, using the ``method`` defined on the sampler. - -Details and more information can be found in the publication for the implementation [3]. - -1. M. J. Berger, and P. Colella. “Local adaptive mesh refinement for shock hydrodynamics.” Journal of Computational Physics 82.1 (1989): 64-84. DOI: 10.1016/0021-9991(89)90035-1 -2. M. Adams, P. Colella, D. T. Graves, J.N. Johnson, N.D. Keen, T. J. Ligocki. D. F. Martin. P.W. McCorquodale, D. Modiano. P.O. Schwartz, T.D. Sternberg and B. Van Straalen, Chombo Software Package for AMR Applications - Design Document, Lawrence Berkeley National Laboratory Technical Report LBNL-6616E. -3. I. Wald, C. Brownlee, W. Usher, and A. Knoll. 
CPU volume rendering of adaptive mesh refinement data. SIGGRAPH Asia 2017 Symposium on Visualization on - SA ’17, 18(8), 1–8. DOI: 10.1145/3139295.3139305 - -Unstructured Volumes -~~~~~~~~~~~~~~~~~~~~ - -Unstructured volumes can have their topology and geometry freely defined. Geometry can be composed of tetrahedral, hexahedral, wedge or pyramid cell types. The data format used is compatible with VTK and consists of multiple arrays: vertex positions and values, vertex indices, cell start indices, cell types, and cell values. - -Sampled cell values can be specified either per-vertex (``vertex.data``) or per-cell (``cell.data``). If both arrays are set, ``cell.data`` takes precedence. - -Similar to a mesh, each cell is formed by a group of indices into the vertices. For each vertex, the corresponding (by array index) data value will be used for sampling when rendering, if specified. The index order for a tetrahedron is the same as ``VTK_TETRA``: bottom triangle counterclockwise, then the top vertex. - -For hexahedral cells, each hexahedron is formed by a group of eight indices into the vertices and data values. Vertex ordering is the same as ``VTK_HEXAHEDRON``: four bottom vertices counterclockwise, then top four counterclockwise. - -For wedge cells, each wedge is formed by a group of six indices into the vertices and data values. Vertex ordering is the same as ``VTK_WEDGE``: three bottom vertices counterclockwise, then top three counterclockwise. - -For pyramid cells, each cell is formed by a group of five indices into the vertices and data values. Vertex ordering is the same as ``VTK_PYRAMID``: four bottom vertices counterclockwise, then the top vertex. - -To maintain VTK data compatibility, the ``index`` array may be specified with cell sizes interleaved with vertex indices in the following format: :math:`n, id_1, ..., id_n, m, id_1, ..., id_m`. 
This alternative ``index`` array layout can be enabled through the ``indexPrefixed`` flag (in which case, the ``cell.type`` parameter should be omitted). - -Gradients are computed using finite differences. - -Unstructured volumes are created by passing the type string ``"unstructured"`` to ``vklNewVolume``, and have the following parameters: - -.. table:: Configuration parameters for unstructured (``"unstructured"``) volumes. - - +---------------------+--------------------+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=====================+====================+==============================+=========================================================================================================================================================+ - | vec3f[] | vertex.position | | [data] array of vertex positions | - +---------------------+--------------------+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float[] | vertex.data | | [data] array of vertex data values to be sampled | - +---------------------+--------------------+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | uint32[] / uint64[] | index | | [data] array of indices (into the vertex array(s)) that form cells | - +---------------------+--------------------+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | indexPrefixed | false | indicates that the ``index`` array is provided in a VTK-compatible format, 
where the indices of each cell are prefixed with the number of vertices | - +---------------------+--------------------+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | uint32[] / uint64[] | cell.index | | [data] array of locations (into the index array), specifying the first index of each cell | - +---------------------+--------------------+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float[] | cell.data | | [data] array of cell data values to be sampled | - +---------------------+--------------------+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | uint8[] | cell.type | | [data] array of cell types (VTK compatible). 
Supported types are: | - +---------------------+--------------------+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``VKL_TETRAHEDRON`` | - +---------------------+--------------------+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``VKL_HEXAHEDRON`` | - +---------------------+--------------------+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``VKL_WEDGE`` | - +---------------------+--------------------+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``VKL_PYRAMID`` | - +---------------------+--------------------+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | hexIterative | false | hexahedron interpolation method, defaults to fast non-iterative version which could have rendering inaccuracies may appear if hex is not parallelepiped | - +---------------------+--------------------+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | precomputedNormals | false | whether to accelerate by precomputing, at a cost of 12 bytes/face | - 
+---------------------+--------------------+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | background | ``VKL_BACKGROUND_UNDEFINED`` | The value that is returned when sampling an undefined region outside the volume domain. | - +---------------------+--------------------+------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - -VDB Volumes -~~~~~~~~~~~ - -VDB volumes implement a data structure that is very similar to the data structure outlined in Museth [1]. - -The data structure is a hierarchical regular grid at its core: Nodes are regular grids, and each grid cell may either store a constant value (this is called a tile), or child pointers. - -Nodes in VDB trees are wide: Nodes on the first level have a resolution of 32^3 voxels by default, on the next level 16^3, and on the leaf level 8^3 voxels. All nodes on a given level have the same resolution. This makes it easy to find the node containing a coordinate using shift operations (cp. [1]). - -VDB leaf nodes are implicit in Open VKL: they are stored as pointers to user-provided data. - -.. figure:: images/vdb_structure.png - :alt: Structure of ``"vdb"`` volumes in the default configuration - - Structure of ``"vdb"`` volumes in the default configuration - -VDB volumes interpret input data as constant cells (which are then potentially filtered). This is in contrast to ``structuredRegular`` volumes, which have a vertex-centered interpretation. - -The VDB implementation in Open VKL follows the following goals: - -- Efficient data structure traversal on vector architectures. - -- Enable the use of industry-standard .vdb files created through the OpenVDB library. 
- -- Compatibility with OpenVDB on a leaf data level, so that .vdb files may be loaded with minimal overhead. - -VDB volumes are created by passing the type string ``"vdb"`` to ``vklNewVolume``, and have the following parameters: - -.. table:: Configuration parameters for VDB (``"vdb"``) volumes| Type | Name | Default | Description || float[] | indexToObject | 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0 | An array of 12 values of type ``float`` that define the transformation from index space to object space. In index space, the grid is an axis-aligned regular grid, and leaf voxels have size (1,1,1). The first 9 values are interpreted as a row-major linear transformation matrix. The last 3 values are the translation of the grid origin. || uint32[] | node.format | | For each input node, the data format. Currently supported are ``VKL_FORMAT_TILE`` for tiles, and ``VKL_FORMAT_DENSE_ZYX`` for nodes that are dense regular grids. || uint32[] | node.level | | For each input node, the level on which this node exists. Tiles may exist on levels [1, ``VKL_VDB_NUM_LEVELS-1``], all other nodes may only exist on level ``VKL_VDB_NUM_LEVELS-1``. || vec3i[] | node.origin | | For each input node, the node origin index. || VKLData[] | node.data | | For each input node, the attribute data. Single-attribute volumes may have one array provided per node, while multi-attribute volumes require an array per attribute for each node. Nodes with format ``VKL_FORMAT_TILE`` are expected to have single-entry arrays per attribute. Nodes with format ``VKL_FORMAT_DENSE_ZYX`` are expected to have arrays with ``vklVdbLevelNumVoxels(level[i])`` entries per attribute. ``VKL_HALF`` and ``VKL_FLOAT`` data is currently supported; all nodes for a given attribute must be the same data type. || uint32[] | node.temporalFormat | ``VKL_TEMPORAL_FORMAT_CONSTANT`` | The temporal format for this volume. Use ``VKLTemporalFormat`` for named constants. 
VDB volumes support ``VKL_TEMPORAL_FORMAT_CONSTANT``, ``VKL_TEMPORAL_FORMAT_STRUCTURED``, and ``VKL_TEMPORAL_FORMAT_UNSTRUCTURED``. || int[] | node.temporallyStructuredNumTimesteps | | For temporally structured variation, number of timesteps per voxel. Only valid if ``temporalFormat`` is ``VKL_TEMPORAL_FORMAT_STRUCTURED``. || VKLData[] | node.temporallyUnstructuredIndices | | For temporally unstructured variation, beginning per voxel. Supported data types for each node are ``VKL_UINT`` and ``VKL_ULONG``. Only valid if ``temporalFormat`` is ``VKL_TEMPORAL_FORMAT_UNSTRUCTURED``. || VKLData[] | node.temporallyUnstructuredTimes | | For temporally unstructured variation, time values corresponding to values in ``node.data``. For each node, the data must be of type ``VKL_FLOAT``. Only valid if ``temporalFormat`` is ``VKL_TEMPORAL_FORMAT_UNSTRUCTURED``. || float[] | background | ``VKL_BACKGROUND_UNDEFINED`` | For each attribute, the value that is returned when sampling an undefined region outside the volume domain. | - +-----------+---------------------------------------+------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -The level, origin, format, and data parameters must have the same size, and there must be at least one valid node or ``commit()`` will fail. - -VDB volumes support temporally structured and temporally unstructured temporal variation. See section ‘Temporal Variation’ for more detail. 
- -The following additional parameters can be set both on ``vdb`` volumes and their sampler objects (sampler object parameters default to volume parameters). - -.. table:: Configuration parameters for VDB (``"vdb"``) volumes and their sampler objects. - - +--------+------------------+--------------------------+-------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +========+==================+==========================+===================================================================================================================+ - | int | filter | ``VKL_FILTER_TRILINEAR`` | The filter used for reconstructing the field. Use ``VKLFilter`` for named constants. | - +--------+------------------+--------------------------+-------------------------------------------------------------------------------------------------------------------+ - | int | gradientFilter | ``filter`` | The filter used for reconstructing the field during gradient computations. Use ``VKLFilter`` for named constants. | - +--------+------------------+--------------------------+-------------------------------------------------------------------------------------------------------------------+ - | int | maxSamplingDepth | ``VKL_VDB_NUM_LEVELS``-1 | Do not descend further than to this depth during sampling. | - +--------+------------------+--------------------------+-------------------------------------------------------------------------------------------------------------------+ - -VDB volume objects support the following observers: - -.. 
table:: Observers supported by VDB (``"vdb"``) volumes| Name | Buffer Type | Description | - +===========+=============+====================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================================+ - | InnerNode | float[] | Return an array of bounding boxes along with value ranges, of inner nodes in the data structure. The bounding box is given in object space. For a volume with M attributes, the entries in this array are (6+2*M)-tuples ``(minX, minY, minZ, maxX, maxY, maxZ, lower_0, upper_0, lower_1, upper_1, ...)``. This is in effect a low resolution representation of the volume. The InnerNode observer can be parameterized using ``int maxDepth`` to control the depth at which inner nodes are returned. Note that the observer will also return leaf nodes or tiles at lower levels if they exist. |sampler objects support the following observers: - -.. table:: Observers supported by sampler objects created on VDB (``"vdb"``) volumes. 
- - +----------------+-------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Name | Buffer Type | Description | - +================+=============+================================================================================================================================================================================================================================================+ - | LeafNodeAccess | uint32[] | This observer returns an array with as many entries as input nodes were passed. If the input node i was accessed during traversal, then the ith entry in this array has a nonzero value. This can be used for on-demand loading of leaf nodes. | - +----------------+-------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -.. _reconstruction-filters-1: - -Reconstruction filters -^^^^^^^^^^^^^^^^^^^^^^ - -VDB volumes support the filter types ``VKL_FILTER_NEAREST``, ``VKL_FILTER_TRILINEAR``, and ``VKL_FILTER_TRICUBIC`` for both ``filter`` and ``gradientFilter``. - -Note that when ``gradientFilter`` is set to ``VKL_FILTER_NEAREST``, gradients are always :math:`(0, 0, 0)`. - -Major differences to OpenVDB -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -- Open VKL implements sampling in ISPC, and can exploit wide SIMD architectures. - -- VDB volumes in Open VKL are read-only once committed, and designed for rendering only. Authoring or manipulating datasets is not in the scope of this implementation. - -- The only supported field types are ``VKL_HALF`` and ``VKL_FLOAT`` at this point. Other field types may be supported in the future. 
Note that multi-attribute volumes may be used to represent multi-component (e.g. vector) fields. - -- The root level in Open VKL has a single node with resolution 64^3 (cp. [1]); OpenVDB uses a hash map instead. - -- Open VKL supports four-level vdb volumes. The resolution of each level can be configured at compile time using CMake variables. - - - ``VKL_VDB_LOG_RESOLUTION_0`` sets the base 2 logarithm of the root level resolution. This variable defaults to 6, which means that the root level has a resolution of :math:`(2^6)^3 = 64^3`. - - ``VKL_VDB_LOG_RESOLUTION_1`` and ``VKL_VDB_LOG_RESOLUTION_2`` default to 5 and 4, respectively. This matches the default OpenVDB resolution for inner levels. - - ``VKL_VDB_LOG_RESOLUTION_3`` sets the base 2 logarithm of the leaf level resolution, and defaults to 3. Therefore, leaf nodes have a resolution of :math:`8^3` voxels. Again, this matches the OpenVDB default. The default settings lead to a domain resolution of :math:`(2^{18})^3 = 262144^3` voxels. - -Loading OpenVDB .vdb files -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Files generated with OpenVDB can be loaded easily since Open VKL ``vdb`` volumes implement the same leaf data layout. This means that OpenVDB leaf data pointers can be passed to Open VKL using shared data buffers, avoiding copy operations. - -An example of this can be found in ``utility/vdb/include/openvkl/utility/vdb/OpenVdbGrid.h``, where the class ``OpenVdbFloatGrid`` encapsulates the necessary operations. This class is also accessible through the ``vklExamples`` application using the ``-file`` and ``-field`` command line arguments. - -To use this example feature, compile Open VKL with ``OpenVDB_ROOT`` pointing to the OpenVDB prefix. - -1. Museth, K. VDB: High-Resolution Sparse Volumes with Dynamic Topology. ACM Transactions on Graphics 32(3), 2013. DOI: 10.1145/2487228.2487235 - -Particle Volumes -~~~~~~~~~~~~~~~~ - -Particle volumes consist of a set of points in space.
Each point has a position, a radius, and a weight typically associated with an attribute. A radial basis function defines the contribution of that particle. Currently, we use the Gaussian radial basis function, - -phi(P) = w \* exp( -0.5 \* ((P - p) / r)^2 ) - -where P is the particle position, p is the sample position, r is the radius and w is the weight. - -At each sample, the scalar field value is then computed as the sum of each radial basis function phi, for each particle that overlaps it. Gradients are similarly computed, based on the summed analytical contributions of each contributing particle. - -The Open VKL implementation is similar to direct evaluation of samples in Reda et al.[2]. It uses an Embree-built BVH with a custom traversal, similar to the method in [1]. - -Particle volumes are created by passing the type string ``"particle"`` to ``vklNewVolume``, and have the following parameters: - -.. table:: Configuration parameters for particle (``"particle"``) volumes. - - +---------+-------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - 
+=========+=========================+=========+========================================================================================================================================================================================================================================================================================================================================================================================================================================================+ - | vec3f[] | particle.position | | [data] array of particle positions | - +---------+-------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float[] | particle.radius | | [data] array of particle radii | - +---------+-------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float[] | particle.weight | null | [data] (optional) array of particle weights, specifying the height of the kernel. 
| - +---------+-------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | radiusSupportFactor | 3.0 | The multipler of the particle radius required for support. Larger radii ensure smooth results at the cost of performance. In the Gaussian kernel, the the radius is one standard deviation (sigma), so a ``radiusSupportFactor`` of 3 corresponds to 3*sigma. | - +---------+-------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | clampMaxCumulativeValue | 0 | The maximum cumulative value possible, set by user. All cumulative values will be clamped to this, and further traversal (RBF summation) of particle contributions will halt when this value is reached. A value of zero or less turns this off. 
| - +---------+-------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | estimateValueRanges | true | Enable heuristic estimation of value ranges which are used in internal acceleration structures for interval and hit iterators, as well as for determining the volume’s overall value range. When set to ``false``, the user *must* specify ``clampMaxCumulativeValue``, and all value ranges will be assumed [0, ``clampMaxCumulativeValue``]. Disabling this may improve volume commit time, but will make interval and hit iteration less efficient. | - +---------+-------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -1. Knoll, A., Wald, I., Navratil, P., Bowen, A., Reda, K., Papka, M.E. and Gaither, K. (2014), RBF Volume Ray Casting on Multicore and Manycore CPUs. Computer Graphics Forum, 33: 71-80. doi:10.1111/cgf.12363 - -2. K. Reda, A. Knoll, K. Nomura, M. E. Papka, A. E. Johnson and J. Leigh, “Visualizing large-scale atomistic simulations in ultra-resolution immersive environments,” 2013 IEEE Symposium on Large-Scale Data Analysis and Visualization (LDAV), Atlanta, GA, 2013, pp. 59-65. 
- -Temporal Variation ------------------- - -Open VKL supports two types of temporal variation: temporally structured and temporally unstructured. When one of these modes is enabled, the volume can be sampled at different times. In both modes, time is assumed to vary between zero and one. This can be useful for implementing renderers with motion blur, for example. - -Temporal variation is generally configured through a parameter ``temporalFormat``, which accepts constants from the ``VKLTemporalFormat`` enum, though not all modes may be supported by all volumes. On volumes that expect multiple input nodes, the parameter is an array ``node.temporalFormat``, and must provide one value per node. Multiple attributes in a voxel share the same temporal configuration. Please refer to the individual volume sections above to find out which modes are supported for each volume type. - -``temporalFormat`` defaults to ``VKL_TEMPORAL_FORMAT_CONSTANT`` for all volume types. This means that no temporal variation is present in the data. - -Temporally structured variation is configured by setting ``temporalFormat`` to ``VKL_TEMPORAL_FORMAT_STRUCTURED``. In this mode, the volume expects an additional parameter ``[node.]temporallyStructuredNumTimesteps``, which specifies how many time steps are provided for all voxels, and must be at least 2. A volume, or node, with :math:`N` voxels expects :math:`N * temporallyStructuredNumTimesteps` values for each attribute. The values are assumed evenly spaced over times :math:`[0, 1]`: :math:`\{0, 1/(T-1), ..., 1\}`, where :math:`T` is the value of ``temporallyStructuredNumTimesteps``. - -Temporally unstructured variation supports differing time step counts and sample times per voxel. For :math:`N` input voxels, ``temporallyUnstructuredIndices`` is an array of :math:`N+1` indices. Voxel :math:`i` has :math:`N_i = temporallyUnstructuredIndices[i+1] - temporallyUnstructuredIndices[i]` temporal samples starting at index :math:`temporallyUnstructuredIndices[i]`.
``temporallyUnstructuredTimes`` specifies the times corresponding to the sample values; the time values for each voxel must be between zero and one and strictly increasing: :math:`t_0 < t_1 < ... < t_N`. To return a value at sample time :math:`t`, :math:`t_0 \le t \le t_N`, Open VKL will interpolate linearly from the two nearest time steps. Time values outside this range are clamped to :math:`[t_0, t_N]`. - -Sampler Objects ---------------- - -Computing the value of a volume at an object space coordinate is done using the sampling API, and sampler objects. Sampler objects can be created using - -:: - - VKLSampler vklNewSampler(VKLVolume volume); - -Sampler objects may then be parametrized with traversal parameters. Available parameters are defined by volumes, and are a subset of the volume parameters. As an example, ``filter`` can be set on both ``vdb`` volumes and their sampler objects. The volume parameter is used as the default for sampler objects. The sampler object parameter provides an override per ray. More detail on parameters can be found in the sections on volumes. Use ``vklCommit()`` to commit parameters to the sampler object. - -Sampling --------- - -The scalar API takes a sampler and coordinate, and returns a float value. The volume’s background value (by default ``VKL_BACKGROUND_UNDEFINED``) is returned for probe points outside the volume. The attribute index selects the scalar attribute of interest; not all volumes support multiple attributes. The time value, which must be between 0 and 1, specifies the sampling time. For temporally constant volumes, this value has no effect. - -:: - - float vklComputeSample(VKLSampler sampler, - const vkl_vec3f *objectCoordinates, - unsigned int attributeIndex, - float time); - -Vector versions allow sampling at 4, 8, or 16 positions at once. Depending on the machine type and Open VKL device implementation, these can give greater performance.
An active lane mask ``valid`` is passed in as an array of integers; set 0 for lanes to be ignored, -1 for active lanes. An array of time values corresponding to each object coordinate may be provided; a ``NULL`` value indicates all times are zero. - -:: - - void vklComputeSample4(const int *valid, - VKLSampler sampler, - const vkl_vvec3f4 *objectCoordinates, - float *samples, - unsigned int attributeIndex, - const float *times); - - void vklComputeSample8(const int *valid, - VKLSampler sampler, - const vkl_vvec3f8 *objectCoordinates, - float *samples, - unsigned int attributeIndex, - const float *times); - - void vklComputeSample16(const int *valid, - VKLSampler sampler, - const vkl_vvec3f16 *objectCoordinates, - float *samples, - unsigned int attributeIndex, - const float *times); - -A stream version allows sampling an arbitrary number of positions at once. While the vector version requires coordinates to be provided in a structure-of-arrays layout, the stream version allows coordinates to be provided in an array-of-structures layout. Thus, the stream API can be used to avoid reformatting of data by the application. As with the vector versions, the stream API can give greater performance than the scalar API. - -:: - - void vklComputeSampleN(VKLSampler sampler, - unsigned int N, - const vkl_vec3f *objectCoordinates, - float *samples, - unsigned int attributeIndex, - const float *times); - -All of the above sampling APIs can be used, regardless of the device’s native SIMD width. - -Sampling Multiple Attributes -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Open VKL provides additional APIs for sampling multiple scalar attributes in a single call through the ``vklComputeSampleM*()`` interfaces. Beyond convenience, these can give improved performance relative to the single attribute sampling APIs. As with the single attribute APIs, sampling time values may be specified; note that these are provided per object coordinate only (rather than separately per attribute). 
- -A scalar API supports sampling ``M`` attributes specified by ``attributeIndices`` on a single object space coordinate: - -:: - - void vklComputeSampleM(VKLSampler sampler, - const vkl_vec3f *objectCoordinates, - float *samples, - unsigned int M, - const unsigned int *attributeIndices, - float time); - -Vector versions allow sampling at 4, 8, or 16 positions at once across the ``M`` attributes: - -:: - - void vklComputeSampleM4(const int *valid, - VKLSampler sampler, - const vkl_vvec3f4 *objectCoordinates, - float *samples, - unsigned int M, - const unsigned int *attributeIndices, - const float *times); - - void vklComputeSampleM8(const int *valid, - VKLSampler sampler, - const vkl_vvec3f8 *objectCoordinates, - float *samples, - unsigned int M, - const unsigned int *attributeIndices, - const float *times); - - void vklComputeSampleM16(const int *valid, - VKLSampler sampler, - const vkl_vvec3f16 *objectCoordinates, - float *samples, - unsigned int M, - const unsigned int *attributeIndices, - const float *times); - -The ``[4, 8, 16] * M`` sampled values are populated in the ``samples`` array in a structure-of-arrays layout, with all values for each attribute provided in sequence. That is, sample values ``s_m,n`` for the ``m``\ th attribute and ``n``\ th object coordinate will be populated as - -:: - - samples = [s_0,0, s_0,1, ..., s_0,N-1, - s_1,0, s_1,1, ..., s_1,N-1, - ..., - s_M-1,0, s_M-1,1, ..., s_M-1,N-1] - -A stream version allows sampling an arbitrary number of positions at once across the ``M`` attributes. As with single attribute stream sampling, the ``N`` coordinates are provided in an array-of-structures layout. 
- -:: - - void vklComputeSampleMN(VKLSampler sampler, - unsigned int N, - const vkl_vec3f *objectCoordinates, - float *samples, - unsigned int M, - const unsigned int *attributeIndices, - const float *times); - -The ``M * N`` sampled values are populated in the ``samples`` array in an array-of-structures layout, with all attribute values for each coordinate provided in sequence as - -:: - - samples = [s_0,0, s_1,0, ..., s_M-1,0, - s_0,1, s_1,1, ..., s_M-1,1, - ..., - s_0,N-1, s_1,N-1, ..., s_M-1,N-1] - -All of the above sampling APIs can be used, regardless of the device’s native SIMD width. - -Gradients ---------- - -In a very similar API to ``vklComputeSample``, ``vklComputeGradient`` queries the value gradient at an object space coordinate. Again, a scalar API, now returning a vec3f instead of a float. NaN values are returned for points outside the volume. The time value, which must be between 0 and 1, specifies the sampling time. For temporally constant volumes, this value has no effect. 
- -:: - - vkl_vec3f vklComputeGradient(VKLSampler sampler, - const vkl_vec3f *objectCoordinates, - unsigned int attributeIndex, - float time); - -Vector versions are also provided: - -:: - - void vklComputeGradient4(const int *valid, - VKLSampler sampler, - const vkl_vvec3f4 *objectCoordinates, - vkl_vvec3f4 *gradients, - unsigned int attributeIndex, - const float *times); - - void vklComputeGradient8(const int *valid, - VKLSampler sampler, - const vkl_vvec3f8 *objectCoordinates, - vkl_vvec3f8 *gradients, - unsigned int attributeIndex, - const float *times); - - void vklComputeGradient16(const int *valid, - VKLSampler sampler, - const vkl_vvec3f16 *objectCoordinates, - vkl_vvec3f16 *gradients, - unsigned int attributeIndex, - const float *times); - -Finally, a stream version is provided: - -:: - - void vklComputeGradientN(VKLSampler sampler, - unsigned int N, - const vkl_vec3f *objectCoordinates, - vkl_vec3f *gradients, - unsigned int attributeIndex, - const float *times); - -All of the above gradient APIs can be used, regardless of the device’s native SIMD width. - -Iterators ---------- - -Open VKL has APIs to search for particular volume values along a ray. Queries can be for ranges of volume values (``vklIterateInterval``) or for particular values (``vklIterateHit``). - -Interval iterators require a context object to define the sampler and parameters related to iteration behavior. An interval iterator context is created via - -:: - - VKLIntervalIteratorContext vklNewIntervalIteratorContext(VKLSampler sampler); - -The parameters understood by interval iterator contexts are defined in the table below. - -.. table:: Configuration parameters for interval iterator contexts. 
- - +---------------+------------------------+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +===============+========================+=============+=========================================================================================================================================================================================================================================================================================+ - | int | attributeIndex | 0 | Defines the volume attribute of interest. | - +---------------+------------------------+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vkl_range1f[] | valueRanges | [-inf, inf] | Defines the value ranges of interest. Intervals not containing any of these values ranges may be skipped during iteration. | - +---------------+------------------------+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | intervalResolutionHint | 0.5 | A value in the range [0, 1] affecting the resolution (size) of returned intervals. A value of 0 yields the lowest resolution (largest) intervals while 1 gives the highest resolution (smallest) intervals. This value is only a hint; it may not impact behavior for all volume types. 
| - +---------------+------------------------+-------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Most volume types support the ``intervalResolutionHint`` parameter that can impact the size of intervals returned during iteration. These include ``amr``, ``particle``, ``structuredRegular``, ``unstructured``, and ``vdb`` volumes. In all cases a value of 1.0 yields the highest resolution (smallest) intervals possible, while a value of 0.0 gives the lowest resolution (largest) intervals. In general, smaller intervals will have tighter bounds on value ranges, and more efficient space skipping behavior than larger intervals, which can be beneficial for some rendering methods. - -For ``structuredRegular``, ``unstructured``, and ``vdb`` volumes, a value of 1.0 will enable elementary cell iteration, such that each interval spans an individual voxel / cell intersection. Note that interval iteration can be significantly slower in this case. - -As with other objects, the interval iterator context must be committed before being used. - -To query an interval, a ``VKLIntervalIterator`` of scalar or vector width must be initialized with ``vklInitIntervalIterator``. Time value(s) may be provided to specify the sampling time. These values must be between 0 and 1; for the vector versions, a ``NULL`` value indicates all times are zero. For temporally constant volumes, the time values have no effect. 
- -:: - - VKLIntervalIterator vklInitIntervalIterator(VKLIntervalIteratorContext context, - const vkl_vec3f *origin, - const vkl_vec3f *direction, - const vkl_range1f *tRange, - float time, - void *buffer); - - VKLIntervalIterator4 vklInitIntervalIterator4(const int *valid, - VKLIntervalIteratorContext context, - const vkl_vvec3f4 *origin, - const vkl_vvec3f4 *direction, - const vkl_vrange1f4 *tRange, - const float *times, - void *buffer); - - VKLIntervalIterator8 vklInitIntervalIterator8(const int *valid, - VKLIntervalIteratorContext context, - const vkl_vvec3f8 *origin, - const vkl_vvec3f8 *direction, - const vkl_vrange1f8 *tRange, - const float *times, - void *buffer); - - VKLIntervalIterator16 vklInitIntervalIterator16(const int *valid, - VKLIntervalIteratorContext context, - const vkl_vvec3f16 *origin, - const vkl_vvec3f16 *direction, - const vkl_vrange1f16 *tRange, - const float *times, - void *buffer); - -Open VKL places the iterator struct into a user-provided buffer, and the returned handle is essentially a pointer into this buffer. This means that the iterator handle must not be used after the buffer ceases to exist. Copying iterator buffers is currently not supported. - -The required size, in bytes, of the buffer can be queried with - -:: - - size_t vklGetIntervalIteratorSize(VKLIntervalIteratorContext context); - - size_t vklGetIntervalIteratorSize4(VKLIntervalIteratorContext context); - - size_t vklGetIntervalIteratorSize8(VKLIntervalIteratorContext context); - - size_t vklGetIntervalIteratorSize16(VKLIntervalIteratorContext context); - -The values these functions return may change depending on the parameters set on ``sampler``. - -Open VKL also provides a conservative maximum size over all volume types as a preprocessor definition (``VKL_MAX_INTERVAL_ITERATOR_SIZE``). For ISPC use cases, Open VKL will attempt to detect the native vector width using ``TARGET_WIDTH``, which is defined in recent versions of ISPC, to provide a less conservative size. 
- -Intervals can then be processed by calling ``vklIterateInterval`` as long as the returned lane mask indicates that the iterator is still within the volume: - -:: - - int vklIterateInterval(VKLIntervalIterator iterator, - VKLInterval *interval); - - void vklIterateInterval4(const int *valid, - VKLIntervalIterator4 iterator, - VKLInterval4 *interval, - int *result); - - void vklIterateInterval8(const int *valid, - VKLIntervalIterator8 iterator, - VKLInterval8 *interval, - int *result); - - void vklIterateInterval16(const int *valid, - VKLIntervalIterator16 iterator, - VKLInterval16 *interval, - int *result); - -The intervals returned have a t-value range, a value range, and a ``nominalDeltaT`` which is approximately the step size (in units of ray direction) that should be used to walk through the interval, if desired. The number and length of intervals returned is volume type implementation dependent. There is currently no way of requesting a particular splitting. - -:: - - typedef struct - { - vkl_range1f tRange; - vkl_range1f valueRange; - float nominalDeltaT; - } VKLInterval; - - typedef struct - { - vkl_vrange1f4 tRange; - vkl_vrange1f4 valueRange; - float nominalDeltaT[4]; - } VKLInterval4; - - typedef struct - { - vkl_vrange1f8 tRange; - vkl_vrange1f8 valueRange; - float nominalDeltaT[8]; - } VKLInterval8; - - typedef struct - { - vkl_vrange1f16 tRange; - vkl_vrange1f16 valueRange; - float nominalDeltaT[16]; - } VKLInterval16; - -Querying for particular values is done using a ``VKLHitIterator`` in much the same fashion. This API could be used, for example, to find isosurfaces. As with interval iterators, time value(s) may be provided to specify the sampling time. These values must be between 0 and 1; for the vector versions, a ``NULL`` value indicates all times are zero. For temporally constant volumes, the time values have no effect. - -Hit iterators similarly require a context object to define the sampler and other iteration parameters. 
A hit iterator context is created via - -:: - - VKLHitIteratorContext vklNewHitIteratorContext(VKLSampler sampler); - -The parameters understood by hit iterator contexts are defined in the table below. - -.. table:: Configuration parameters for hit iterator contexts. - - +-----------+----------------+---------+-------------------------------------------+ - | Type | Name | Default | Description | - +===========+================+=========+===========================================+ - | int | attributeIndex | 0 | Defines the volume attribute of interest. | - +-----------+----------------+---------+-------------------------------------------+ - | float[] | values | | Defines the value(s) of interest. | - +-----------+----------------+---------+-------------------------------------------+ - -The hit iterator context must be committed before being used. - -Again, a user allocated buffer must be provided, and a ``VKLHitIterator`` of the desired width must be initialized: - -:: - - VKLHitIterator vklInitHitIterator(VKLHitIteratorContext context, - const vkl_vec3f *origin, - const vkl_vec3f *direction, - const vkl_range1f *tRange, - float time, - void *buffer); - - VKLHitIterator4 vklInitHitIterator4(const int *valid, - VKLHitIteratorContext context, - const vkl_vvec3f4 *origin, - const vkl_vvec3f4 *direction, - const vkl_vrange1f4 *tRange, - const float *times, - void *buffer); - - VKLHitIterator8 vklInitHitIterator8(const int *valid, - VKLHitIteratorContext context, - const vkl_vvec3f8 *origin, - const vkl_vvec3f8 *direction, - const vkl_vrange1f8 *tRange, - const float *times, - void *buffer); - - VKLHitIterator16 vklInitHitIterator16(const int *valid, - VKLHitIteratorContext context, - const vkl_vvec3f16 *origin, - const vkl_vvec3f16 *direction, - const vkl_vrange1f16 *tRange, - const float *times, - void *buffer); - -Buffer size can be queried with - -:: - - size_t vklGetHitIteratorSize(VKLHitIteratorContext context); - - size_t 
vklGetHitIteratorSize4(VKLHitIteratorContext context); - - size_t vklGetHitIteratorSize8(VKLHitIteratorContext context); - - size_t vklGetHitIteratorSize16(VKLHitIteratorContext context); - -Open VKL also provides the macro ``VKL_MAX_HIT_ITERATOR_SIZE`` as a conservative estimate. - -Hits are then queried by looping a call to ``vklIterateHit`` as long as the returned lane mask indicates that the iterator is still within the volume. - -:: - - int vklIterateHit(VKLHitIterator iterator, VKLHit *hit); - - void vklIterateHit4(const int *valid, - VKLHitIterator4 iterator, - VKLHit4 *hit, - int *result); - - void vklIterateHit8(const int *valid, - VKLHitIterator8 iterator, - VKLHit8 *hit, - int *result); - - void vklIterateHit16(const int *valid, - VKLHitIterator16 iterator, - VKLHit16 *hit, - int *result); - -Returned hits consist of a t-value, a volume value (equal to one of the requested values specified in the context), and an (object space) epsilon value estimating the error of the intersection: - -:: - - typedef struct - { - float t; - float sample; - float epsilon; - } VKLHit; - - typedef struct - { - float t[4]; - float sample[4]; - float epsilon[4]; - } VKLHit4; - - typedef struct - { - float t[8]; - float sample[8]; - float epsilon[8]; - } VKLHit8; - - typedef struct - { - float t[16]; - float sample[16]; - float epsilon[16]; - } VKLHit16; - -For both interval and hit iterators, only the vector-wide API for the native SIMD width (determined via ``vklGetNativeSIMDWidth``) can be called. The scalar versions are always valid. This restriction will likely be lifted in the future. - -Performance Recommendations -=========================== - -MXCSR control and status register ---------------------------------- - -It is strongly recommended to have the ``Flush to Zero`` and ``Denormals are Zero`` mode of the MXCSR control and status register enabled for each thread before calling the sampling, gradient, or interval API functions. 
Otherwise, under some circumstances special handling of denormalized floating point numbers can significantly reduce application and Open VKL performance. The device parameter ``flushDenormals`` or environment variable ``OPENVKL_FLUSH_DENORMALS`` can be used to toggle this mode; by default it is enabled. Alternatively, when using Open VKL together with the Intel® Threading Building Blocks, it is sufficient to execute the following code at the beginning of the application main thread (before the creation of the ``tbb::task_scheduler_init`` object): - -:: - - #include <xmmintrin.h> - #include <pmmintrin.h> - ... - _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); - _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); - -If using a different tasking system, make sure each thread calling into Open VKL has the proper mode set. - -Iterator Allocation -------------------- - -``vklInitIntervalIterator`` and ``vklInitHitIterator`` expect a user allocated buffer. While this buffer can be allocated by any means, we expect iterators to be used in inner loops and advise against heap allocation in that case. Applications may provide high performance memory pools, but as a preferred alternative we recommend stack allocated buffers. - -In C99, variable length arrays provide an easy way to achieve this: - -:: - - const size_t bufferSize = vklGetIntervalIteratorSize(sampler); - char buffer[bufferSize]; - -Note that the call to ``vklGetIntervalIteratorSize`` or ``vklGetHitIteratorSize`` should not appear in an inner loop as it is relatively costly. The return value depends on the volume type, target architecture, and parameters to ``sampler``. - -In C++, variable length arrays are not part of the standard. 
Here, users may rely on ``alloca`` and similar functions: - -:: - - #include <alloca.h> - const size_t bufferSize = vklGetIntervalIteratorSize(sampler); - void *buffer = alloca(bufferSize); - -Similarly for ISPC, variable length arrays are not supported, but ``alloca`` may be used: - -:: - - const uniform size_t bufferSize = vklGetIntervalIteratorSizeV(sampler); - void *uniform buffer = alloca(bufferSize); - -Users should understand the implications of ``alloca``. In particular, ``alloca`` does not check available stack space and may result in stack overflow. ``buffer`` also becomes invalid at the end of the scope. As one consequence, it cannot be returned from a function. On Windows, ``_malloca`` is a safer option that performs additional error checking, but requires the use of ``_freea``. - -Applications may instead rely on the ``VKL_MAX_INTERVAL_ITERATOR_SIZE`` and ``VKL_MAX_HIT_ITERATOR_SIZE`` macros. For example, in ISPC: - -:: - - uniform unsigned int8 buffer[VKL_MAX_INTERVAL_ITERATOR_SIZE]; - -These values are majorants over all devices and volume types. Note that Open VKL attempts to detect the target SIMD width using ``TARGET_WIDTH``, returning smaller buffer sizes for narrow architectures. However, Open VKL may fall back to the largest buffer size over all targets. - -Multi-attribute Volume Data Layout ----------------------------------- - -Open VKL provides flexible managed data APIs that allow applications to specify input data in various formats and layouts. When shared buffers are used (``dataCreationFlags = VKL_DATA_SHARED_BUFFER``), Open VKL will use the application-owned memory directly, respecting the input data layout. Shared buffers therefore allow applications to strategically select the best layout for multi-attribute volume data and expected sampling behavior. - -For volume attributes that are sampled individually (e.g. using ``vklComputeSample[4,8,16,N]()``), it is recommended to use a structure-of-arrays layout. 
That is, each attribute’s data should be compact in contiguous memory. This can be accomplished by simply using Open VKL owned data objects (``dataCreationFlags = VKL_DATA_DEFAULT``), or by using a natural ``byteStride`` for shared buffers. - -For volume attributes that are sampled simultaneously (e.g. using ``vklComputeSampleM[4,8,16,N]()``), it is recommended to use an array-of-structures layout. That is, data for these attributes should be provided per voxel in a contiguous layout. This is accomplished using shared buffers for each attribute with appropriate byte strides. For example, for a three attribute structured volume representing a velocity field, the data can be provided as: - -:: - - // used in Open VKL shared buffers, so must not be freed by application - std::vector<vkl_vec3f> velocities(numVoxels); - - for (auto &v : velocities) { - v.x = ...; - v.y = ...; - v.z = ...; - } - - std::vector<VKLData> attributes; - - attributes.push_back(vklNewData(device, - velocities.size(), - VKL_FLOAT, - &velocities[0].x, - VKL_DATA_SHARED_BUFFER, - sizeof(vkl_vec3f))); - - attributes.push_back(vklNewData(device, - velocities.size(), - VKL_FLOAT, - &velocities[0].y, - VKL_DATA_SHARED_BUFFER, - sizeof(vkl_vec3f))); - - attributes.push_back(vklNewData(device, - velocities.size(), - VKL_FLOAT, - &velocities[0].z, - VKL_DATA_SHARED_BUFFER, - sizeof(vkl_vec3f))); - - VKLData attributesData = - vklNewData(device, attributes.size(), VKL_DATA, attributes.data()); - - for (auto &attribute : attributes) - vklRelease(attribute); - - VKLVolume volume = vklNewVolume(device, "structuredRegular"); - - vklSetData(volume, "data", attributesData); - vklRelease(attributesData); - - // set other volume parameters... - - vklCommit(volume); - -These are general recommendations for common scenarios; it is still recommended to evaluate performance of different volume data layouts for your application’s particular use case. 
diff --git a/source/elements/oneART/source/openvkl.rst b/source/elements/oneART/source/openvkl.rst deleted file mode 100644 index 258a19900..000000000 --- a/source/elements/oneART/source/openvkl.rst +++ /dev/null @@ -1,22 +0,0 @@ -.. SPDX-FileCopyrightText: 2021 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -.. _openvkl-section: - -======== -Open VKL -======== - -Open Volume Kernel Library (Open VKL) is a collection of -high-performance volume computation kernels. - -.. toctree:: - :maxdepth: 1 - - openvkl-intro - openvkl-spec - - - - diff --git a/source/elements/oneART/source/ospray-hydra-plugin.rst b/source/elements/oneART/source/ospray-hydra-plugin.rst deleted file mode 100644 index 967c9a17f..000000000 --- a/source/elements/oneART/source/ospray-hydra-plugin.rst +++ /dev/null @@ -1,23 +0,0 @@ -.. SPDX-FileCopyrightText: 2019-2021 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -.. _ospray-hydra-plugin-section: - -============================ -OSPRay Plug-in for USD Hydra -============================ - -The Intel® OSPRay Plug-in for USD Hydra is an open source plugin -for Pixar’s USD to extend USD’s Hydra rendering framework with Intel® OSPRay. -The OSPRay for Hydra Plug-in enables interactive scene preview by -utilizing OSPRay’s high quality renderers and the Intel® Open Image Denoise denoiser. - -As part of the oneAPI Rendering Toolkit, OSPRay is highly-optimized for -Intel® CPU architectures ranging from laptops to large-scale distributed -HPC systems. HdOSPRay leverages the Intel® Rendering Framework to deliver -interactive rendering for large-scale models at high levels of fidelity. - -More information can be found at the `OSPRay for Hydra Plug-in website`_. - -.. 
_`OSPRay for Hydra Plug-in website`: https://github.com/ospray/hdospray diff --git a/source/elements/oneART/source/ospray-intro.rst b/source/elements/oneART/source/ospray-intro.rst deleted file mode 100644 index bd133174f..000000000 --- a/source/elements/oneART/source/ospray-intro.rst +++ /dev/null @@ -1,15 +0,0 @@ -.. SPDX-FileCopyrightText: 2021 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -============ -Introduction -============ - -OSPRay is a scalable, and portable ray tracing engine for -high-performance, high-fidelity visualization. - -The purpose of OSPRay is to provide an open, powerful, and easy-to-use -rendering library that allows one to easily build applications that -use ray tracing based rendering for interactive applications -(including both surface- and volume-based visualizations). diff --git a/source/elements/oneART/source/ospray-spec.rst b/source/elements/oneART/source/ospray-spec.rst deleted file mode 100644 index 12624544a..000000000 --- a/source/elements/oneART/source/ospray-spec.rst +++ /dev/null @@ -1,2536 +0,0 @@ -OSPRay API -========== - -To access the OSPRay API you first need to include the OSPRay header - -.. code:: cpp - - #include "ospray/ospray.h" - -where the API is compatible with C99 and C++. - -Initialization and Shutdown ---------------------------- - -To use the API, OSPRay must be initialized with a “device”. A device is the object which implements the API. Creating and initializing a device can be done in either of two ways: command line arguments using ``ospInit`` or manually instantiating a device and setting parameters on it. - -Command Line Arguments -~~~~~~~~~~~~~~~~~~~~~~ - -The first is to do so by giving OSPRay the command line from ``main()`` by calling - -.. code:: cpp - - OSPError ospInit(int *argc, const char **argv); - -OSPRay parses (and removes) its known command line parameters from your application’s ``main`` function. For an example see the `tutorial <#osptutorial>`__. 
For possible error codes see section `Error Handling and Status Messages <#error-handling-and-status-messages>`__. It is important to note that the arguments passed to ``ospInit()`` are processed in the order they are listed. The following parameters (which are prefixed by convention with “``--osp:``”) are understood: - -.. table:: Command line parameters accepted by OSPRay’s ``ospInit``. - - +-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Parameter | Description | - +===============================================+======================================================================================================================================================================================================================================+ - | ``--osp:debug`` | enables various extra checks and debug output, and disables multi-threading | - +-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``--osp:num-threads=<n>`` | use ``n`` threads instead of per default using all detected hardware threads | - +-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``--osp:log-level=<str>`` | set logging level; valid values (in order of severity) are ``none``, ``error``, ``warning``, ``info``, and ``debug`` | - 
+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``--osp:warn-as-error`` | send ``warning`` and ``error`` messages through the error callback, otherwise send ``warning`` messages through the message callback; must have sufficient ``logLevel`` to enable warnings | - +-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``--osp:verbose`` | shortcut for ``--osp:log-level=info`` and enable debug output on ``cout``, error output on ``cerr`` | - +-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``--osp:vv`` | shortcut for ``--osp:log-level=debug`` and enable debug output on ``cout``, error output on ``cerr`` | - +-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``--osp:load-modules=<name>[,...]`` | load one or more modules during initialization; equivalent to calling ``ospLoadModule(name)`` | - 
+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``--osp:log-output=<dst>`` | convenience for setting where status messages go; valid values for ``dst`` are ``cerr`` and ``cout`` | - +-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``--osp:error-output=<dst>`` | convenience for setting where error messages go; valid values for ``dst`` are ``cerr`` and ``cout`` | - +-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``--osp:device=<name>`` | use ``name`` as the type of device for OSPRay to create; e.g., ``--osp:device=cpu`` gives you the default ``cpu`` device; Note if the device to be used is defined in a module, remember to pass ``--osp:load-modules=<name>`` first | - +-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``--osp:set-affinity=<n>`` | if ``1``, bind software threads to hardware threads; ``0`` disables binding; default is ``1`` on KNL and ``0`` otherwise | - 
+-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | ``--osp:device-params=:[,...]`` | set one or more other device parameters; equivalent to calling ``ospDeviceSet*(param, value)`` | - +-----------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Manual Device Instantiation -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The second method of initialization is to explicitly create the device and possibly set parameters. This method looks almost identical to how other `objects <#objects>`__ are created and used by OSPRay (described in later sections). The first step is to create the device with - -.. code:: cpp - - OSPDevice ospNewDevice(const char *type); - -where the ``type`` string maps to a specific device implementation. OSPRay always provides the “``cpu``” device, which maps to a fast, local CPU implementation. Other devices can also be added through additional modules, such as distributed MPI device implementations. - -Once a device is created, you can call - -.. code:: cpp - - void ospDeviceSetParam(OSPObject, const char *id, OSPDataType type, const void *mem); - -to set parameters on the device. The semantics of setting parameters is exactly the same as ``ospSetParam``, which is documented below in the `parameters <#parameters>`__ section. The following parameters can be set on all devices: - -.. table:: Parameters shared by all devices. 
- - +--------+-------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Description | - +========+=============+============================================================================================================================================================================================+ - | int | numThreads | number of threads which OSPRay should use | - +--------+-------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | int | logLevel | logging level; valid values (in order of severity) are ``OSP_LOG_NONE``, ``OSP_LOG_ERROR``, ``OSP_LOG_WARNING``, ``OSP_LOG_INFO``, and ``OSP_LOG_DEBUG`` | - +--------+-------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | string | logOutput | convenience for setting where status messages go; valid values are ``cerr`` and ``cout`` | - +--------+-------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | string | errorOutput | convenience for setting where error messages go; valid values are ``cerr`` and ``cout`` | - +--------+-------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | debug | set debug mode; equivalent to ``logLevel=debug`` and ``numThreads=1`` | - 
+--------+-------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | warnAsError | send ``warning`` and ``error`` messages through the error callback, otherwise send ``warning`` messages through the message callback; must have sufficient ``logLevel`` to enable warnings | - +--------+-------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | setAffinity | bind software threads to hardware threads if set to 1; 0 disables binding omitting the parameter will let OSPRay choose | - +--------+-------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Once parameters are set on the created device, the device must be committed with - -.. code:: cpp - - void ospDeviceCommit(OSPDevice); - -To use the newly committed device, you must call - -.. code:: cpp - - void ospSetCurrentDevice(OSPDevice); - -This then sets the given device as the object which will respond to all other OSPRay API calls. - -Device handle lifetimes are managed with two calls, the first which increments the internal reference count to the given ``OSPDevice`` - -.. code:: cpp - - void ospDeviceRetain(OSPDevice) - -and the second which decrements the reference count - -.. code:: cpp - - void ospDeviceRelease(OSPDevice) - -Users can change parameters on the device after initialization (from either method above), by calling - -.. code:: cpp - - OSPDevice ospGetCurrentDevice(); - -This function returns the handle to the device currently used to respond to OSPRay API calls, where users can set/change parameters and recommit the device. 
If changes are made to the device that is already set as the current device, it does not need to be set as current again. Note this API call will increment the ref count of the returned device handle, so applications must use ``ospDeviceRelease`` when finished using the handle to avoid leaking the underlying device object. If there is no current device set, this will return an invalid ``NULL`` handle. - -When a device is created, its reference count is initially ``1``. When a device is set as the current device, it internally has its reference count incremented. Note that ``ospDeviceRetain`` and ``ospDeviceRelease`` should only be used with reference counts that the application tracks: removing reference held by the current set device should be handled by ``ospShutdown``. Thus, ``ospDeviceRelease`` should only decrement the reference counts that come from ``ospNewDevice``, ``ospGetCurrentDevice``, and the number of explicit calls to ``ospDeviceRetain``. - -OSPRay allows applications to query runtime properties of a device in order to do enhanced validation of what device was loaded at runtime. The following function can be used to get these device-specific properties (attributes about the device, not parameter values) - -.. code:: cpp - - int64_t ospDeviceGetProperty(OSPDevice, OSPDeviceProperty); - -It returns an integer value of the queried property and the following properties can be provided as parameter: - -.. code:: cpp - - OSP_DEVICE_VERSION - OSP_DEVICE_VERSION_MAJOR - OSP_DEVICE_VERSION_MINOR - OSP_DEVICE_VERSION_PATCH - OSP_DEVICE_SO_VERSION - -Environment Variables -~~~~~~~~~~~~~~~~~~~~~ - -OSPRay’s generic device parameters can be overridden via environment variables for easy changes to OSPRay’s behavior without needing to change the application (variables are prefixed by convention with “``OSPRAY_``”): - -.. table:: Environment variables interpreted by OSPRay. 
- - +----------------------+--------------------------------------------------------------------------------------------------------------+ - | Variable | Description | - +======================+==============================================================================================================+ - | OSPRAY_NUM_THREADS | equivalent to ``--osp:num-threads`` | - +----------------------+--------------------------------------------------------------------------------------------------------------+ - | OSPRAY_LOG_LEVEL | equivalent to ``--osp:log-level`` | - +----------------------+--------------------------------------------------------------------------------------------------------------+ - | OSPRAY_LOG_OUTPUT | equivalent to ``--osp:log-output`` | - +----------------------+--------------------------------------------------------------------------------------------------------------+ - | OSPRAY_ERROR_OUTPUT | equivalent to ``--osp:error-output`` | - +----------------------+--------------------------------------------------------------------------------------------------------------+ - | OSPRAY_DEBUG | equivalent to ``--osp:debug`` | - +----------------------+--------------------------------------------------------------------------------------------------------------+ - | OSPRAY_WARN_AS_ERROR | equivalent to ``--osp:warn-as-error`` | - +----------------------+--------------------------------------------------------------------------------------------------------------+ - | OSPRAY_SET_AFFINITY | equivalent to ``--osp:set-affinity`` | - +----------------------+--------------------------------------------------------------------------------------------------------------+ - | OSPRAY_LOAD_MODULES | equivalent to ``--osp:load-modules``, can be a comma separated list of modules which will be loaded in order | - +----------------------+--------------------------------------------------------------------------------------------------------------+ - | OSPRAY_DEVICE | 
equivalent to ``--osp:device:`` | - +----------------------+--------------------------------------------------------------------------------------------------------------+ - -Note that these environment variables take precedence over values specified through ``ospInit`` or manually set device parameters. - -Error Handling and Status Messages -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The following errors are currently used by OSPRay: - -.. table:: Possible error codes, i.e., valid named constants of type ``OSPError``. - - +-----------------------+------------------------------------------------------------------------------+ - | Name | Description | - +=======================+==============================================================================+ - | OSP_NO_ERROR | no error occurred | - +-----------------------+------------------------------------------------------------------------------+ - | OSP_UNKNOWN_ERROR | an unknown error occurred | - +-----------------------+------------------------------------------------------------------------------+ - | OSP_INVALID_ARGUMENT | an invalid argument was specified | - +-----------------------+------------------------------------------------------------------------------+ - | OSP_INVALID_OPERATION | the operation is not allowed for the specified object | - +-----------------------+------------------------------------------------------------------------------+ - | OSP_OUT_OF_MEMORY | there is not enough memory to execute the command | - +-----------------------+------------------------------------------------------------------------------+ - | OSP_UNSUPPORTED_CPU | the CPU is not supported (minimum ISA is SSE4.1 on x86_64 and NEON on ARM64) | - +-----------------------+------------------------------------------------------------------------------+ - | OSP_VERSION_MISMATCH | a module could not be loaded due to mismatching version | - 
+-----------------------+------------------------------------------------------------------------------+ - -These error codes are either directly returned by some API functions, or are recorded to be later queried by the application via - -.. code:: cpp - - OSPError ospDeviceGetLastErrorCode(OSPDevice); - -A more descriptive error message can be queried by calling - -.. code:: cpp - - const char* ospDeviceGetLastErrorMsg(OSPDevice); - -Alternatively, the application can also register a callback function of type - -.. code:: cpp - - typedef void (*OSPErrorCallback)(void *userData, OSPError, const char* errorDetails); - -via - -.. code:: cpp - - void ospDeviceSetErrorCallback(OSPDevice, OSPErrorCallback, void *userData); - -to get notified when errors occur. - -Applications may be interested in messages which OSPRay emits, whether for debugging or logging events. Applications can call - -.. code:: cpp - - void ospDeviceSetStatusCallback(OSPDevice, OSPStatusCallback, void *userData); - -in order to register a callback function of type - -.. code:: cpp - - typedef void (*OSPStatusCallback)(void *userData, const char* messageText); - -which OSPRay will use to emit status messages. By default, OSPRay uses a callback which does nothing, so any output desired by an application will require that a callback is provided. Note that callbacks for C++ ``std::cout`` and ``std::cerr`` can be alternatively set through ``ospInit()`` or the ``OSPRAY_LOG_OUTPUT`` environment variable. - -Applications can clear either callback by passing ``NULL`` instead of an actual function pointer. - -Loading OSPRay Extensions at Runtime -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -OSPRay’s functionality can be extended via plugins (which we call “modules”), which are implemented in shared libraries. To load module ``name`` from ``libospray_module_<name>.so`` (on Linux and Mac OS X) or ``ospray_module_<name>.dll`` (on Windows) use - -..
code:: cpp - - OSPError ospLoadModule(const char *name); - -Modules are searched in OS-dependent paths. ``ospLoadModule`` returns ``OSP_NO_ERROR`` if the plugin could be successfully loaded. - -Shutting Down OSPRay -~~~~~~~~~~~~~~~~~~~~ - -When the application is finished using OSPRay (typically on application exit), the OSPRay API should be finalized with - -.. code:: cpp - - void ospShutdown(); - -This API call ensures that the current device is cleaned up appropriately. Due to static object allocation having non-deterministic ordering, it is recommended that applications call ``ospShutdown()`` before the calling application process terminates. - -Objects -------- - -All entities of OSPRay (the `renderer <#renderers>`__, `volumes <#volumes>`__, `geometries <#geometries>`__, `lights <#lights>`__, `cameras <#cameras>`__, …) are a logical specialization of ``OSPObject`` and share a common mechanism to deal with parameters and lifetime. - -An important aspect of object parameters is that parameters do not get passed to objects immediately. Instead, parameters are not visible at all to objects until they get explicitly committed to a given object via a call to - -.. code:: cpp - - void ospCommit(OSPObject); - -at which time all previous additions or changes to parameters are visible at the same time. If a user wants to change the state of an existing object (e.g., to change the origin of an already existing camera) it is perfectly valid to do so, as long as the changed parameters are recommitted. - -The commit semantics allow for batching up multiple small changes, and specify exactly when changes to objects will occur. This can impact performance and consistency for devices crossing a PCI bus or across a network. - -Note that OSPRay uses reference counting to manage the lifetime of all objects, so one cannot explicitly “delete” any object. Instead, to indicate that the application does not need and does not access the given object anymore, call - -..
code:: cpp - - void ospRelease(OSPObject); - -This decreases its reference count and if the count reaches ``0`` the object will automatically get deleted. Passing ``NULL`` is not an error. Note that every handle returned via the API needs to be released when the object is no longer needed, to avoid memory leaks. - -Sometimes applications may want to have more than one reference to an object, where it is desirable for the application to increment the reference count of an object. This is done with - -.. code:: cpp - - void ospRetain(OSPObject); - -It is important to note that this is only necessary if the application wants to call ``ospRelease`` on an object more than once: objects which contain other objects as parameters increment/decrement ref counts internally, so this should not be done explicitly by the application. - -Parameters -~~~~~~~~~~ - -Parameters are used to configure the behavior of objects and to pass data to them. However, objects do *not* have an explicit interface for reasons of high flexibility and a more stable compile-time API. Instead, parameters are passed separately to objects in an arbitrary order, and unknown parameters will simply be ignored (though a warning message will be posted). The following function allows adding various types of parameters with name ``id`` to a given object: - -.. code:: cpp - - void ospSetParam(OSPObject, const char *id, OSPDataType type, const void *mem); - -The valid parameter names for all ``OSPObject``\ s and what types are valid are discussed in future sections. - -Note that ``mem`` must always be a pointer *to* the object, otherwise accidental type casting can occur. This is especially true for pointer types (``OSP_VOID_PTR`` and ``OSPObject`` handles), as they will implicitly cast to ``void\ *``, but be incorrectly interpreted.
To help with some of these issues, there also exist variants of ``ospSetParam`` for specific types, such as ``ospSetInt`` and ``ospSetVec3f`` in the OSPRay utility library (found in ``ospray_util.h``). Note that half precision float parameters ``OSP_HALF, OSP_VEC[234]H`` are not supported. - -Users can also remove parameters that have been explicitly set from ``ospSetParam``. Any parameters which have been removed will go back to their default value during the next commit unless a new parameter was set after the parameter was removed. To remove a parameter, use - -.. code:: cpp - - void ospRemoveParam(OSPObject, const char *id); - -Data -~~~~ - -OSPRay consumes data arrays from the application using a specific object type, ``OSPData``. There are several components to describing a data array: element type, 1/2/3 dimensional striding, and whether the array is shared with the application or copied into opaque, OSPRay-owned memory. - -Shared data arrays require that the application’s array memory outlives the lifetime of the created ``OSPData``, as OSPRay is referring to application memory. Where this is not preferable, applications use opaque arrays to allow the ``OSPData`` to own the lifetime of the array memory. However, opaque arrays incur the cost of copying data into them, which should be kept in mind. - -Thus, the most efficient way to specify a data array from the application is to create a shared data array, which is done with - -.. code:: cpp - - OSPData ospNewSharedData(const void *sharedData, - OSPDataType, - uint64_t numItems1, - int64_t byteStride1 = 0, - uint64_t numItems2 = 1, - int64_t byteStride2 = 0, - uint64_t numItems3 = 1, - int64_t byteStride3 = 0); - -The call returns an ``OSPData`` handle to the created array. The calling program guarantees that the ``sharedData`` pointer will remain valid for the duration that this data array is being used. The number of elements ``numItems`` must be positive (there cannot be an empty data object).
The data is arranged in three dimensions, with specializations to two or one dimension (if some ``numItems`` are 1). The distance between consecutive elements (per dimension) is given in bytes with ``byteStride`` and can also be negative. If ``byteStride`` is zero it will be determined automatically (e.g., as ``sizeof(type)``). Strides do not need to be ordered, i.e., ``byteStride2`` can be smaller than ``byteStride1``, which is equivalent to a transpose. However, if the stride should be calculated, then an ordering in dimensions is assumed to disambiguate, i.e., ``byteStride1 < byteStride2 < byteStride3``. - -The enum type ``OSPDataType`` describes the different element types that can be represented in OSPRay; valid constants are listed in the table below. - -.. table:: Valid named constants for ``OSPDataType``. - - +----------------------------+----------------------------------------------------------------------------------------------+ - | Type/Name | Description | - +============================+==============================================================================================+ - | OSP_DEVICE | API device object reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_DATA | data reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_OBJECT | generic object reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_CAMERA | camera object reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_FRAMEBUFFER | framebuffer object reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_LIGHT | light 
object reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_MATERIAL | material object reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_TEXTURE | texture object reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_RENDERER | renderer object reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_WORLD | world object reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_GEOMETRY | geometry object reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_VOLUME | volume object reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_TRANSFER_FUNCTION | transfer function object reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_IMAGE_OPERATION | image operation object reference | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_STRING | C-style zero-terminated character string | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_CHAR, OSP_VEC[234]C | 8 bit signed character scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_UCHAR, OSP_VEC[234]UC 
| 8 bit unsigned character scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_SHORT, OSP_VEC[234]S | 16 bit signed integer scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_USHORT, OSP_VEC[234]US | 16 bit unsigned integer scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_INT, OSP_VEC[234]I | 32 bit signed integer scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_UINT, OSP_VEC[234]UI | 32 bit unsigned integer scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_LONG, OSP_VEC[234]L | 64 bit signed integer scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_ULONG, OSP_VEC[234]UL | 64 bit unsigned integer scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_HALF, OSP_VEC[234]H | 16 bit half precision floating-point scalar and [234]-element vector (IEEE 754 ``binary16``) | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_FLOAT, OSP_VEC[234]F | 32 bit single precision floating-point scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_DOUBLE,
OSP_VEC[234]D | 64 bit double precision floating-point scalar and [234]-element vector | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_BOX[1234]I | 32 bit integer box (lower + upper bounds) | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_BOX[1234]F | 32 bit single precision floating-point box (lower + upper bounds) | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_LINEAR[23]F | 32 bit single precision floating-point linear transform ([23] vectors) | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_AFFINE[23]F | 32 bit single precision floating-point affine transform (linear transform plus translation) | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_QUATF | 32 bit single precision floating-point quaternion, in :math:`(i, j, k, w)` layout | - +----------------------------+----------------------------------------------------------------------------------------------+ - | OSP_VOID_PTR | raw memory address (only found in module extensions) | - +----------------------------+----------------------------------------------------------------------------------------------+ - -If the elements of the array are handles to objects, then their reference counter is incremented. - -An opaque ``OSPData`` with memory allocated by OSPRay is created with - -.. code:: cpp - - OSPData ospNewData(OSPDataType, - uint64_t numItems1, - uint64_t numItems2 = 1, - uint64_t numItems3 = 1); - -To allow for (partial) copies or updates of data arrays use - -.. 
code:: cpp - - void ospCopyData(const OSPData source, - OSPData destination, - uint64_t destinationIndex1 = 0, - uint64_t destinationIndex2 = 0, - uint64_t destinationIndex3 = 0); - -which will copy the whole [1]_ content of the ``source`` array into ``destination`` at the given location ``destinationIndex``. The ``OSPDataType``\ s of the data objects must match. The region to be copied must be valid inside the destination, i.e., in all dimensions, ``destinationIndex + sourceSize <= destinationSize``. The affected region ``[destinationIndex, destinationIndex + sourceSize)`` is marked as dirty, which may be used by OSPRay to only process or update that sub-region (e.g., updating an acceleration structure). If the destination array is shared with OSPData by the application (created with ``ospNewSharedData``), then - -- the source array must be shared as well (thus ``ospCopyData`` cannot be used to read opaque data) -- if source and destination memory overlap (aliasing), then behavior is undefined -- except if source and destination regions are identical (including matching strides), which can be used by the application to mark that region as dirty (instead of the whole ``OSPData``) - -To add a data array as a parameter named ``id`` to another object, use - -.. code:: cpp - - void ospSetObject(OSPObject, const char *id, OSPData); - -Volumes -------- - -Volumes are volumetric data sets with discretely sampled values in 3D space, typically a 3D scalar field. To create a new volume object of given type ``type`` use - -.. code:: cpp - - OSPVolume ospNewVolume(const char *type); - -Note that OSPRay’s implementation forwards ``type`` directly to Open VKL, allowing new Open VKL volume types to be usable within OSPRay without the need to change (or even recompile) OSPRay. - -Structured Regular Volume -~~~~~~~~~~~~~~~~~~~~~~~~~ - -Structured volumes only need to store the values of the samples, because their addresses in memory can be easily computed from a 3D position.
A common type of structured volumes are regular grids. - -Structured regular volumes are created by passing the type string “``structuredRegular``” to ``ospNewVolume``. Structured volumes are represented through an ``OSPData`` 3D array ``data`` (which may or may not be shared with the application). The voxel data must be laid out in xyz-order [2]_ and can be compact (best for performance) or can have a stride between voxels, specified through the ``byteStride1`` parameter when creating the ``OSPData``. Only 1D strides are supported, additional strides between scanlines (2D, ``byteStride2``) and slices (3D, ``byteStride3``) are not. - -The parameters understood by structured volumes are summarized in the table below. - -.. table:: Configuration parameters for structured regular volumes. - - +---------+----------------+---------------------------------+----------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=========+================+=================================+============================================================================================================================+ - | vec3f | gridOrigin | :math:`(0, 0, 0)` | origin of the grid in object-space | - +---------+----------------+---------------------------------+----------------------------------------------------------------------------------------------------------------------------+ - | vec3f | gridSpacing | :math:`(1, 1, 1)` | size of the grid cells in object-space | - +---------+----------------+---------------------------------+----------------------------------------------------------------------------------------------------------------------------+ - | OSPData | data | | the actual voxel 3D `data <#data>`__ | - 
+---------+----------------+---------------------------------+----------------------------------------------------------------------------------------------------------------------------+ - | int | filter | ``OSP_VOLUME_FILTER_TRILINEAR`` | filter used for reconstructing the field, also allowed is ``OSP_VOLUME_FILTER_NEAREST`` and ``OSP_VOLUME_FILTER_TRICUBIC`` | - +---------+----------------+---------------------------------+----------------------------------------------------------------------------------------------------------------------------+ - | int | gradientFilter | same as ``filter`` | filter used during gradient computations | - +---------+----------------+---------------------------------+----------------------------------------------------------------------------------------------------------------------------+ - | float | background | ``NaN`` | value that is used when sampling an undefined region outside the volume domain | - +---------+----------------+---------------------------------+----------------------------------------------------------------------------------------------------------------------------+ - -The size of the volume is inferred from the size of the 3D array ``data``, as is the type of the voxel values (currently supported are: ``OSP_UCHAR``, ``OSP_SHORT``, ``OSP_USHORT``, ``OSP_HALF``, ``OSP_FLOAT``, and ``OSP_DOUBLE``). - -Structured Spherical Volume -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Structured spherical volumes are also supported, which are created by passing a type string of “``structuredSpherical``” to ``ospNewVolume``. The grid dimensions and parameters are defined in terms of radial distance :math:`r`, inclination angle :math:`\theta`, and azimuthal angle :math:`\phi`, conforming with the ISO convention for spherical coordinate systems. The coordinate system and parameters understood by structured spherical volumes are summarized below. - -.. 
figure:: https://ospray.github.io/images/structured_spherical_coords.svg - :alt: Coordinate system of structured spherical volumes. - :width: 60.0% - - Coordinate system of structured spherical volumes. - -.. table:: Configuration parameters for structured spherical volumes. - - +---------+----------------+---------------------------------+-----------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=========+================+=================================+=========================================================================================+ - | vec3f | gridOrigin | :math:`(0, 0, 0)` | origin of the grid in units of :math:`(r, \theta, \phi)`; angles in degrees | - +---------+----------------+---------------------------------+-----------------------------------------------------------------------------------------+ - | vec3f | gridSpacing | :math:`(1, 1, 1)` | size of the grid cells in units of :math:`(r, \theta, \phi)`; angles in degrees | - +---------+----------------+---------------------------------+-----------------------------------------------------------------------------------------+ - | OSPData | data | | the actual voxel 3D `data <#data>`__ | - +---------+----------------+---------------------------------+-----------------------------------------------------------------------------------------+ - | int | filter | ``OSP_VOLUME_FILTER_TRILINEAR`` | filter used for reconstructing the field, also allowed is ``OSP_VOLUME_FILTER_NEAREST`` | - +---------+----------------+---------------------------------+-----------------------------------------------------------------------------------------+ - | int | gradientFilter | same as ``filter`` | filter used during gradient computations | - +---------+----------------+---------------------------------+-----------------------------------------------------------------------------------------+ - | float | background | ``NaN`` | value that 
is used when sampling an undefined region outside the volume domain | - +---------+----------------+---------------------------------+-----------------------------------------------------------------------------------------+ - -The dimensions :math:`(r, \theta, \phi)` of the volume are inferred from the size of the 3D array ``data``, as is the type of the voxel values (currently supported are: ``OSP_UCHAR``, ``OSP_SHORT``, ``OSP_USHORT``, ``OSP_HALF``, ``OSP_FLOAT``, and ``OSP_DOUBLE``). - -These grid parameters support flexible specification of spheres, hemispheres, spherical shells, spherical wedges, and so forth. The grid extents (computed as ``[gridOrigin, gridOrigin + (dimensions - 1) * gridSpacing]``) however must be constrained such that: - -- :math:`r \geq 0` -- :math:`0 \leq \theta \leq 180` -- :math:`0 \leq \phi \leq 360` - -Adaptive Mesh Refinement (AMR) Volume -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -OSPRay currently supports block-structured (Berger-Colella) AMR volumes. Volumes are specified as a list of blocks, which exist at levels of refinement in potentially overlapping regions. Blocks exist in a tree structure, with coarser refinement level blocks containing finer blocks. The cell width is equal for all blocks at the same refinement level, though blocks at a coarser level have a larger cell width than finer levels. - -There can be any number of refinement levels and any number of blocks at any level of refinement. An AMR volume type is created by passing the type string “``amr``” to ``ospNewVolume``. - -Blocks are defined by three parameters: their bounds, the refinement level in which they reside, and the scalar data contained within each block. - -Note that cell widths are defined *per refinement level*, not per block. - -.. table:: Configuration parameters for AMR volumes. 
- - +------------------+--------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +==================+==============+=====================+===============================================================================================================================+ - | ``OSPAMRMethod`` | method | ``OSP_AMR_CURRENT`` | ``OSPAMRMethod`` sampling method. Supported methods are: | - +------------------+--------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``OSP_AMR_CURRENT`` | - +------------------+--------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``OSP_AMR_FINEST`` | - +------------------+--------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``OSP_AMR_OCTANT`` | - +------------------+--------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------+ - | float[] | cellWidth | NULL | array of each level’s cell width | - +------------------+--------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------+ - | box3i[] | block.bounds | NULL | `data <#data>`__ array of grid sizes (in voxels) for each AMR block | - +------------------+--------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------+ - | int[] | block.level | NULL | array of each block’s refinement 
level | - +------------------+--------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------+ - | OSPData[] | block.data | NULL | `data <#data>`__ array of OSPData containing the actual scalar voxel data, only ``OSP_FLOAT`` is supported as ``OSPDataType`` | - +------------------+--------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------+ - | vec3f | gridOrigin | :math:`(0, 0, 0)` | origin of the grid | - +------------------+--------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------+ - | vec3f | gridSpacing | :math:`(1, 1, 1)` | size of the grid cells | - +------------------+--------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------+ - | float | background | ``NaN`` | value that is used when sampling an undefined region outside the volume domain | - +------------------+--------------+---------------------+-------------------------------------------------------------------------------------------------------------------------------+ - -Lastly, note that the ``gridOrigin`` and ``gridSpacing`` parameters act just like the structured volume equivalent, but they only modify the root (coarsest level) of refinement. - -In particular, OSPRay’s / Open VKL’s AMR implementation was designed to cover Berger-Colella [1] and Chombo [2] AMR data. The ``method`` parameter above determines the interpolation method used when sampling the volume. 
- -OSP_AMR_CURRENT - finds the finest refinement level at that cell and interpolates through this “current” level -OSP_AMR_FINEST - will interpolate at the closest existing cell in the volume-wide finest refinement level regardless of the sample cell’s level -OSP_AMR_OCTANT - interpolates through all available refinement levels at that cell. This method avoids discontinuities at refinement level boundaries at the cost of performance - -Details and more information can be found in the publication for the implementation [3]. - -1. M.J. Berger and P. Colella, “Local adaptive mesh refinement for shock hydrodynamics.” Journal of Computational Physics 82.1 (1989): 64-84. DOI: 10.1016/0021-9991(89)90035-1 -2. M. Adams, P. Colella, D.T. Graves, J.N. Johnson, N.D. Keen, T.J. Ligocki, D.F. Martin, P.W. McCorquodale, D. Modiano, P.O. Schwartz, T.D. Sternberg, and B. Van Straalen, “Chombo Software Package for AMR Applications – Design Document”, Lawrence Berkeley National Laboratory Technical Report LBNL-6616E. -3. I. Wald, C. Brownlee, W. Usher, and A. Knoll, “CPU volume rendering of adaptive mesh refinement data”. SIGGRAPH Asia 2017 Symposium on Visualization – SA ’17, 18(8), 1–8. DOI: 10.1145/3139295.3139305 - -Unstructured Volume -~~~~~~~~~~~~~~~~~~~ - -Unstructured volumes can have their topology and geometry freely defined. Geometry can be composed of tetrahedral, hexahedral, wedge or pyramid cell types. The data format used is compatible with VTK and consists of multiple arrays: vertex positions and values, vertex indices, cell start indices, cell types, and cell values. An unstructured volume type is created by passing the type string “``unstructured``” to ``ospNewVolume``. - -Sampled cell values can be specified either per-vertex (``vertex.data``) or per-cell (``cell.data``). If both arrays are set, ``cell.data`` takes precedence. - -Similar to a mesh, each cell is formed by a group of indices into the vertices.
For each vertex, the corresponding (by array index) data value will be used for sampling when rendering, if specified. The index order for a tetrahedron is the same as ``VTK_TETRA``: bottom triangle counterclockwise, then the top vertex. - -For hexahedral cells, each hexahedron is formed by a group of eight indices into the vertices and data values. Vertex ordering is the same as ``VTK_HEXAHEDRON``: four bottom vertices counterclockwise, then top four counterclockwise. - -For wedge cells, each wedge is formed by a group of six indices into the vertices and data values. Vertex ordering is the same as ``VTK_WEDGE``: three bottom vertices counterclockwise, then top three counterclockwise. - -For pyramid cells, each cell is formed by a group of five indices into the vertices and data values. Vertex ordering is the same as ``VTK_PYRAMID``: four bottom vertices counterclockwise, then the top vertex. - -To maintain VTK data compatibility, the ``index`` array may be specified with cell sizes interleaved with vertex indices in the following format: :math:`n, id_1, ..., id_n, m, id_1, ..., id_m`. This alternative ``index`` array layout can be enabled through the ``indexPrefixed`` flag (in which case, the ``cell.type`` parameter must be omitted). - -.. table:: Configuration parameters for unstructured volumes. 
- - +---------------------+--------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=====================+====================+=========+=========================================================================================================================================================+ - | vec3f[] | vertex.position | | `data <#data>`__ array of vertex positions | - +---------------------+--------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float[] | vertex.data | | `data <#data>`__ array of vertex data values to be sampled | - +---------------------+--------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | uint32[] / uint64[] | index | | `data <#data>`__ array of indices (into the vertex array(s)) that form cells | - +---------------------+--------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | indexPrefixed | false | indicates that the ``index`` array is compatible to VTK, where the indices of each cell are prefixed with the number of vertices | - +---------------------+--------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | uint32[] / uint64[] | cell.index | | `data <#data>`__ array of locations (into the index array), specifying the first index of each cell | - 
+---------------------+--------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float[] | cell.data | | `data <#data>`__ array of cell data values to be sampled | - +---------------------+--------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | uint8[] | cell.type | | `data <#data>`__ array of cell types (VTK compatible), only set if ``indexPrefixed = false`` false. Supported types are: | - +---------------------+--------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``OSP_TETRAHEDRON`` | - +---------------------+--------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``OSP_HEXAHEDRON`` | - +---------------------+--------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``OSP_WEDGE`` | - +---------------------+--------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``OSP_PYRAMID`` | - +---------------------+--------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | hexIterative | false | hexahedron interpolation method, defaults to fast non-iterative version which could have rendering inaccuracies may appear if hex is 
not parallelepiped | - +---------------------+--------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | precomputedNormals | false | whether to accelerate by precomputing, at a cost of 12 bytes/face | - +---------------------+--------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | background | ``NaN`` | value that is used when sampling an undefined region outside the volume domain | - +---------------------+--------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------------------------+ - -VDB Volume -~~~~~~~~~~ - -VDB volumes implement a data structure that is very similar to the data structure outlined in Museth [1], they are created by passing the type string “``vdb``” to ``ospNewVolume``. - -The data structure is a hierarchical regular grid at its core: Nodes are regular grids, and each grid cell may either store a constant value (this is called a tile), or child pointers. Nodes in VDB trees are wide: Nodes on the first level have a resolution of 32\ :sup:`3` voxels, on the next level 16\ :sup:`3`, and on the leaf level 8\ :sup:`3` voxels. All nodes on a given level have the same resolution. This makes it easy to find the node containing a coordinate using shift operations (see [1]). VDB leaf nodes are implicit in OSPRay / Open VKL: they are stored as pointers to user-provided data. - -.. figure:: https://ospray.github.io/images/vdb_structure.png - :alt: Topology of VDB volumes. - :width: 80.0% - - Topology of VDB volumes. - -VDB volumes interpret input data as constant cells (which are then potentially filtered). 
This is in contrast to ``structuredRegular`` volumes, which have a vertex-centered interpretation. - -The VDB implementation in OSPRay / Open VKL follows the following goals: - -- Efficient data structure traversal on vector architectures. -- Enable the use of industry-standard ``.vdb`` files created through the OpenVDB library. -- Compatibility with OpenVDB on a leaf data level, so that ``.vdb`` file may be loaded with minimal overhead. - -VDB volumes have the following parameters: - -.. table:: Configuration parameters for VDB volumes. - - +-----------+------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Description | - +===========+==================+===============================================================================================================================================================================================================================================================================================================================================+ - | int | maxSamplingDepth | do not descend further than to this depth during sampling, the maximum value and the default is 3 | - +-----------+------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | uint32[] | node.level | level on which each input node exists, may be 1, 2 or 3 (levels are counted from the root level = 0 down) | - 
+-----------+------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec3i[] | node.origin | the node origin index (per input node) | - +-----------+------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | OSPData[] | node.data | `data <#data>`__ arrays with the node data (per input node). Nodes that are tiles are expected to have single-item arrays. Leaf-nodes with grid data expected to have compact 3D arrays in zyx layout (z changes most quickly) with the correct number of voxels for the ``level``. Only ``OSP_FLOAT`` is supported as field ``OSPDataType``. | - +-----------+------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | int | filter | filter used for reconstructing the field, default is ``OSP_VOLUME_FILTER_TRILINEAR``, alternatively ``OSP_VOLUME_FILTER_NEAREST``, or ``OSP_VOLUME_FILTER_TRICUBIC``. 
| - +-----------+------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | int | gradientFilter | filter used for reconstructing the field during gradient computations, default same as ``filter`` | - +-----------+------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | background | value that is used when sampling an undefined region outside the volume domain, default ``NaN`` | - +-----------+------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -1. Museth, K. VDB: High-Resolution Sparse Volumes with Dynamic Topology. ACM Transactions on Graphics 32(3), 2013. DOI: 10.1145/2487228.2487235 - -Particle Volume -~~~~~~~~~~~~~~~ - -Particle volumes consist of a set of points in space. Each point has a position, a radius, and a weight typically associated with an attribute. Particle volumes are created by passing the type string “``particle``” to ``ospNewVolume``. - -A radial basis function defines the contribution of that particle. Currently, we use the Gaussian radial basis function - -.. 
math:: \phi(P) = w \exp\left(-\frac{(P - p)^2}{2 r^2}\right), - -\ where :math:`P` is the particle position, :math:`p` is the sample position, :math:`r` is the radius and :math:`w` is the weight. At each sample, the scalar field value is then computed as the sum of each radial basis function :math:`\phi`, for each particle that overlaps it. - -The OSPRay / Open VKL implementation is similar to direct evaluation of samples in Reda et al. [2]. It uses an Embree-built BVH with a custom traversal, similar to the method in [1]. - -.. table:: Configuration parameters for particle volumes. - - +---------+-------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=========+=========================+=========+====================================================================================================================================================================================================================================================================================================================================================================================================================+ - | vec3f[] | particle.position | | `data <#data>`__ array of particle positions | - 
+---------+-------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float[] | particle.radius | | `data <#data>`__ array of particle radii | - +---------+-------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float[] | particle.weight | NULL | optional `data <#data>`__ array of particle weights, specifying the height of the kernel. | - +---------+-------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | radiusSupportFactor | 3.0 | The multiplier of the particle radius required for support. Larger radii ensure smooth results at the cost of performance. In the Gaussian kernel, the radius is one standard deviation (:math:`\sigma`), so a value of 3 corresponds to :math:`3 \sigma`. 
| - +---------+-------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | clampMaxCumulativeValue | 0 | The maximum cumulative value possible, set by user. All cumulative values will be clamped to this, and further traversal (RBF summation) of particle contributions will halt when this value is reached. A value of zero or less turns this off. | - +---------+-------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | estimateValueRanges | true | Enable heuristic estimation of value ranges which are used in internal acceleration structures as well as for determining the volume’s overall value range. When set to ``false``, the user *must* specify ``clampMaxCumulativeValue``, and all value ranges will be assumed [0–``clampMaxCumulativeValue``]. Disabling this switch may improve volume commit time, but will make volume rendering less efficient. 
| - +---------+-------------------------+---------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -1. A. Knoll, I. Wald, P. Navratil, A. Bowen, K. Reda, M.E., Papka, and K. Gaither, “RBF Volume Ray Casting on Multicore and Manycore CPUs”, 2014, Computer Graphics Forum, 33: 71–80. doi:10.1111/cgf.12363 - -2. K. Reda, A. Knoll, K. Nomura, M. E. Papka, A. E. Johnson and J. Leigh, “Visualizing large-scale atomistic simulations in ultra-resolution immersive environments”, 2013 IEEE Symposium on Large-Scale Data Analysis and Visualization (LDAV), Atlanta, GA, 2013, pp. 59–65. - -Transfer Function -~~~~~~~~~~~~~~~~~ - -Transfer functions map the scalar values of volumes to color and opacity and thus they can be used to visually emphasize certain features of the volume. To create a new transfer function of given type ``type`` use - -.. code:: cpp - - OSPTransferFunction ospNewTransferFunction(const char *type); - -The returned handle can be assigned to a volumetric model (described below) as parameter “``transferFunction``” using ``ospSetObject``. - -One type of transfer function that is supported by OSPRay is the linear transfer function, which interpolates between given equidistant colors and opacities. It is create by passing the string “``piecewiseLinear``” to ``ospNewTransferFunction`` and it is controlled by these parameters: - -.. table:: Parameters accepted by the linear transfer function. 
- - ======= ========== ============================================= - Type Name Description - ======= ========== ============================================= - vec3f[] color `data <#data>`__ array of colors (linear RGB) - float[] opacity `data <#data>`__ array of opacities - vec2f valueRange domain (scalar range) this function maps from - ======= ========== ============================================= - -The arrays ``color`` and ``opacity`` can be of different lengths. - -VolumetricModels -~~~~~~~~~~~~~~~~ - -Volumes in OSPRay are given volume rendering appearance information through VolumetricModels. This decouples the physical representation of the volume (and possible acceleration structures it contains) from the rendering-specific parameters (where more than one set may exist concurrently). To create a volume instance, call - -.. code:: cpp - - OSPVolumetricModel ospNewVolumetricModel(OSPVolume volume); - -The passed volume can be ``NULL`` as long as the volume to be used is passed as a parameter. If a volume is specified both on object creation and as a parameter, the parameter value is used. If the parameter value is later removed, the volume object passed on object creation is again used. - -.. table:: Parameters understood by VolumetricModel.
- - +---------------------+------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=====================+==================+=========+=======================================================================================================================================+ - | OSPTransferFunction | transferFunction | | `transfer function <#transfer-function>`__ to use | - +---------------------+------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------+ - | float | densityScale | 1.0 | makes volumes uniformly thinner or thicker | - +---------------------+------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------+ - | float | anisotropy | 0.0 | anisotropy of the (Henyey-Greenstein) phase function in [-1–1] (`path tracer <#path-tracer>`__ only), default to isotropic scattering | - +---------------------+------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------+ - | OSPVolume | volume | | optional `volume <#volumes>`__ object this model references | - +---------------------+------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------+ - -Geometries ----------- - -Geometries in OSPRay are objects that describe intersectable surfaces. To create a new geometry object of given type ``type`` use - -.. code:: cpp - - OSPGeometry ospNewGeometry(const char *type); - -Note that in the current implementation geometries are limited to a maximum of 2\ :sup:`32` primitives. 
- -Mesh -~~~~ - -A mesh consisting of either triangles or quads is created by calling ``ospNewGeometry`` with type string “``mesh``”. Once created, a mesh recognizes the following parameters: - -.. table:: Parameters defining a mesh geometry. - - +---------------------+-----------------+----------------------------------------------------------------------------------------+ - | Type | Name | Description | - +=====================+=================+========================================================================================+ - | vec3f[] | vertex.position | `data <#data>`__ array of vertex positions | - +---------------------+-----------------+----------------------------------------------------------------------------------------+ - | vec3f[] | vertex.normal | `data <#data>`__ array of vertex normals | - +---------------------+-----------------+----------------------------------------------------------------------------------------+ - | vec4f[] / vec3f[] | vertex.color | `data <#data>`__ array of vertex colors (linear RGBA/RGB) | - +---------------------+-----------------+----------------------------------------------------------------------------------------+ - | vec2f[] | vertex.texcoord | `data <#data>`__ array of vertex texture coordinates | - +---------------------+-----------------+----------------------------------------------------------------------------------------+ - | vec3ui[] / vec4ui[] | index | `data <#data>`__ array of (either triangle or quad) indices (into the vertex array(s)) | - +---------------------+-----------------+----------------------------------------------------------------------------------------+ - -The data type of index arrays differentiates between the underlying geometry, triangles are used for a index with ``vec3ui`` type and quads for ``vec4ui`` type. 
Quads are internally handled as a pair of two triangles, thus mixing triangles and quads is supported by encoding some triangle as a quad with the last two vertex indices being identical (``w=z``). - -The ``vertex.position`` and ``index`` arrays are mandatory to create a valid mesh. - -Subdivision -~~~~~~~~~~~ - -A mesh consisting of subdivision surfaces, created by specifying a geometry of type “``subdivision``”. Once created, a subdivision recognizes the following parameters: - -.. table:: Parameters defining a Subdivision geometry. - - +---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Description | - +=========+=====================+==========================================================================================================================+ - | vec3f[] | vertex.position | `data <#data>`__ array of vertex positions | - +---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - | vec4f[] | vertex.color | optional `data <#data>`__ array of vertex colors (linear RGBA) | - +---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - | vec2f[] | vertex.texcoord | optional `data <#data>`__ array of vertex texture coordinates | - +---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - | float | level | global level of tessellation, default 5 | - +---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - | uint[] | index | `data <#data>`__ array of indices (into the vertex array(s)) | - 
+---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - | float[] | index.level | optional `data <#data>`__ array of per-edge levels of tessellation, overrides global level | - +---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - | uint[] | face | optional `data <#data>`__ array holding the number of indices/edges (3 to 15) per face, defaults to 4 (a pure quad mesh) | - +---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - | vec2i[] | edgeCrease.index | optional `data <#data>`__ array of edge crease indices | - +---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - | float[] | edgeCrease.weight | optional `data <#data>`__ array of edge crease weights | - +---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - | uint[] | vertexCrease.index | optional `data <#data>`__ array of vertex crease indices | - +---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - | float[] | vertexCrease.weight | optional `data <#data>`__ array of vertex crease weights | - +---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - | uchar | mode | subdivision edge boundary mode, supported modes are: | - +---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - | | | 
``OSP_SUBDIVISION_NO_BOUNDARY`` | - +---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - | | | ``OSP_SUBDIVISION_SMOOTH_BOUNDARY`` (default) | - +---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - | | | ``OSP_SUBDIVISION_PIN_CORNERS`` | - +---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - | | | ``OSP_SUBDIVISION_PIN_BOUNDARY`` | - +---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - | | | ``OSP_SUBDIVISION_PIN_ALL`` | - +---------+---------------------+--------------------------------------------------------------------------------------------------------------------------+ - -The ``vertex`` and ``index`` arrays are mandatory to create a valid subdivision surface. If no ``face`` array is present then a pure quad mesh is assumed (the number of indices must be a multiple of 4). Optionally supported are edge and vertex creases. - -Spheres -~~~~~~~ - -A geometry consisting of individual spheres, each of which can have an own radius, is created by calling ``ospNewGeometry`` with type string “``sphere``”. The spheres will not be tessellated but rendered procedurally and are thus perfectly round. To allow a variety of sphere representations in the application this geometry allows a flexible way of specifying the data of center position and radius within a `data <#data>`__ array: - -.. table:: Parameters defining a spheres geometry. 
- - +---------+-----------------+---------+------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=========+=================+=========+==============================================================================+ - | vec3f[] | sphere.position | | `data <#data>`__ array of center positions | - +---------+-----------------+---------+------------------------------------------------------------------------------+ - | float[] | sphere.radius | NULL | optional `data <#data>`__ array of the per-sphere radius | - +---------+-----------------+---------+------------------------------------------------------------------------------+ - | vec2f[] | sphere.texcoord | NULL | optional `data <#data>`__ array of texture coordinates (constant per sphere) | - +---------+-----------------+---------+------------------------------------------------------------------------------+ - | float | radius | 0.01 | default radius for all spheres (if ``sphere.radius`` is not set) | - +---------+-----------------+---------+------------------------------------------------------------------------------+ - -Curves -~~~~~~ - -A geometry consisting of multiple curves is created by calling ``ospNewGeometry`` with type string “``curve``”. The parameters defining this geometry are listed in the table below. - -.. table:: Parameters defining a curves geometry. 
- - +-------------+------------------------+-------------------------------------------------------------------------------------+ - | Type | Name | Description | - +=============+========================+=====================================================================================+ - | vec4f[] | vertex.position_radius | `data <#data>`__ array of vertex position and per-vertex radius | - +-------------+------------------------+-------------------------------------------------------------------------------------+ - | vec2f[] | vertex.texcoord | `data <#data>`__ array of per-vertex texture coordinates | - +-------------+------------------------+-------------------------------------------------------------------------------------+ - | vec4f[] | vertex.color | `data <#data>`__ array of corresponding vertex colors (linear RGBA) | - +-------------+------------------------+-------------------------------------------------------------------------------------+ - | vec3f[] | vertex.normal | `data <#data>`__ array of curve normals (only for “ribbon” curves) | - +-------------+------------------------+-------------------------------------------------------------------------------------+ - | vec4f[] | vertex.tangent | `data <#data>`__ array of curve tangents (only for “hermite” curves) | - +-------------+------------------------+-------------------------------------------------------------------------------------+ - | uint32[] | index | `data <#data>`__ array of indices to the first vertex or tangent of a curve segment | - +-------------+------------------------+-------------------------------------------------------------------------------------+ - | uchar | type | ``OSPCurveType`` for rendering the curve. 
Supported types are: | - +-------------+------------------------+-------------------------------------------------------------------------------------+ - | | | ``OSP_FLAT`` | - +-------------+------------------------+-------------------------------------------------------------------------------------+ - | | | ``OSP_ROUND`` | - +-------------+------------------------+-------------------------------------------------------------------------------------+ - | | | ``OSP_RIBBON`` | - +-------------+------------------------+-------------------------------------------------------------------------------------+ - | | | ``OSP_DISJOINT`` | - +-------------+------------------------+-------------------------------------------------------------------------------------+ - | uchar | basis | ``OSPCurveBasis`` for defining the curve. Supported bases are: | - +-------------+------------------------+-------------------------------------------------------------------------------------+ - | | | ``OSP_LINEAR`` | - +-------------+------------------------+-------------------------------------------------------------------------------------+ - | | | ``OSP_BEZIER`` | - +-------------+------------------------+-------------------------------------------------------------------------------------+ - | | | ``OSP_BSPLINE`` | - +-------------+------------------------+-------------------------------------------------------------------------------------+ - | | | ``OSP_HERMITE`` | - +-------------+------------------------+-------------------------------------------------------------------------------------+ - | | | ``OSP_CATMULL_ROM`` | - +-------------+------------------------+-------------------------------------------------------------------------------------+ - -Positions in ``vertex.position_radius`` parameter supports per-vertex varying radii with data type ``vec4f[]`` and instantiate Embree curves internally for the relevant type/basis mapping. 
- -The following section describes the properties of different curve bases and how they use the data provided in data buffers: - -OSP_LINEAR - The indices point to the first of 2 consecutive control points in the vertex buffer. The first control point is the start and the second control point the end of the line segment. The curve goes through all control points listed in the vertex buffer. -OSP_BEZIER - The indices point to the first of 4 consecutive control points in the vertex buffer. The first control point represents the start point of the curve, and the 4th control point the end point of the curve. The Bézier basis is interpolating, thus the curve does go exactly through the first and fourth control vertex. -OSP_BSPLINE - The indices point to the first of 4 consecutive control points in the vertex buffer. This basis is not interpolating, thus the curve does in general not go through any of the control points directly. Using this basis, 3 control points can be shared for two continuous neighboring curve segments, e.g., the curves :math:`(p0, p1, p2, p3)` and :math:`(p1, p2, p3, p4)` are C1 continuous. This feature makes this basis a good choice to construct continuous multi-segment curves, as memory consumption can be kept minimal. -OSP_HERMITE - It is necessary to have both vertex buffer and tangent buffer for using this basis. The indices point to the first of 2 consecutive points in the vertex buffer, and the first of 2 consecutive tangents in the tangent buffer. This basis is interpolating, thus does exactly go through the first and second control point, and the first order derivative at the beginning and end matches exactly the value specified in the tangent buffer. When connecting two segments continuously, the end point and tangent of the previous segment can be shared. -OSP_CATMULL_ROM - The indices point to the first of 4 consecutive control points in the vertex buffer.
If :math:`(p0, p1, p2, p3)` represent the points then this basis goes through :math:`p1` and :math:`p2`, with tangents as :math:`(p2-p0)/2` and :math:`(p3-p1)/2`. - -The following section describes the properties of different curve types and how they define the geometry of a curve: - -OSP_FLAT - This type enables faster rendering as the curve is rendered as a connected sequence of ray-facing quads. -OSP_ROUND - This type enables rendering a real geometric surface for the curve which allows closeup views. This mode renders a sweep surface by sweeping a varying radius circle tangential along the curve. -OSP_RIBBON - This type enables normal orientation of the curve and requires a normal buffer be specified along with the vertex buffer. The curve is rendered as a flat band whose center approximately follows the provided vertex buffer and whose normal orientation approximately follows the provided normal buffer. Not supported for basis ``OSP_LINEAR``. -OSP_DISJOINT - Only supported for basis ``OSP_LINEAR``; the segments are open and not connected at the joints, i.e., the curve segments are either individual cones or cylinders. - -Boxes -~~~~~ - -OSPRay can directly render axis-aligned bounding boxes without the need to convert them to quads or triangles. To do so create a boxes geometry by calling ``ospNewGeometry`` with type string “``box``”. - -.. table:: Parameters defining a boxes geometry. - - ======= ==== =============================== - Type Name Description - ======= ==== =============================== - box3f[] box `data <#data>`__ array of boxes - ======= ==== =============================== - -Planes -~~~~~~ - -OSPRay can directly render planes defined by plane equation coefficients in their implicit form :math:`ax + by + cz + d = 0`. By default planes are infinite but their extents can be limited by defining optional bounding boxes. A planes geometry can be created by calling ``ospNewGeometry`` with type string “``plane``”. - -..
table:: Parameters defining a planes geometry. - - +---------+--------------------+-------------------------------------------------------------------+ - | Type | Name | Description | - +=========+====================+===================================================================+ - | vec4f[] | plane.coefficients | `data <#data>`__ array of plane coefficients :math:`(a, b, c, d)` | - +---------+--------------------+-------------------------------------------------------------------+ - | box3f[] | plane.bounds | optional `data <#data>`__ array of bounding boxes | - +---------+--------------------+-------------------------------------------------------------------+ - -Isosurfaces -~~~~~~~~~~~ - -OSPRay can directly render multiple isosurfaces of a volume without first tessellating them. To do so create an isosurfaces geometry by calling ``ospNewGeometry`` with type string “``isosurface``”. The appearance information of the surfaces is set through the Geometric Model. Per-isosurface colors can be set by passing per-primitive colors to the Geometric Model, in order of the isosurface array. - -.. table:: Parameters defining an isosurfaces geometry. - - ========= ======== ===================================================== - Type Name Description - ========= ======== ===================================================== - float isovalue single isovalues - float[] isovalue `data <#data>`__ array of isovalues - OSPVolume volume handle of the `Volume <#volumes>`__ to be isosurfaced - ========= ======== ===================================================== - -GeometricModels -~~~~~~~~~~~~~~~ - -Geometries are matched with surface appearance information through GeometricModels. These take a geometry, which defines the surface representation, and applies either full-object or per-primitive color and material information. To create a geometric model, call - -.. 
code:: cpp - - OSPGeometricModel ospNewGeometricModel(OSPGeometry geometry); - -The passed geometry can be ``NULL`` as long as the geometry to be used is passed as a parameter. If a geometry is specified both on object creation and as a parameter, the parameter value is used. If the parameter value is later removed, the geometry object passed on object creation is again used. - -Color and material are fetched with the primitive ID of the hit (clamped to the valid range, thus a single color or material is fine), or mapped first via the ``index`` array (if present). All parameters are optional, however, some renderers (notably the `path tracer <#path-tracer>`__) require a material to be set. Materials are either handles of ``OSPMaterial``, or indices into the ``material`` array on the `renderer <#renderers>`__, which allows building a `world <#world>`__ that can be used by different types of renderers. - -An ``invertNormals`` flag allows inverting the (shading) normal vectors of the rendered geometry. That is particularly useful for clipping. By changing the orientation of the normal vectors one can control whether the inside or the outside of the clipping geometry is being removed. For example, a clipping geometry with normals oriented outside clips everything that’s inside. - -.. table:: Parameters understood by GeometricModel.
- - +--------------------------+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Description | - +==========================+===============+=============================================================================================================================================================+ - | OSPMaterial / uint32 | material | optional `material <#materials>`__ applied to the geometry, may be an index into the ``material`` parameter on the `renderer <#renderers>`__ (if it exists) | - +--------------------------+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec4f | color | optional color assigned to the geometry (linear RGBA) | - +--------------------------+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | OSPMaterial[] / uint32[] | material | optional `data <#data>`__ array of (per-primitive) materials, may be an index into the ``material`` parameter on the renderer (if it exists) | - +--------------------------+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec4f[] | color | optional `data <#data>`__ array of (per-primitive) colors (linear RGBA) | - +--------------------------+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | uint8[] | index | optional `data <#data>`__ array of per-primitive indices into ``color`` and ``material`` | - 
+--------------------------+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | invertNormals | inverts all shading normals (Ns), default false | - +--------------------------+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | OSPGeometry | geometry | optional [geometry] object this model references | - +--------------------------+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Lights ------- - -To create a new light source of given type ``type`` use - -.. code:: cpp - - OSPLight ospNewLight(const char *type); - -All light sources accept the following parameters: - -.. table:: Parameters accepted by all lights. 
- - +-------+-------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=======+===================+=========+=======================================================================================================================================+ - | vec3f | color | white | color of the light (linear RGB) | - +-------+-------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------+ - | float | intensity | 1 | intensity of the light (a factor) | - +-------+-------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------+ - | uchar | intensityQuantity | | ``OSPIntensityQuantity`` to set the radiometric quantity represented by ``intensity``. The default value depends on the light source. | - +-------+-------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------+ - | bool | visible | true | whether the light can be directly seen | - +-------+-------------------+---------+---------------------------------------------------------------------------------------------------------------------------------------+ - -In OSPRay the ``intensity`` parameter of a light source can correspond to different types of radiometric quantities. The type of the value represented by a light’s ``intensity`` parameter is set using ``intensityQuantity``, which accepts values from the enum type ``OSPIntensityQuantity``. The supported types of ``OSPIntensityQuantity`` differ between the different light sources (see documentation of each specific light source). - -.. 
table:: Types of radiometric quantities used to interpret a light’s ``intensity`` parameter. - - +-----------------------------------+-------------------------------------------------------------------------------------------------------------------------------+ - | Name | Description | - +===================================+===============================================================================================================================+ - | OSP_INTENSITY_QUANTITY_POWER | the overall amount of light energy emitted by the light source into the scene, unit is W | - +-----------------------------------+-------------------------------------------------------------------------------------------------------------------------------+ - | OSP_INTENSITY_QUANTITY_INTENSITY | the overall amount of light emitted by the light in a given direction, unit is W/sr | - +-----------------------------------+-------------------------------------------------------------------------------------------------------------------------------+ - | OSP_INTENSITY_QUANTITY_RADIANCE | the amount of light emitted by a point on the light source in a given direction, unit is W/sr/m\ :sup:`2` | - +-----------------------------------+-------------------------------------------------------------------------------------------------------------------------------+ - | OSP_INTENSITY_QUANTITY_IRRADIANCE | the amount of light arriving at a surface point, assuming the light is oriented towards to the surface, unit is W/m\ :sup:`2` | - +-----------------------------------+-------------------------------------------------------------------------------------------------------------------------------+ - | OSP_INTENSITY_QUANTITY_SCALE | a linear scaling factor for light sources with a built-in quantity (e.g., ``HDRI``, or ``sunSky``). 
| - +-----------------------------------+-------------------------------------------------------------------------------------------------------------------------------+ - -The following light types are supported by most OSPRay renderers. - -Directional Light / Distant Light -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The distant light (or traditionally the directional light) is thought to be far away (outside of the scene), thus its light arrives (almost) as parallel rays. It is created by passing the type string “``distant``” to ``ospNewLight``. The distant light supports ``OSP_INTENSITY_QUANTITY_RADIANCE`` and ``OSP_INTENSITY_QUANTITY_IRRADIANCE`` (default) as ``intensityQuantity`` parameter value. In addition to the `general parameters <#lights>`__ understood by all lights the distant light supports the following special parameters: - -.. table:: Special parameters accepted by the distant light. - - +-------+-----------------+-------------------+----------------------------------------------+ - | Type | Name | Default | Description | - +=======+=================+===================+==============================================+ - | vec3f | direction | :math:`(0, 0, 1)` | main emission direction of the distant light | - +-------+-----------------+-------------------+----------------------------------------------+ - | float | angularDiameter | 0 | apparent size (angle in degree) of the light | - +-------+-----------------+-------------------+----------------------------------------------+ - -Setting the angular diameter to a value greater than zero will result in soft shadows when the renderer uses stochastic sampling (like the `path tracer <#path-tracer>`__). For instance, the apparent size of the sun is about 0.53°. - -Point Light / Sphere Light -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The sphere light (or the special case point light) is a light emitting uniformly in all directions from the surface toward the outside. 
It does not emit any light toward the inside of the sphere. It is created by passing the type string “``sphere``” to ``ospNewLight``. The point light supports ``OSP_INTENSITY_QUANTITY_POWER``, ``OSP_INTENSITY_QUANTITY_INTENSITY`` (default) and ``OSP_INTENSITY_QUANTITY_RADIANCE`` as ``intensityQuantity`` parameter value. In addition to the `general parameters <#lights>`__ understood by all lights the sphere light supports the following special parameters: - -.. table:: Special parameters accepted by the sphere light. - - ===== ======== ================= ============================== - Type Name Default Description - ===== ======== ================= ============================== - vec3f position :math:`(0, 0, 0)` the center of the sphere light - float radius 0 the size of the sphere light - ===== ======== ================= ============================== - -Setting the radius to a value greater than zero will result in soft shadows when the renderer uses stochastic sampling (like the `path tracer <#path-tracer>`__). - -Spotlight / Photometric Light -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The spotlight is a light emitting into a cone of directions. It is created by passing the type string “``spot``” to ``ospNewLight``. The spotlight supports ``OSP_INTENSITY_QUANTITY_POWER``, ``OSP_INTENSITY_QUANTITY_INTENSITY`` (default) and ``OSP_INTENSITY_QUANTITY_RADIANCE`` as ``intensityQuantity`` parameter value. In addition to the `general parameters <#lights>`__ understood by all lights the spotlight supports the special parameters listed in the table. - -.. table:: Special parameters accepted by the spotlight. 
- - +---------+-----------------------+-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=========+=======================+===================+=================================================================================================================================================================================+ - | vec3f | position | :math:`(0, 0, 0)` | the center of the spotlight | - +---------+-----------------------+-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec3f | direction | :math:`(0, 0, 1)` | main emission direction of the spot | - +---------+-----------------------+-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | openingAngle | 180 | full opening angle (in degree) of the spot; outside of this cone is no illumination | - +---------+-----------------------+-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | penumbraAngle | 5 | size (angle in degree) of the “penumbra”, the region between the rim (of the illumination cone) and full intensity of the spot; should be smaller than half of ``openingAngle`` | - +---------+-----------------------+-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | radius | 0 | the size of the spotlight, the 
radius of a disk with normal ``direction`` | - +---------+-----------------------+-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | innerRadius | 0 | in combination with ``radius`` turns the disk into a ring | - +---------+-----------------------+-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float[] | intensityDistribution | | luminous intensity distribution for photometric lights; can be 2D for asymmetric illumination; values are assumed to be uniformly distributed | - +---------+-----------------------+-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec3f | c0 | | orientation, i.e., direction of the C0-(half)plane (only needed if illumination via ``intensityDistribution`` is asymmetric) | - +---------+-----------------------+-------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -.. figure:: https://ospray.github.io/images/spot_light.png - :alt: Angles used by the spotlight. - - Angles used by the spotlight. - -Setting the radius to a value greater than zero will result in soft shadows when the renderer uses stochastic sampling (like the `path tracer <#path-tracer>`__). Additionally setting the inner radius will result in a ring instead of a disk emitting the light. - -Measured light sources (IES, EULUMDAT, …) are supported by providing an ``intensityDistribution`` `data <#data>`__ array to modulate the intensity per direction. 
The mapping uses the C-γ coordinate system (see also the figure below): the values of the first (or only) dimension of ``intensityDistribution`` are uniformly mapped to γ in [0–π]; the first intensity value to 0, the last value to π, thus at least two values need to be present. If the array has a second dimension then the intensities are not rotationally symmetric around ``direction``, but are accordingly mapped to the C-halfplanes in [0–2π]; the first “row” of values to 0 and 2π, the other rows such that they have uniform distance to their neighbors.
- - +-------+----------+-------------------+------------------------------------------+ - | Type | Name | Default | Description | - +=======+==========+===================+==========================================+ - | vec3f | position | :math:`(0, 0, 0)` | position of one vertex of the quad light | - +-------+----------+-------------------+------------------------------------------+ - | vec3f | edge1 | :math:`(1, 0, 0)` | vector to one adjacent vertex | - +-------+----------+-------------------+------------------------------------------+ - | vec3f | edge2 | :math:`(0, 1, 0)` | vector to the other adjacent vertex | - +-------+----------+-------------------+------------------------------------------+ - -.. figure:: https://ospray.github.io/images/quad_light.png - :alt: Defining a quad light which emits toward the reader. - - Defining a quad light which emits toward the reader. - -The emission side is determined by the cross product of ``edge1``\ ×\ ``edge2``. Note that only renderers that use stochastic sampling (like the path tracer) will compute soft shadows from the quad light. Other renderers will just sample the center of the quad light, which results in hard shadows. - -Cylinder Light -~~~~~~~~~~~~~~ - -The cylinder light is a cylindrical, procedural area light source emitting uniformly outwardly into the space beyond the boundary. It is created by passing the type string “``cylinder``” to ``ospNewLight``. The cylinder light supports ``OSP_INTENSITY_QUANTITY_POWER``, ``OSP_INTENSITY_QUANTITY_INTENSITY`` and ``OSP_INTENSITY_QUANTITY_RADIANCE`` (default) as ``intensityQuantity`` parameter. In addition to the `general parameters <#lights>`__ understood by all lights the cylinder light supports the following special parameters: - -.. table:: Special parameters accepted by the cylinder light. 
- - ===== ========= ================= ===================================== - Type Name Default Description - ===== ========= ================= ===================================== - vec3f position0 :math:`(0, 0, 0)` position of the start of the cylinder - vec3f position1 :math:`(0, 0, 1)` position of the end of the cylinder - float radius 1 radius of the cylinder - ===== ========= ================= ===================================== - -Note that only renderers that use stochastic sampling (like the path tracer) will compute soft shadows from the cylinder light. Other renderers will just sample the closest point on the cylinder light, which results in hard shadows. - -HDRI Light -~~~~~~~~~~ - -The HDRI light is a textured light source surrounding the scene and illuminating it from infinity. It is created by passing the type string “``hdri``” to ``ospNewLight``. The values of the HDRI correspond to radiance and therefore the HDRI light only accepts ``OSP_INTENSITY_QUANTITY_SCALE`` as ``intensityQuantity`` parameter value. In addition to the `general parameters <#lights>`__ the HDRI light supports the following special parameters: - -.. table:: Special parameters accepted by the HDRI light. 
- - +------------+-----------+-------------------+---------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +============+===========+===================+=====================================================================================================================+ - | vec3f | up | :math:`(0, 1, 0)` | up direction of the light | - +------------+-----------+-------------------+---------------------------------------------------------------------------------------------------------------------+ - | vec3f | direction | :math:`(0, 0, 1)` | direction to which the center of the texture will be mapped to (analog to `panoramic camera <#panoramic-camera>`__) | - +------------+-----------+-------------------+---------------------------------------------------------------------------------------------------------------------+ - | OSPTexture | map | | environment map in latitude / longitude format | - +------------+-----------+-------------------+---------------------------------------------------------------------------------------------------------------------+ - -.. figure:: https://ospray.github.io/images/hdri_light.png - :alt: Orientation and Mapping of an HDRI Light. - - Orientation and Mapping of an HDRI Light. - -Note that the `SciVis renderer <#scivis-renderer>`__ only shows the HDRI light in the background (like an environment map) without computing illumination of the scene. - -Ambient Light -~~~~~~~~~~~~~ - -The ambient light surrounds the scene and illuminates it from infinity with constant radiance (determined by combining the `parameters ``color`` and ``intensity`` <#lights>`__). It is created by passing the type string “``ambient``” to ``ospNewLight``. The ambient light supports ``OSP_INTENSITY_QUANTITY_RADIANCE`` and ``OSP_INTENSITY_QUANTITY_IRRADIANCE`` (default) as ``intensityQuantity`` parameter value. 
- -Note that the `SciVis renderer <#scivis-renderer>`__ uses ambient lights to control the color and intensity of the computed ambient occlusion (AO). - -Sun-Sky Light -~~~~~~~~~~~~~ - -The sun-sky light is a combination of a ``distant`` light for the sun and a procedural ``hdri`` light for the sky. It is created by passing the type string “``sunSky``” to ``ospNewLight``. The sun-sky light surrounds the scene and illuminates it from infinity and can be used for rendering outdoor scenes. The radiance values are calculated using the Hošek-Wilkie sky model and solar radiance function. The underlying model of the sun-sky light returns radiance values and therefore the light only accepts ``OSP_INTENSITY_QUANTITY_SCALE`` as ``intensityQuantity`` parameter value. To rescale the returned radiance of the sky model the default value for the ``intensity`` parameter is set to ``0.025``. In addition to the `general parameters <#lights>`__ the following special parameters are supported: - -.. table:: Special parameters accepted by the ``sunSky`` light. 
- - +-------+------------------+--------------------+----------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=======+==================+====================+====================================================================================================+ - | vec3f | up | :math:`(0, 1, 0)` | zenith of sky | - +-------+------------------+--------------------+----------------------------------------------------------------------------------------------------+ - | vec3f | direction | :math:`(0, -1, 0)` | main emission direction of the sun | - +-------+------------------+--------------------+----------------------------------------------------------------------------------------------------+ - | float | turbidity | 3 | atmospheric turbidity due to particles, in [1–10] | - +-------+------------------+--------------------+----------------------------------------------------------------------------------------------------+ - | float | albedo | 0.3 | ground reflectance, in [0–1] | - +-------+------------------+--------------------+----------------------------------------------------------------------------------------------------+ - | float | horizonExtension | 0.01 | extend the sky dome by stretching the horizon, fraction of the lower hemisphere to cover, in [0–1] | - +-------+------------------+--------------------+----------------------------------------------------------------------------------------------------+ - -The lowest elevation for the sun is restricted to the horizon. - -Note that the `SciVis renderer <#scivis-renderer>`__ only computes illumination from the sun (yet the sky is still shown in the background, like an environment map). - -Emissive Objects -~~~~~~~~~~~~~~~~ - -The `path tracer <#path-tracer>`__ will consider illumination by `geometries <#geometries>`__ which have a light emitting material assigned (for example the `Luminous <#luminous>`__ material). 
- -Scene Hierarchy ---------------- - -Groups -~~~~~~ - -Groups in OSPRay represent collections of GeometricModels, VolumetricModels and Lights which share a common local-space coordinate system. To create a group call - -.. code:: cpp - - OSPGroup ospNewGroup(); - -Groups take arrays of geometric models, volumetric models, clipping geometric models and lights, but they are all optional. In other words, there is no need to create empty arrays if there are no geometries, volumes or lights in the group. - -By adding ``OSPGeometricModel``\ s to the ``clippingGeometry`` array a clipping geometry feature is enabled. Geometries assigned to this parameter will be used as clipping geometries. Any supported geometry can be used for clipping [4]_, the only requirement is that it has to distinctly partition space into clipping and non-clipping one. The use of clipping geometry that is not closed or infinite could result in rendering artifacts. User can decide which part of space is clipped by changing shading normals orientation with the ``invertNormals`` flag of the `GeometricModel <#geometricmodels>`__. All geometries and volumes assigned to ``geometry`` or ``volume`` will be clipped. All clipping geometries from all groups and `Instances <#instances>`__ will be combined together – a union of these areas will be applied to all other objects in the `world <#world>`__. - -.. table:: Parameters understood by groups. 
- - +----------------------+------------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +======================+==================+=========+=============================================================================================================================================================+ - | OSPGeometricModel[] | geometry | NULL | `data <#data>`__ array of `GeometricModels <#geometricmodels>`__ | - +----------------------+------------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | OSPVolumetricModel[] | volume | NULL | `data <#data>`__ array of `VolumetricModels <#volumetricmodels>`__ | - +----------------------+------------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | OSPGeometricModel[] | clippingGeometry | NULL | `data <#data>`__ array of `GeometricModels <#geometricmodels>`__ used for clipping | - +----------------------+------------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | OSPLight[] | light | NULL | `data <#data>`__ array of `lights <#lights>`__ | - +----------------------+------------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | dynamicScene | false | use RTC_SCENE_DYNAMIC flag (faster BVH build, slower ray traversal), otherwise uses RTC_SCENE_STATIC flag (faster ray traversal, slightly slower BVH build) | - 
+----------------------+------------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | compactMode | false | tell Embree to use a more compact BVH in memory by trading ray traversal performance | - +----------------------+------------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | robustMode | false | tell Embree to enable more robust ray intersection code paths (slightly slower) | - +----------------------+------------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Instances -~~~~~~~~~ - -Instances in OSPRay represent a single group’s placement into the world via a transform. To create and instance call - -.. code:: cpp - - OSPInstance ospNewInstance(OSPGroup); - -.. table:: Parameters understood by instances. 
- - +------------+--------------------+----------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +============+====================+==========+===========================================================================================================================================================+ - | affine3f | transform | identity | world-space transform for all attached geometries and volumes, overridden by ``motion.*`` arrays | - +------------+--------------------+----------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | affine3f[] | motion.transform | | uniformly distributed world-space transforms | - +------------+--------------------+----------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec3f[] | motion.scale | | uniformly distributed world-space scale, overridden by ``motion.transform`` | - +------------+--------------------+----------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec3f[] | motion.pivot | | uniformly distributed world-space translation which is applied before ``motion.rotation`` (i.e., the rotation center), overridden by ``motion.transform`` | - +------------+--------------------+----------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | quatf[] | motion.rotation | | uniformly distributed world-space quaternion rotation, overridden by ``motion.transform`` | - 
+------------+--------------------+----------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec3f[] | motion.translation | | uniformly distributed world-space translation, overridden by ``motion.transform`` | - +------------+--------------------+----------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | box1f | time | [0, 1] | time associated with first and last key in ``motion.*`` arrays (for motion blur) | - +------------+--------------------+----------+-----------------------------------------------------------------------------------------------------------------------------------------------------------+ - -World -~~~~~ - -Worlds are a container of scene data represented by `instances <#instances>`__. To create an (empty) world call - -.. code:: cpp - - OSPWorld ospNewWorld(); - -Objects are placed in the world through an array of instances. Similar to `groups <#groups>`__, the array of instances is optional: there is no need to create empty arrays if there are no instances (though there will be nothing to render). - -Applications can query the world (axis-aligned) bounding box after the world has been committed. To get this information, call - -.. code:: cpp - - OSPBounds ospGetBounds(OSPObject); - -The result is returned in the provided ``OSPBounds``\ [5]_ struct: - -.. code:: cpp - - typedef struct { - float lower[3]; - float upper[3]; - } OSPBounds; - -This call can also take ``OSPGroup`` and ``OSPInstance`` as well: all other object types will return an empty bounding box. - -Finally, Worlds can be configured with parameters for making various feature/performance trade-offs (similar to groups). - -.. table:: Parameters understood by worlds. 
- - +---------------+--------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +===============+==============+=========+=============================================================================================================================================================+ - | OSPInstance[] | instance | NULL | `data <#data>`__ array with handles of the `instances <#instances>`__ | - +---------------+--------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | OSPLight[] | light | NULL | `data <#data>`__ array with handles of the `lights <#lights>`__ | - +---------------+--------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | dynamicScene | false | use RTC_SCENE_DYNAMIC flag (faster BVH build, slower ray traversal), otherwise uses RTC_SCENE_STATIC flag (faster ray traversal, slightly slower BVH build) | - +---------------+--------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | compactMode | false | tell Embree to use a more compact BVH in memory by trading ray traversal performance | - +---------------+--------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | bool | robustMode | false | tell Embree to enable more robust ray intersection code paths (slightly slower) | - 
+---------------+--------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Renderers ---------- - -A renderer is the central object for rendering in OSPRay. Different renderers implement different features and support different materials. To create a new renderer of given type ``type`` use - -.. code:: cpp - - OSPRenderer ospNewRenderer(const char *type); - -General parameters of all renderers are - -.. table:: Parameters understood by all renderers. - - +-----------------------+-------------------+---------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=======================+===================+===========================+==========================================================================================================================================================+ - | int | pixelSamples | 1 | samples per pixel | - +-----------------------+-------------------+---------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | int | maxPathLength | 20 | maximum ray recursion depth | - +-----------------------+-------------------+---------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | minContribution | 0.001 | sample contributions below this value will be neglected to speedup rendering | - 
+-----------------------+-------------------+---------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | varianceThreshold | 0 | threshold for adaptive accumulation | - +-----------------------+-------------------+---------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float / vec3f / vec4f | backgroundColor | black, transparent | background color and alpha (linear A/RGB/RGBA), if no ``map_backplate`` is set | - +-----------------------+-------------------+---------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | OSPTexture | map_backplate | | optional `texture <#texture>`__ image used as background (use texture type ``texture2d``) | - +-----------------------+-------------------+---------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | OSPTexture | map_maxDepth | | optional screen-sized float `texture <#texture>`__ with maximum far distance per pixel (use texture type ``texture2d``) | - +-----------------------+-------------------+---------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | OSPMaterial[] | material | | optional `data <#data>`__ array of `materials <#materials>`__ which can be indexed by a `GeometricModel <#geometricmodels>`__\ ’s ``material`` parameter | - 
+-----------------------+-------------------+---------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ - | uchar | pixelFilter | ``OSP_PIXELFILTER_GAUSS`` | ``OSPPixelFilterType`` to select the pixel filter used by the renderer for antialiasing. Possible pixel filters are listed below. | - +-----------------------+-------------------+---------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------+ - -OSPRay’s renderers support a feature called adaptive accumulation, which accelerates progressive `rendering <#rendering>`__ by stopping the rendering and refinement of image regions that have an estimated variance below the ``varianceThreshold``. This feature requires a `framebuffer <#framebuffer>`__ with an ``OSP_FB_VARIANCE`` channel. - -Per default the background of the rendered image will be transparent black, i.e., the alpha channel holds the opacity of the rendered objects. This eases transparency-aware blending of the image with an arbitrary background image by the application. The parameter ``backgroundColor`` or ``map_backplate`` can be used to already blend with a constant background color or backplate texture, respectively, (and alpha) during rendering. - -OSPRay renderers support depth composition with images of other renderers, for example to incorporate help geometries of a 3D UI that were rendered with OpenGL. The screen-sized `texture <#texture>`__ ``map_maxDepth`` must have format ``OSP_TEXTURE_R32F`` and flag ``OSP_TEXTURE_FILTER_NEAREST``. The fetched values are used to limit the distance of primary rays, thus objects of other renderers can hide objects rendered by OSPRay. - -OSPRay supports antialiasing in image space by using pixel filters, which are centered around the center of a pixel. 
The size :math:`w×w` of the filter depends on the selected filter type. The types of supported pixel filters are defined by the ``OSPPixelFilterType`` enum and can be set using the ``pixelFilter`` parameter. - -.. table:: Pixel filter types supported by OSPRay for antialiasing in image space. - - +---------------------------------+---------------------------------------------------------------------------------------------------------------------------+ - | Name | Description | - +=================================+===========================================================================================================================+ - | OSP_PIXELFILTER_POINT | a point filter only samples the center of the pixel, therefore the filter width is :math:`w = 0` | - +---------------------------------+---------------------------------------------------------------------------------------------------------------------------+ - | OSP_PIXELFILTER_BOX | a uniform box filter with a width of :math:`w = 1` | - +---------------------------------+---------------------------------------------------------------------------------------------------------------------------+ - | OSP_PIXELFILTER_GAUSS | a truncated, smooth Gaussian filter with a standard deviation of :math:`\sigma = 0.5` and a filter width of :math:`w = 3` | - +---------------------------------+---------------------------------------------------------------------------------------------------------------------------+ - | OSP_PIXELFILTER_MITCHELL | the Mitchell-Netravali filter with a width of :math:`w = 4` | - +---------------------------------+---------------------------------------------------------------------------------------------------------------------------+ - | OSP_PIXELFILTER_BLACKMAN_HARRIS | the Blackman-Harris filter with a width of :math:`w = 3` | - 
+---------------------------------+---------------------------------------------------------------------------------------------------------------------------+ - -SciVis Renderer -~~~~~~~~~~~~~~~ - -The SciVis renderer is a fast ray tracer for scientific visualization which supports volume rendering and ambient occlusion (AO). It is created by passing the type string “``scivis``” to ``ospNewRenderer``. In addition to the `general parameters <#renderer>`__ understood by all renderers, the SciVis renderer supports the following parameters: - -.. table:: Special parameters understood by the SciVis renderer. - - +-------+--------------------+---------------+------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=======+====================+===============+==============================================================================================================================+ - | bool | shadows | false | whether to compute (hard) shadows | - +-------+--------------------+---------------+------------------------------------------------------------------------------------------------------------------------------+ - | int | aoSamples | 0 | number of rays per sample to compute ambient occlusion | - +-------+--------------------+---------------+------------------------------------------------------------------------------------------------------------------------------+ - | float | aoDistance | 10\ :sup:`20` | maximum distance to consider for ambient occlusion | - +-------+--------------------+---------------+------------------------------------------------------------------------------------------------------------------------------+ - | float | volumeSamplingRate | 1 | sampling rate for volumes | - 
+-------+--------------------+---------------+------------------------------------------------------------------------------------------------------------------------------+ - | bool | visibleLights | false | whether light sources are potentially visible (as in the `path tracer <#path-tracer>`__, regarding each light’s ``visible``) | - +-------+--------------------+---------------+------------------------------------------------------------------------------------------------------------------------------+ - -Note that the intensity (and color) of AO is deduced from an `ambient light <#ambient-light>`__ in the ``lights`` array. [6]_ If ``aoSamples`` is zero (the default) then ambient lights cause ambient illumination (without occlusion). - -Ambient Occlusion Renderer -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This renderer supports only a subset of the features of the `SciVis renderer <#scivis-renderer>`__ to gain performance. As the name suggest its main shading method is ambient occlusion (AO), `lights <#lights>`__ are *not* considered at all and , Volume rendering is supported. The Ambient Occlusion renderer is created by passing the type string “``ao``” to ``ospNewRenderer``. In addition to the `general parameters <#renderer>`__ understood by all renderers the following parameters are supported as well: - -.. table:: Special parameters understood by the Ambient Occlusion renderer. 
- - +-----------+--------------------+---------------+--------------------------------------------------------+ - | Type | Name | Default | Description | - +===========+====================+===============+========================================================+ - | int | aoSamples | 1 | number of rays per sample to compute ambient occlusion | - +-----------+--------------------+---------------+--------------------------------------------------------+ - | float | aoDistance | 10\ :sup:`20` | maximum distance to consider for ambient occlusion | - +-----------+--------------------+---------------+--------------------------------------------------------+ - | float | aoIntensity | 1 | ambient occlusion strength | - +-----------+--------------------+---------------+--------------------------------------------------------+ - | float | volumeSamplingRate | 1 | sampling rate for volumes | - +-----------+--------------------+---------------+--------------------------------------------------------+ - -Path Tracer -~~~~~~~~~~~ - -The path tracer supports soft shadows, indirect illumination and realistic materials. This renderer is created by passing the type string “``pathtracer``” to ``ospNewRenderer``. In addition to the `general parameters <#renderer>`__ understood by all renderers the path tracer supports the following special parameters: - -.. table:: Special parameters understood by the path tracer. 
- - +--------+----------------------+---------+-------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +========+======================+=========+===========================================================================================+ - | int | lightSamples | all | number of random light samples per path vertex, per default all light sources are sampled | - +--------+----------------------+---------+-------------------------------------------------------------------------------------------+ - | int | roulettePathLength | 5 | ray recursion depth at which to start Russian roulette termination | - +--------+----------------------+---------+-------------------------------------------------------------------------------------------+ - | float | maxContribution | ∞ | samples are clamped to this value before they are accumulated into the framebuffer | - +--------+----------------------+---------+-------------------------------------------------------------------------------------------+ - | bool | backgroundRefraction | false | allow for alpha blending even if background is seen through refractive objects like glass | - +--------+----------------------+---------+-------------------------------------------------------------------------------------------+ - -The path tracer requires that `materials <#materials>`__ are assigned to `geometries <#geometries>`__, otherwise surfaces are treated as completely black. - -The path tracer supports `volumes <#volumes>`__ with multiple scattering. The scattering albedo can be specified using the `transfer function <#transfer-function>`__. Extinction is assumed to be spectrally constant. - -Materials -~~~~~~~~~ - -Materials describe how light interacts with surfaces, they give objects their distinctive look. To let the given renderer create a new material of given type ``type`` call - -.. 
code:: cpp - - OSPMaterial ospNewMaterial(const char *, const char *material_type); - -Please note that the first argument is ignored. - -The returned handle can then be used to assign the material to a given geometry with - -.. code:: cpp - - void ospSetObject(OSPGeometricModel, "material", OSPMaterial); - -OBJ Material -^^^^^^^^^^^^ - -The OBJ material is the workhorse material supported by both the `SciVis renderer <#scivis-renderer>`__ and the `path tracer <#path-tracer>`__ (the `Ambient Occlusion renderer <#ambient-occlusion-renderer>`__ only uses the ``kd`` and ``d`` parameter). It offers widely used common properties like diffuse and specular reflection and is based on the `MTL material format `__ of Lightwave’s OBJ scene files. To create an OBJ material pass the type string “``obj``” to ``ospNewMaterial``. Its main parameters are - -.. table:: Main parameters of the OBJ material. - - +------------+----------+-----------+---------------------------------------------------------+ - | Type | Name | Default | Description | - +============+==========+===========+=========================================================+ - | vec3f | kd | white 0.8 | diffuse color (linear RGB) | - +------------+----------+-----------+---------------------------------------------------------+ - | vec3f | ks | black | specular color (linear RGB) | - +------------+----------+-----------+---------------------------------------------------------+ - | float | ns | 10 | shininess (Phong exponent), usually in [2–10\ :sup:`4`] | - +------------+----------+-----------+---------------------------------------------------------+ - | float | d | opaque | opacity | - +------------+----------+-----------+---------------------------------------------------------+ - | vec3f | tf | black | transparency filter color (linear RGB) | - +------------+----------+-----------+---------------------------------------------------------+ - | OSPTexture | map_bump | NULL | normal map | - 
+------------+----------+-----------+---------------------------------------------------------+ - -In particular when using the path tracer it is important to adhere to the principle of energy conservation, i.e., that the amount of light reflected by a surface is not larger than the light arriving. Therefore the path tracer issues a warning and renormalizes the color parameters if the sum of ``Kd``, ``Ks``, and ``Tf`` is larger than one in any color channel. Similarly important to mention is that almost all materials of the real world reflect at most only about 80% of the incoming light. So even for a white sheet of paper or white wall paint it is better not to set ``Kd`` larger than 0.8; otherwise rendering times are unnecessarily long and the contrast in the final images is low (for example, the corners of a white room would hardly be discernible, as can be seen in the figure below). - -.. figure:: https://ospray.github.io/images/diffuse_rooms.png - :alt: Comparison of diffuse rooms with 100% reflecting white paint (left) and realistic 80% reflecting white paint (right), which leads to higher overall contrast. Note that exposure has been adjusted to achieve similar brightness levels. - :width: 80.0% - - Comparison of diffuse rooms with 100% reflecting white paint (left) and realistic 80% reflecting white paint (right), which leads to higher overall contrast. Note that exposure has been adjusted to achieve similar brightness levels. - -If present, the color component of `geometries <#geometries>`__ is also used for the diffuse color ``Kd`` and the alpha component is also used for the opacity ``d``. - -Normal mapping can simulate small geometric features via the texture ``map_Bump``. The normals :math:`n` in the normal map are with respect to the local tangential shading coordinate system and are encoded as :math:`½(n+1)`, thus a texel :math:`(0.5, 0.5, 1)`\ [7]_ represents the unperturbed shading normal :math:`(0, 0, 1)`. 
Because of this encoding an sRGB gamma `texture <#texture>`__ format is ignored and normals are always fetched as linear from a normal map. Note that the orientation of normal maps is important for a visually consistent look: by convention OSPRay uses a coordinate system with the origin in the lower left corner; thus a convexity will look green toward the top of the texture image (see also the example image of a normal map). If this is not the case flip the normal map vertically or invert its green channel. - -.. figure:: https://ospray.github.io/images/normalmap_frustum.png - :alt: Normal map representing an exalted square pyramidal frustum. - :width: 60.0% - - Normal map representing an exalted square pyramidal frustum. - -Note that ``Tf`` colored transparency is implemented in the SciVis and the path tracer but normal mapping with ``map_Bump`` is currently supported in the path tracer only. - -All parameters (except ``Tf``) can be textured by passing a `texture <#texture>`__ handle, prefixed with “``map_``”. The fetched texels are multiplied by the respective parameter value. If only the texture is given (but not the corresponding parameter), only the texture is used (the default value of the parameter is *not* multiplied). The color textures ``map_Kd`` and ``map_Ks`` are typically in one of the sRGB gamma encoded formats, whereas textures ``map_Ns`` and ``map_d`` are usually in a linear format (and only the first component is used). Additionally, all textures support `texture transformations <#texture-transformations>`__. - -.. figure:: https://ospray.github.io/images/material_OBJ.jpg - :alt: Rendering of a OBJ material with wood textures. - :width: 60.0% - - Rendering of a OBJ material with wood textures. 
- -Principled -^^^^^^^^^^ - -The Principled material is the most complex material offered by the `path tracer <#path-tracer>`__, which is capable of producing a wide variety of materials (e.g., plastic, metal, wood, glass) by combining multiple different layers and lobes. It uses the GGX microfacet distribution with approximate multiple scattering for dielectrics and metals, uses the Oren-Nayar model for diffuse reflection, and is energy conserving. To create a Principled material, pass the type string “``principled``” to ``ospNewMaterial``. Its parameters are listed in the table below. - -.. table:: Parameters of the Principled material. - - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=======+===================+===========+=======================================================================================================================+ - | vec3f | baseColor | white 0.8 | base reflectivity (diffuse and/or metallic, linear RGB) | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | vec3f | edgeColor | white | edge tint (metallic only, linear RGB) | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | metallic | 0 | mix between dielectric (diffuse and/or specular) and metallic (specular only with complex IOR) in [0–1] | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | diffuse | 1 | diffuse reflection weight in [0–1] | - 
+-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | specular | 1 | specular reflection/transmission weight in [0–1] | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | ior | 1 | dielectric index of refraction | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | transmission | 0 | specular transmission weight in [0–1] | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | vec3f | transmissionColor | white | attenuated color due to transmission (Beer’s law, linear RGB) | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | transmissionDepth | 1 | distance at which color attenuation is equal to transmissionColor | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | roughness | 0 | diffuse and specular roughness in [0–1], 0 is perfectly smooth | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | anisotropy | 0 | amount of specular anisotropy in [0–1] | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | rotation | 0 | rotation of the direction of anisotropy in [0–1], 1 is going full circle | - 
+-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | normal | 1 | default normal map/scale for all layers | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | baseNormal | 1 | base normal map/scale (overrides default normal) | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | bool | thin | false | flag specifying whether the material is thin or solid | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | thickness | 1 | thickness of the material (thin only), affects the amount of color attenuation due to specular transmission | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | backlight | 0 | amount of diffuse transmission (thin only) in [0–2], 1 is 50% reflection and 50% transmission, 2 is transmission only | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | coat | 0 | clear coat layer weight in [0–1] | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | coatIor | 1.5 | clear coat index of refraction | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | vec3f | coatColor | white | clear coat color tint 
(linear RGB) | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | coatThickness | 1 | clear coat thickness, affects the amount of color attenuation | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | coatRoughness | 0 | clear coat roughness in [0–1], 0 is perfectly smooth | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | coatNormal | 1 | clear coat normal map/scale (overrides default normal) | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | sheen | 0 | sheen layer weight in [0–1] | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | vec3f | sheenColor | white | sheen color tint (linear RGB) | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | sheenTint | 0 | how much sheen is tinted from sheenColor toward baseColor | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | sheenRoughness | 0.2 | sheen roughness in [0–1], 0 is perfectly smooth | - +-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - | float | opacity | 1 | cut-out opacity/transparency, 1 is fully opaque | - 
+-------+-------------------+-----------+-----------------------------------------------------------------------------------------------------------------------+ - -All parameters can be textured by passing a `texture <#texture>`__ handle, prefixed with “``map_``” (e.g., “``map_baseColor``”). `texture transformations <#texture-transformations>`__ are supported as well. - -.. figure:: https://ospray.github.io/images/material_Principled.jpg - :alt: Rendering of a Principled coated brushed metal material with textured anisotropic rotation and a dust layer (sheen) on top. - :width: 60.0% - - Rendering of a Principled coated brushed metal material with textured anisotropic rotation and a dust layer (sheen) on top. - -CarPaint -^^^^^^^^ - -The CarPaint material is a specialized version of the Principled material for rendering different types of car paints. To create a CarPaint material, pass the type string “``carPaint``” to ``ospNewMaterial``. Its parameters are listed in the table below. - -.. table:: Parameters of the CarPaint material. 
- - +-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=======+=================+===========+========================================================================================================================+ - | vec3f | baseColor | white 0.8 | diffuse base reflectivity (linear RGB) | - +-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - | float | roughness | 0 | diffuse roughness in [0–1], 0 is perfectly smooth | - +-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - | float | normal | 1 | normal map/scale | - +-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - | vec3f | flakeColor | Aluminium | color of metallic flakes (linear RGB) | - +-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - | float | flakeDensity | 0 | density of metallic flakes in [0–1], 0 disables flakes, 1 fully covers the surface with flakes | - +-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - | float | flakeScale | 100 | scale of the flake structure, higher values increase the amount of flakes | - +-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - | float | flakeSpread | 0.3 | flake spread in [0–1] | - 
+-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - | float | flakeJitter | 0.75 | flake randomness in [0–1] | - +-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - | float | flakeRoughness | 0.3 | flake roughness in [0–1], 0 is perfectly smooth | - +-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - | float | coat | 1 | clear coat layer weight in [0–1] | - +-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - | float | coatIor | 1.5 | clear coat index of refraction | - +-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - | vec3f | coatColor | white | clear coat color tint (linear RGB) | - +-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - | float | coatThickness | 1 | clear coat thickness, affects the amount of color attenuation | - +-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - | float | coatRoughness | 0 | clear coat roughness in [0–1], 0 is perfectly smooth | - +-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - | float | coatNormal | 1 | clear coat normal map/scale | - 
+-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - | vec3f | flipflopColor | white | reflectivity of coated flakes at grazing angle, used together with coatColor produces a pearlescent paint (linear RGB) | - +-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - | float | flipflopFalloff | 1 | flip flop color falloff, 1 disables the flip flop effect | - +-------+-----------------+-----------+------------------------------------------------------------------------------------------------------------------------+ - -All parameters can be textured by passing a `texture <#texture>`__ handle, prefixed with “``map_``” (e.g., “``map_baseColor``”). `texture transformations <#texture-transformations>`__ are supported as well. - -.. figure:: https://ospray.github.io/images/material_CarPaint.jpg - :alt: Rendering of a pearlescent CarPaint material. - :width: 60.0% - - Rendering of a pearlescent CarPaint material. - -Metal -^^^^^ - -The `path tracer <#path-tracer>`__ offers a physical metal, supporting changing roughness and realistic color shifts at edges. To create a Metal material pass the type string “``metal``” to ``ospNewMaterial``. Its parameters are - -.. table:: Parameters of the Metal material. 
- - +---------+-----------+-----------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=========+===========+===========+=============================================================================================================================================================+ - | vec3f[] | ior | Aluminium | `data <#data>`__ array of spectral samples of complex refractive index, each entry in the form (wavelength, eta, k), ordered by wavelength (which is in nm) | - +---------+-----------+-----------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec3f | eta | | RGB complex refractive index, real part | - +---------+-----------+-----------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec3f | k | | RGB complex refractive index, imaginary part | - +---------+-----------+-----------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | roughness | 0.1 | roughness in [0–1], 0 is perfect mirror | - +---------+-----------+-----------+-------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -The main appearance (mostly the color) of the Metal material is controlled by the physical parameters ``eta`` and ``k``, the wavelength-dependent, complex index of refraction. These coefficients are quite counter-intuitive but can be found in `published measurements `__. 
For accuracy the index of refraction can be given as an array of spectral samples in ``ior``, each sample a triplet of wavelength (in nm), eta, and k, ordered monotonically increasing by wavelength; OSPRay will then calculate the Fresnel in the spectral domain. Alternatively, ``eta`` and ``k`` can also be specified as approximated RGB coefficients; some examples are given in the table below. - -.. table:: Index of refraction of selected metals as approximated RGB coefficients, based on data from https://refractiveindex.info/. - - ============= ===================== =============== - Metal eta k - ============= ===================== =============== - Ag, Silver (0.051, 0.043, 0.041) (5.3, 3.6, 2.3) - Al, Aluminium (1.5, 0.98, 0.6) (7.6, 6.6, 5.4) - Au, Gold (0.07, 0.37, 1.5) (3.7, 2.3, 1.7) - Cr, Chromium (3.2, 3.1, 2.3) (3.3, 3.3, 3.1) - Cu, Copper (0.1, 0.8, 1.1) (3.5, 2.5, 2.4) - ============= ===================== =============== - -The ``roughness`` parameter controls the variation of microfacets and thus how polished the metal will look. The roughness can be modified by a `texture <#texture>`__ ``map_roughness`` (`texture transformations <#texture-transformations>`__ are supported as well) to create notable edging effects. - -.. figure:: https://ospray.github.io/images/material_Metal.jpg - :alt: Rendering of golden Metal material with textured roughness. - :width: 60.0% - - Rendering of golden Metal material with textured roughness. - -Alloy -^^^^^ - -The `path tracer <#path-tracer>`__ offers an alloy material, which behaves similarly to `Metal <#metal>`__, but allows for more intuitive and flexible control of the color. To create an Alloy material pass the type string “``alloy``” to ``ospNewMaterial``. Its parameters are - -.. table:: Parameters of the Alloy material. 
- - +-------+-----------+-----------+---------------------------------------------------------+ - | Type | Name | Default | Description | - +=======+===========+===========+=========================================================+ - | vec3f | color | white 0.9 | reflectivity at normal incidence (0 degree, linear RGB) | - +-------+-----------+-----------+---------------------------------------------------------+ - | vec3f | edgeColor | white | reflectivity at grazing angle (90 degree, linear RGB) | - +-------+-----------+-----------+---------------------------------------------------------+ - | float | roughness | 0.1 | roughness, in [0–1], 0 is perfect mirror | - +-------+-----------+-----------+---------------------------------------------------------+ - -The main appearance of the Alloy material is controlled by the parameter ``color``, while ``edgeColor`` influences the tint of reflections when seen at grazing angles (for real metals this is always 100% white). If present, the color component of `geometries <#geometries>`__ is also used for reflectivity at normal incidence ``color``. As in `Metal <#metal>`__ the ``roughness`` parameter controls the variation of microfacets and thus how polished the alloy will look. All parameters can be textured by passing a `texture <#texture>`__ handle, prefixed with “``map_``”; `texture transformations <#texture-transformations>`__ are supported as well. - -.. figure:: https://ospray.github.io/images/material_Alloy.jpg - :alt: Rendering of a fictional Alloy material with textured color. - :width: 60.0% - - Rendering of a fictional Alloy material with textured color. - -Glass -^^^^^ - -The `path tracer <#path-tracer>`__ offers a realistic glass material, supporting refraction and volumetric attenuation (i.e., the transparency color varies with the geometric thickness). To create a Glass material pass the type string “``glass``” to ``ospNewMaterial``. Its parameters are - -.. table:: Parameters of the Glass material. 
- - +-------+---------------------+---------+-------------------------------------------------+ - | Type | Name | Default | Description | - +=======+=====================+=========+=================================================+ - | float | eta | 1.5 | index of refraction | - +-------+---------------------+---------+-------------------------------------------------+ - | vec3f | attenuationColor | white | resulting color due to attenuation (linear RGB) | - +-------+---------------------+---------+-------------------------------------------------+ - | float | attenuationDistance | 1 | distance affecting attenuation | - +-------+---------------------+---------+-------------------------------------------------+ - -For convenience, the rather counter-intuitive physical attenuation coefficients will be calculated from the user inputs in such a way that the ``attenuationColor`` will be the result when white light traveled through a glass of thickness ``attenuationDistance``. - -.. figure:: https://ospray.github.io/images/material_Glass.jpg - :alt: Rendering of a Glass material with orange attenuation. - :width: 60.0% - - Rendering of a Glass material with orange attenuation. - -ThinGlass -^^^^^^^^^ - -The `path tracer <#path-tracer>`__ offers a thin glass material useful for objects with just a single surface, most prominently windows. It models a thin, transparent slab, i.e., it behaves as if a second, virtual surface is parallel to the real geometric surface. The implementation accounts for multiple internal reflections between the interfaces (including attenuation), but neglects parallax effects due to its (virtual) thickness. To create such a thin glass material pass the type string “``thinGlass``” to ``ospNewMaterial``. Its parameters are - -.. table:: Parameters of the ThinGlass material. 
- - +-------+---------------------+---------+-------------------------------------------------+ - | Type | Name | Default | Description | - +=======+=====================+=========+=================================================+ - | float | eta | 1.5 | index of refraction | - +-------+---------------------+---------+-------------------------------------------------+ - | vec3f | attenuationColor | white | resulting color due to attenuation (linear RGB) | - +-------+---------------------+---------+-------------------------------------------------+ - | float | attenuationDistance | 1 | distance affecting attenuation | - +-------+---------------------+---------+-------------------------------------------------+ - | float | thickness | 1 | virtual thickness | - +-------+---------------------+---------+-------------------------------------------------+ - -For convenience the attenuation is controlled the same way as with the `Glass <#glass>`__ material. Additionally, the color due to attenuation can be modulated with a `texture <#texture>`__ ``map_attenuationColor`` (`texture transformations <#texture-transformations>`__ are supported as well). If present, the color component of `geometries <#geometries>`__ is also used for the attenuation color. The ``thickness`` parameter sets the (virtual) thickness and allows for easy exchange of parameters with the (real) `Glass <#glass>`__ material; internally just the ratio between ``attenuationDistance`` and ``thickness`` is used to calculate the resulting attenuation and thus the material appearance. - -.. figure:: https://ospray.github.io/images/material_ThinGlass.jpg - :alt: Rendering of a ThinGlass material with red attenuation. - :width: 60.0% - - Rendering of a ThinGlass material with red attenuation. - -.. figure:: https://ospray.github.io/images/ColoredWindow.jpg - :alt: Example image of a colored window made with textured attenuation of the ThinGlass material. 
- :width: 60.0% - - Example image of a colored window made with textured attenuation of the ThinGlass material. - -MetallicPaint -^^^^^^^^^^^^^ - -The `path tracer <#path-tracer>`__ offers a metallic paint material, consisting of a base coat with optional flakes and a clear coat. To create a MetallicPaint material pass the type string “``metallicPaint``” to ``ospNewMaterial``. Its parameters are listed in the table below. - -.. table:: Parameters of the MetallicPaint material. - - ===== =========== ========= ===================================== - Type Name Default Description - ===== =========== ========= ===================================== - vec3f baseColor white 0.8 color of base coat (linear RGB) - float flakeAmount 0.3 amount of flakes, in [0–1] - vec3f flakeColor Aluminium color of metallic flakes (linear RGB) - float flakeSpread 0.5 spread of flakes, in [0–1] - float eta 1.5 index of refraction of clear coat - ===== =========== ========= ===================================== - -The color of the base coat ``baseColor`` can be textured by a `texture <#texture>`__ ``map_baseColor``, which also supports `texture transformations <#texture-transformations>`__. If present, the color component of `geometries <#geometries>`__ is also used for the color of the base coat. Parameter ``flakeAmount`` controls the proportion of flakes in the base coat, so when setting it to 1 the ``baseColor`` will not be visible. The shininess of the metallic component is governed by ``flakeSpread``, which controls the variation of the orientation of the flakes, similar to the ``roughness`` parameter of `Metal <#metal>`__. Note that the effect of the metallic flakes is currently only computed on average, thus individual flakes are not visible. - -.. figure:: https://ospray.github.io/images/material_MetallicPaint.jpg - :alt: Rendering of a MetallicPaint material. - :width: 60.0% - - Rendering of a MetallicPaint material. 
- -Luminous -^^^^^^^^ - -The `path tracer <#path-tracer>`__ supports the Luminous material which emits light uniformly in all directions and which can thus be used to turn any geometric object into a light source. It is created by passing the type string “``luminous``” to ``ospNewMaterial``. The amount of constant radiance that is emitted is determined by combining the general `parameters of lights <#lights>`__ ``color`` and ``intensity`` (which essentially means that parameter ``intensityQuantity`` is not needed because it is always ``OSP_INTENSITY_QUANTITY_RADIANCE``). - -.. table:: Parameters accepted by the Luminous material. - - ===== ============ ======= ======================================= - Type Name Default Description - ===== ============ ======= ======================================= - vec3f color white color of the emitted light (linear RGB) - float intensity 1 intensity of the light (a factor) - float transparency 1 material transparency - ===== ============ ======= ======================================= - -.. figure:: https://ospray.github.io/images/material_Luminous.jpg - :alt: Rendering of a yellow Luminous material. - :width: 60.0% - - Rendering of a yellow Luminous material. - -Texture -~~~~~~~ - -OSPRay currently implements two texture types (``texture2d`` and ``volume``) and is open for extension to other types by applications. More types may be added in future releases. - -To create a new texture use - -.. code:: cpp - - OSPTexture ospNewTexture(const char *type); - -Texture2D -^^^^^^^^^ - -The ``texture2d`` texture type implements an image-based texture, where its parameters are as follows - -.. table:: Parameters of ``texture2d`` texture type. 
- - +---------+--------+---------------------------------------------------------------------------------------+ - | Type | Name | Description | - +=========+========+=======================================================================================+ - | int | format | ``OSPTextureFormat`` for the texture | - +---------+--------+---------------------------------------------------------------------------------------+ - | int | filter | default ``OSP_TEXTURE_FILTER_BILINEAR``, alternatively ``OSP_TEXTURE_FILTER_NEAREST`` | - +---------+--------+---------------------------------------------------------------------------------------+ - | OSPData | data | the actual texel 2D `data <#data>`__ | - +---------+--------+---------------------------------------------------------------------------------------+ - -The supported texture formats for ``texture2d`` are: - -.. table:: Supported texture formats by ``texture2d``, i.e., valid constants of type ``OSPTextureFormat``. - - +---------------------+--------------------------------------------------------------------------+ - | Name | Description | - +=====================+==========================================================================+ - | OSP_TEXTURE_RGBA8 | 8 bit [0–255] linear components red, green, blue, alpha | - +---------------------+--------------------------------------------------------------------------+ - | OSP_TEXTURE_SRGBA | 8 bit sRGB gamma encoded color components, and linear alpha | - +---------------------+--------------------------------------------------------------------------+ - | OSP_TEXTURE_RGBA32F | 32 bit float components red, green, blue, alpha | - +---------------------+--------------------------------------------------------------------------+ - | OSP_TEXTURE_RGB8 | 8 bit [0–255] linear components red, green, blue | - +---------------------+--------------------------------------------------------------------------+ - | OSP_TEXTURE_SRGB | 8 bit sRGB gamma encoded components red, 
green, blue | - +---------------------+--------------------------------------------------------------------------+ - | OSP_TEXTURE_RGB32F | 32 bit float components red, green, blue | - +---------------------+--------------------------------------------------------------------------+ - | OSP_TEXTURE_R8 | 8 bit [0–255] linear single component red | - +---------------------+--------------------------------------------------------------------------+ - | OSP_TEXTURE_RA8 | 8 bit [0–255] linear two components red, alpha | - +---------------------+--------------------------------------------------------------------------+ - | OSP_TEXTURE_L8 | 8 bit [0–255] gamma encoded luminance (replicated into red, green, blue) | - +---------------------+--------------------------------------------------------------------------+ - | OSP_TEXTURE_LA8 | 8 bit [0–255] gamma encoded luminance, and linear alpha | - +---------------------+--------------------------------------------------------------------------+ - | OSP_TEXTURE_R32F | 32 bit float single component red | - +---------------------+--------------------------------------------------------------------------+ - | OSP_TEXTURE_RGBA16 | 16 bit [0–65535] linear components red, green, blue, alpha | - +---------------------+--------------------------------------------------------------------------+ - | OSP_TEXTURE_RGB16 | 16 bit [0–65535] linear components red, green, blue | - +---------------------+--------------------------------------------------------------------------+ - | OSP_TEXTURE_RA16 | 16 bit [0–65535] linear two components red, alpha | - +---------------------+--------------------------------------------------------------------------+ - | OSP_TEXTURE_R16 | 16 bit [0–65535] linear single component red | - +---------------------+--------------------------------------------------------------------------+ - -The size of the texture is inferred from the size of the 2D array ``data``, which also needs have a compatible type to 
``format``. The texel data in ``data`` starts with the texels in the lower left corner of the texture image, like in OpenGL. Per default a texture fetch is filtered by performing bi-linear interpolation of the nearest 2×2 texels; if instead fetching only the nearest texel is desired (i.e., no filtering) then pass the ``OSP_TEXTURE_FILTER_NEAREST`` flag. - -Texturing with ``texture2d`` image textures requires `geometries <#geometries>`__ with texture coordinates, e.g., a `mesh <#mesh>`__ with ``vertex.texcoord`` provided. - -Volume Texture -^^^^^^^^^^^^^^ - -The ``volume`` texture type implements texture lookups based on 3D object coordinates of the surface hit point on the associated geometry. If the given hit point is within the attached volume, the volume is sampled and classified with the transfer function attached to the volume. This implements the ability to visualize volume values (as colored by a transfer function) on arbitrary surfaces inside the volume (as opposed to an isosurface showing a particular value in the volume). Its parameters are as follows - -.. table:: Parameters of ``volume`` texture type. - - +---------------------+------------------+------------------------------------------------------------------+ - | Type | Name | Description | - +=====================+==================+==================================================================+ - | OSPVolume | volume | `Volume <#volumes>`__ used to generate color lookups | - +---------------------+------------------+------------------------------------------------------------------+ - | OSPTransferFunction | transferFunction | `transfer function <#transfer-function>`__ applied to ``volume`` | - +---------------------+------------------+------------------------------------------------------------------+ - -TextureVolume can be used for implementing slicing of volumes with any geometry type. 
It enables coloring of the slicing geometry with a different transfer function than that of the sliced volume. - -Texture Transformations -^^^^^^^^^^^^^^^^^^^^^^^ - -All materials with textures also offer to manipulate the placement of these textures with the help of texture transformations. If so, this convention shall be used: the following parameters are prefixed with “``texture_name.*``”. - -.. table:: Parameters to define 2D texture coordinate transformations. - - +----------+-------------+--------------------------------------------------------+ - | Type | Name | Description | - +==========+=============+========================================================+ - | linear2f | transform | linear transformation (rotation, scale) | - +----------+-------------+--------------------------------------------------------+ - | float | rotation | angle in degree, counterclockwise, around center | - +----------+-------------+--------------------------------------------------------+ - | vec2f | scale | enlarge texture, relative to center :math:`(0.5, 0.5)` | - +----------+-------------+--------------------------------------------------------+ - | vec2f | translation | move texture in positive direction (right/up) | - +----------+-------------+--------------------------------------------------------+ - -Above parameters are combined into a single ``affine2d`` transformation matrix and the transformations are applied in the given order. Rotation, scale and translation are interpreted “texture centric”, i.e., their effects as seen by a user are relative to the texture (although the transformations are applied to the texture coordinates). - -.. table:: Parameter to define 3D volume texture transformations. 
- - +----------+-----------+----------------------------------------------------------+ - | Type | Name | Description | - +==========+===========+==========================================================+ - | affine3f | transform | linear transformation (rotation, scale) plus translation | - +----------+-----------+----------------------------------------------------------+ - -Similarly, volume texture placement can also be modified by an ``affine3f`` transformation matrix. - -Cameras -~~~~~~~ - -To create a new camera of given type ``type`` use - -.. code:: cpp - - OSPCamera ospNewCamera(const char *type); - -All cameras accept these parameters: - -.. table:: Parameters accepted by all cameras. - - +------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +============+========================+========================+======================================================================================================================================================================+ - | vec3f | position | :math:`(0, 0, 0)` | position of the camera | - +------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec3f | direction | :math:`(0, 0, 1)` | main viewing direction of the camera | - +------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec3f | up | :math:`(0, 1, 0)` | up direction of the camera | - 
+------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | affine3f | transform | identity | additional world-space transform, overridden by ``motion.*`` arrays | - +------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | nearClip | 10\ :sup:`-6` | near clipping distance | - +------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec2f | imageStart | :math:`(0, 0)` | start of image region (lower left corner) | - +------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec2f | imageEnd | :math:`(1, 1)` | end of image region (upper right corner) | - +------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | affine3f[] | motion.transform | | additional uniformly distributed world-space transforms | - +------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec3f[] | motion.scale | | additional uniformly distributed world-space scale, overridden by ``motion.transform`` | - 
+------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec3f[] | motion.pivot | | additional uniformly distributed world-space translation which is applied before ``motion.rotation`` (i.e., the rotation center), overridden by ``motion.transform`` | - +------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | quatf[] | motion.rotation | | additional uniformly distributed world-space quaternion rotation, overridden by ``motion.transform`` | - +------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | vec3f[] | motion.translation | | additional uniformly distributed world-space translation, overridden by ``motion.transform`` | - +------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | box1f | time | [0, 1] | time associated with first and last key in ``motion.*`` arrays | - +------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | box1f | shutter | [0.5, 0.5] | start and end of shutter time (for motion blur), in [0, 1] | - 
+------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | uchar | shutterType | ``OSP_SHUTTER_GLOBAL`` | ``OSPShutterType`` for motion blur, also allowed are: | - +------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``OSP_SHUTTER_ROLLING_RIGHT`` | - +------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``OSP_SHUTTER_ROLLING_LEFT`` | - +------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``OSP_SHUTTER_ROLLING_DOWN`` | - +------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | | | | ``OSP_SHUTTER_ROLLING_UP`` | - +------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | rollingShutterDuration | 0 | for a rolling shutter (see ``shutterType``) the “open” time per line, in [0, ``shutter``.upper-``shutter``.lower] | - 
+------------+------------------------+------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -The camera is placed and oriented in the world with ``position``, ``direction`` and ``up``. Additionally, an extra transformation ``transform`` can be specified, which will only be applied to 3D vectors (i.e. ``position``, ``direction`` and ``up``), but does *not* affect any sizes (e.g., ``nearClip``, ``apertureRadius``, or ``height``). The same holds for the array of transformations ``motion.transform`` to achieve camera motion blur (in combination with ``time`` and ``shutter``). - -OSPRay uses a right-handed coordinate system. The region of the camera sensor that is rendered to the image can be specified in normalized screen-space coordinates with ``imageStart`` (lower left corner) and ``imageEnd`` (upper right corner). This can be used, for example, to crop the image, to achieve asymmetrical view frusta, or to horizontally flip the image to view scenes which are specified in a left-handed coordinate system. Note that values outside the default range of [0–1] are valid, which is useful to easily realize overscan or film gate, or to emulate a shifted sensor. - -Perspective Camera -^^^^^^^^^^^^^^^^^^ - -The perspective camera implements a simple thin lens camera for perspective rendering, supporting optionally depth of field and stereo rendering (with the `path tracer <#path-tracer>`__). It is created by passing the type string “``perspective``” to ``ospNewCamera``. In addition to the `general parameters <#cameras>`__ understood by all cameras the perspective camera supports the special parameters listed in the table below. - -.. table:: Additional parameters accepted by the perspective camera. 
- - +-------+------------------------+---------------------+------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=======+========================+=====================+========================================================================+ - | float | fovy | 60 | the field of view (angle in degree) of the frame’s height | - +-------+------------------------+---------------------+------------------------------------------------------------------------+ - | float | aspect | 1 | ratio of width by height of the frame (and image region) | - +-------+------------------------+---------------------+------------------------------------------------------------------------+ - | float | apertureRadius | 0 | size of the aperture, controls the depth of field | - +-------+------------------------+---------------------+------------------------------------------------------------------------+ - | float | focusDistance | 1 | distance at where the image is sharpest when depth of field is enabled | - +-------+------------------------+---------------------+------------------------------------------------------------------------+ - | bool | architectural | false | vertical edges are projected to be parallel | - +-------+------------------------+---------------------+------------------------------------------------------------------------+ - | uchar | stereoMode | ``OSP_STEREO_NONE`` | ``OSPStereoMode`` for stereo rendering, also allowed are: | - +-------+------------------------+---------------------+------------------------------------------------------------------------+ - | | | | ``OSP_STEREO_LEFT`` | - +-------+------------------------+---------------------+------------------------------------------------------------------------+ - | | | | ``OSP_STEREO_RIGHT`` | - +-------+------------------------+---------------------+------------------------------------------------------------------------+ - | | | | 
``OSP_STEREO_SIDE_BY_SIDE`` | - +-------+------------------------+---------------------+------------------------------------------------------------------------+ - | | | | ``OSP_STEREO_TOP_BOTTOM`` (left eye at top half) | - +-------+------------------------+---------------------+------------------------------------------------------------------------+ - | float | interpupillaryDistance | 0.0635 | distance between left and right eye when stereo is enabled | - +-------+------------------------+---------------------+------------------------------------------------------------------------+ - -Note that when computing the ``aspect`` ratio a potentially set image region (using ``imageStart`` & ``imageEnd``) needs to be regarded as well. - -In architectural photography it is often desired for aesthetic reasons to display the vertical edges of buildings or walls vertically in the image as well, regardless of how the camera is tilted. Enabling the ``architectural`` mode achieves this by internally leveling the camera parallel to the ground (based on the ``up`` direction) and then shifting the lens such that the objects in direction ``dir`` are centered in the image. If finer control of the lens shift is needed use ``imageStart`` & ``imageEnd``. Because the camera is now effectively leveled its image plane and thus the plane of focus is oriented parallel to the front of buildings, the whole façade appears sharp, as can be seen in the example images below. The resolution of the `framebuffer <#framebuffer>`__ is not altered by ``imageStart``/``imageEnd``. - -.. figure:: https://ospray.github.io/images/camera_perspective.jpg - :alt: Example image created with the perspective camera, featuring depth of field. - :width: 60.0% - - Example image created with the perspective camera, featuring depth of field. - -.. 
figure:: https://ospray.github.io/images/camera_architectural.jpg - :alt: Enabling the ``architectural`` flag corrects the perspective projection distortion, resulting in parallel vertical edges. - :width: 60.0% - - Enabling the ``architectural`` flag corrects the perspective projection distortion, resulting in parallel vertical edges. - -.. figure:: https://ospray.github.io/images/camera_stereo.jpg - :alt: Example 3D stereo image using ``stereoMode = OSP_STEREO_SIDE_BY_SIDE``. - :width: 90.0% - - Example 3D stereo image using ``stereoMode = OSP_STEREO_SIDE_BY_SIDE``. - -Orthographic Camera -^^^^^^^^^^^^^^^^^^^ - -The orthographic camera implements a simple camera with orthographic projection, without support for depth. It is created by passing the type string “``orthographic``” to ``ospNewCamera``. In addition to the `general parameters <#cameras>`__ understood by all cameras the orthographic camera supports the following special parameters: - -.. table:: Additional parameters accepted by the orthographic camera. - - ===== ====== =========================================================== - Type Name Description - ===== ====== =========================================================== - float height size of the camera’s image plane in y, in world coordinates - float aspect ratio of width by height of the frame - ===== ====== =========================================================== - -For convenience the size of the camera sensor, and thus the extent of the scene that is captured in the image, can be controlled with the ``height`` parameter. The same effect can be achieved with ``imageStart`` and ``imageEnd``, and both methods can be combined. In any case, the ``aspect`` ratio needs to be set accordingly to get an undistorted image. - -.. figure:: https://ospray.github.io/images/camera_orthographic.jpg - :alt: Example image created with the orthographic camera. - :width: 60.0% - - Example image created with the orthographic camera. 
- -Panoramic Camera -^^^^^^^^^^^^^^^^ - -The panoramic camera implements a simple camera with support for stereo rendering. It captures the complete surrounding with a latitude / longitude mapping and thus the rendered images should best have a ratio of 2:1. A panoramic camera is created by passing the type string “``panoramic``” to ``ospNewCamera``. It is placed and oriented in the scene by using the `general parameters <#cameras>`__ understood by all cameras. - -.. table:: Additional parameters accepted by the panoramic camera. - - +-------+------------------------+----------------------------------------------------------------------------+ - | Type | Name | Description | - +=======+========================+============================================================================+ - | uchar | stereoMode | ``OSPStereoMode`` for stereo rendering, possible values are: | - +-------+------------------------+----------------------------------------------------------------------------+ - | | | ``OSP_STEREO_NONE`` (default) | - +-------+------------------------+----------------------------------------------------------------------------+ - | | | ``OSP_STEREO_LEFT`` | - +-------+------------------------+----------------------------------------------------------------------------+ - | | | ``OSP_STEREO_RIGHT`` | - +-------+------------------------+----------------------------------------------------------------------------+ - | | | ``OSP_STEREO_SIDE_BY_SIDE`` | - +-------+------------------------+----------------------------------------------------------------------------+ - | | | ``OSP_STEREO_TOP_BOTTOM`` (left eye at top half) | - +-------+------------------------+----------------------------------------------------------------------------+ - | float | interpupillaryDistance | distance between left and right eye when stereo is enabled, default 0.0635 | - +-------+------------------------+----------------------------------------------------------------------------+ - 
-.. figure:: https://ospray.github.io/images/camera_panoramic.jpg - :alt: Latitude / longitude map created with the panoramic camera. - :width: 90.0% - - Latitude / longitude map created with the panoramic camera. - -Picking -~~~~~~~ - -To get the world-space position of the geometry (if any) seen at [0–1] normalized screen-space pixel coordinates ``screenPos_x`` and ``screenPos_y`` use - -.. code:: cpp - - void ospPick(OSPPickResult *, - OSPFrameBuffer, - OSPRenderer, - OSPCamera, - OSPWorld, - float screenPos_x, - float screenPos_y); - -The result is returned in the provided ``OSPPickResult`` struct: - -.. code:: cpp - - typedef struct { - int hasHit; - float worldPosition[3]; - OSPInstance instance; - OSPGeometricModel model; - uint32_t primID; - } OSPPickResult; - -Note that ``ospPick`` considers exactly the same camera of the given renderer that is used to render an image, thus matching results can be expected. If the camera supports depth of field then the center of the lens and thus the center of the circle of confusion is used for picking. Note that the caller needs to ``ospRelease`` the ``instance`` and ``model`` handles of ``OSPPickResult`` once the information is not needed anymore. - -Framebuffer ------------ - -The framebuffer holds the rendered 2D image (and optionally auxiliary information associated with pixels). To create a new framebuffer object of given size ``size`` (in pixels), color format, and channels use - -.. code:: cpp - - OSPFrameBuffer ospNewFrameBuffer(int size_x, int size_y, - OSPFrameBufferFormat format = OSP_FB_SRGBA, - uint32_t frameBufferChannels = OSP_FB_COLOR); - -The parameter ``format`` describes the format the color buffer has *on the host*, and the format that ``ospMapFrameBuffer`` will eventually return. Valid values are: - -.. table:: Supported color formats of the framebuffer that can be passed to ``ospNewFrameBuffer``, i.e., valid constants of type ``OSPFrameBufferFormat``. 
- - +----------------+-------------------------------------------------------------+ - | Name | Description | - +================+=============================================================+ - | OSP_FB_NONE | framebuffer will not be mapped by the application | - +----------------+-------------------------------------------------------------+ - | OSP_FB_RGBA8 | 8 bit [0–255] linear components red, green, blue, alpha | - +----------------+-------------------------------------------------------------+ - | OSP_FB_SRGBA | 8 bit sRGB gamma encoded color components, and linear alpha | - +----------------+-------------------------------------------------------------+ - | OSP_FB_RGBA32F | 32 bit float components red, green, blue, alpha | - +----------------+-------------------------------------------------------------+ - -The parameter ``frameBufferChannels`` specifies which channels the framebuffer holds, and can be combined together by bitwise OR from the values of ``OSPFrameBufferChannel`` listed in the table below. - -.. table:: Framebuffer channels constants (of type ``OSPFrameBufferChannel``), naming optional information the framebuffer can store. These values can be combined by bitwise OR when passed to ``ospNewFrameBuffer``. 
- - +-----------------+--------------------------------------------------------------------------------------------------------------------------------------------+ - | Name | Description | - +=================+============================================================================================================================================+ - | OSP_FB_COLOR | RGB color including alpha | - +-----------------+--------------------------------------------------------------------------------------------------------------------------------------------+ - | OSP_FB_DEPTH | euclidean distance to the camera (*not* to the image plane), as linear 32 bit float; for multiple samples per pixel their minimum is taken | - +-----------------+--------------------------------------------------------------------------------------------------------------------------------------------+ - | OSP_FB_ACCUM | accumulation buffer for progressive refinement | - +-----------------+--------------------------------------------------------------------------------------------------------------------------------------------+ - | OSP_FB_VARIANCE | for estimation of the current noise level if OSP_FB_ACCUM is also present, see `rendering <#rendering>`__ | - +-----------------+--------------------------------------------------------------------------------------------------------------------------------------------+ - | OSP_FB_NORMAL | accumulated world-space normal of the first non-specular hit, as vec3f | - +-----------------+--------------------------------------------------------------------------------------------------------------------------------------------+ - | OSP_FB_ALBEDO | accumulated material albedo (color without illumination) at the first hit, as vec3f | - +-----------------+--------------------------------------------------------------------------------------------------------------------------------------------+ - -If a certain channel value is *not* specified, the given 
buffer channel will not be present. Note that OSPRay makes a clear distinction between the *external* format of the framebuffer and the internal one: The external format is the format the user specifies in the ``format`` parameter; it specifies in what color format OSPRay will eventually *return* the framebuffer to the application (when calling ``ospMapFrameBuffer``): no matter what OSPRay uses internally, it will simply return a 2D array of pixels of that format, with possibly all kinds of reformatting, compression/decompression, etc., going on in-between the generation of the *internal* framebuffer and the mapping of the externally visible one. - -In particular, ``OSP_FB_NONE`` is a perfectly valid pixel format for a framebuffer that an application will never map. For example, an application driving a display wall may well generate an intermediate framebuffer and eventually transfer its pixels to the individual displays using an ``OSPImageOperation`` `image operation <#image-operation>`__. - -The application can map the given channel of a framebuffer – and thus access the stored pixel information – via - -.. code:: cpp - - const void *ospMapFrameBuffer(OSPFrameBuffer, OSPFrameBufferChannel = OSP_FB_COLOR); - -Note that ``OSP_FB_ACCUM`` or ``OSP_FB_VARIANCE`` cannot be mapped. The origin of the screen coordinate system in OSPRay is the lower left corner (as in OpenGL), thus the first pixel addressed by the returned pointer is the lower left pixel of the image. - -A previously mapped channel of a framebuffer can be unmapped by passing the received pointer ``mapped`` to - -.. code:: cpp - - void ospUnmapFrameBuffer(const void *mapped, OSPFrameBuffer); - -The individual channels of a framebuffer can be cleared with - -.. code:: cpp - - void ospResetAccumulation(OSPFrameBuffer); - -This function will clear *all* accumulating buffers (``OSP_FB_VARIANCE``, ``OSP_FB_NORMAL``, and ``OSP_FB_ALBEDO``, if present) and reset the accumulation counter ``accumID``. 
It is unspecified if the existing color and depth buffers are physically cleared when ``ospResetAccumulation`` is called. - -If ``OSP_FB_VARIANCE`` is specified, an estimate of the variance of the last accumulated frame can be queried with - -.. code:: cpp - - float ospGetVariance(OSPFrameBuffer); - -Note this value is only updated after synchronizing with ``OSP_FRAME_FINISHED``, as further described in `asynchronous rendering <#asynchronous-rendering>`__. The estimated variance can be used by the application as a quality indicator and thus to decide whether to stop or to continue progressive rendering. - -The framebuffer takes a list of pixel operations to be applied to the image in sequence as an ``OSPData``. The pixel operations will be run in the order they are in the array. - -.. table:: Parameters accepted by the framebuffer. - - =================== ============== ==================================== - Type Name Description - =================== ============== ==================================== - OSPImageOperation[] imageOperation ordered sequence of image operations - =================== ============== ==================================== - -Image Operation -~~~~~~~~~~~~~~~ - -Image operations are functions that are applied to every pixel of a frame. Examples include post-processing, filtering, blending, tone mapping, or sending tiles to a display wall. To create a new pixel operation of given type ``type`` use - -.. code:: cpp - - OSPImageOperation ospNewImageOperation(const char *type); - -Tone Mapper -^^^^^^^^^^^ - -The tone mapper is a pixel operation which implements a generic filmic tone mapping operator. Using the default parameters it approximates the Academy Color Encoding System (ACES). The tone mapper is created by passing the type string “``tonemapper``” to ``ospNewImageOperation``. The tone mapping curve can be customized using the parameters listed in the table below. - -.. table:: Parameters accepted by the tone mapper. 
- - +-------+-----------+---------+------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=======+===========+=========+========================================================================+ - | float | exposure | 1.0 | amount of light per unit area | - +-------+-----------+---------+------------------------------------------------------------------------+ - | float | contrast | 1.6773 | contrast (toe of the curve); typically is in [1–2] | - +-------+-----------+---------+------------------------------------------------------------------------+ - | float | shoulder | 0.9714 | highlight compression (shoulder of the curve); typically is in [0.9–1] | - +-------+-----------+---------+------------------------------------------------------------------------+ - | float | midIn | 0.18 | mid-level anchor input; default is 18% gray | - +-------+-----------+---------+------------------------------------------------------------------------+ - | float | midOut | 0.18 | mid-level anchor output; default is 18% gray | - +-------+-----------+---------+------------------------------------------------------------------------+ - | float | hdrMax | 11.0785 | maximum HDR input that is not clipped | - +-------+-----------+---------+------------------------------------------------------------------------+ - | bool | acesColor | true | apply the ACES color transforms | - +-------+-----------+---------+------------------------------------------------------------------------+ - -To use the popular “Uncharted 2” filmic tone mapping curve instead, set the parameters to the values listed in the table below. - -.. table:: Filmic tone mapping curve parameters. Note that the curve includes an exposure bias to match 18% middle gray. 
- - ========= ====== - Name Value - ========= ====== - contrast 1.1759 - shoulder 0.9746 - midIn 0.18 - midOut 0.18 - hdrMax 6.3704 - acesColor false - ========= ====== - -Denoiser -^^^^^^^^ - -OSPRay comes with a module that adds support for Intel® Open Image Denoise. This is provided as an optional module as it creates an additional project dependency at compile time. The module implements a “``denoiser``” frame operation, which denoises the entire frame before the frame is completed. - -Rendering ---------- - -Asynchronous Rendering -~~~~~~~~~~~~~~~~~~~~~~ - -Rendering is by default asynchronous (non-blocking), and is done by combining a framebuffer, renderer, camera, and world. - -What to render and how to render it depends on the renderer’s parameters. If the framebuffer supports accumulation (i.e., it was created with ``OSP_FB_ACCUM``) then successive calls to ``ospRenderFrame`` will progressively refine the rendered image. - -To start an render task, use - -.. code:: cpp - - OSPFuture ospRenderFrame(OSPFrameBuffer, OSPRenderer, OSPCamera, OSPWorld); - -This returns an ``OSPFuture`` handle, which can be used to synchronize with the application, cancel, or query for progress of the running task. When ``ospRenderFrame`` is called, there is no guarantee when the associated task will begin execution. - -Progress of a running frame can be queried with the following API function - -.. code:: cpp - - float ospGetProgress(OSPFuture); - -This returns the approximated progress of the task in [0-1]. - -Applications can cancel a currently running asynchronous operation via - -.. code:: cpp - - void ospCancel(OSPFuture); - -Applications can wait on the result of an asynchronous operation, or choose to only synchronize with a specific event. To synchronize with an ``OSPFuture`` use - -.. code:: cpp - - void ospWait(OSPFuture, OSPSyncEvent = OSP_TASK_FINISHED); - -The following are values which can be synchronized with the application - -.. 
table:: Supported events that can be passed to ``ospWait``. - - +---------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+ - | Name | Description | - +=====================+=================================================================================================================================================+ - | OSP_NONE_FINISHED | Do not wait for anything to be finished (immediately return from ``ospWait``) | - +---------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+ - | OSP_WORLD_COMMITTED | Wait for the world to be committed (not yet implemented) | - +---------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+ - | OSP_WORLD_RENDERED | Wait for the world to be rendered, but not post-processing operations (Pixel/Tile/Frame Op) | - +---------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+ - | OSP_FRAME_FINISHED | Wait for all rendering operations to complete | - +---------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+ - | OSP_TASK_FINISHED | Wait on full completion of the task associated with the future. The underlying task may involve one or more of the above synchronization events | - +---------------------+-------------------------------------------------------------------------------------------------------------------------------------------------+ - -Currently only rendering can be invoked asynchronously. 
However, future releases of OSPRay may add more asynchronous versions of API calls (and thus return ``OSPFuture``). - -Applications can query whether particular events are complete with - -.. code:: cpp - - int ospIsReady(OSPFuture, OSPSyncEvent = OSP_TASK_FINISHED); - -As the given running task runs (as tracked by the ``OSPFuture``), applications can query a boolean [0, 1] result if the passed event has been completed. - -Applications can query how long an async task ran with - -.. code:: cpp - - float ospGetTaskDuration(OSPFuture); - -This returns the wall clock execution time of the task in seconds. If the task is still running, this will block until the task is completed. This is useful for applications to query exactly how long an asynchronous task executed without the overhead of measuring both task execution + synchronization by the calling application. - -Asynchronously Rendering and ospCommit() -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The use of either ``ospRenderFrame`` or ``ospRenderFrame`` requires that all objects in the scene being rendered have been committed before rendering occurs. If a call to ``ospCommit()`` happens while a frame is rendered, the result is undefined behavior and should be avoided. - -Synchronous Rendering -~~~~~~~~~~~~~~~~~~~~~ - -For convenience in certain use cases, ``ospray_util.h`` provides a synchronous version of ``ospRenderFrame``: - -.. code:: cpp - - float ospRenderFrameBlocking(OSPFrameBuffer, OSPRenderer, OSPCamera, OSPWorld); - -This version is the equivalent of: - -.. code:: cpp - - ospRenderFrame - ospWait(f, OSP_TASK_FINISHED) - return ospGetVariance(fb) - -This version is closest to ``ospRenderFrame`` from OSPRay v1.x. - -Distributed Rendering with MPI -============================== - -The purpose of the MPI module for OSPRay is to provide distributed rendering capabilities for OSPRay. 
The module enables image- and data-parallel rendering across HPC clusters using MPI, allowing applications to transparently distribute rendering work, or to render data sets which are too large to fit in memory on a single machine. - -The MPI module provides two OSPRay devices to allow applications to leverage distributed rendering capabilities. The ``mpiOffload`` device provides transparent image-parallel rendering, where the same OSPRay application written for local rendering can be replicated across multiple nodes to distribute the rendering work. The ``mpiDistributed`` device allows MPI distributed applications to use OSPRay for distributed rendering, where each rank can render and independent piece of a global data set, or hybrid rendering where ranks partially or completely share data. - -MPI Offload Rendering ---------------------- - -The ``mpiOffload`` device can be used to distribute image rendering tasks across a cluster without requiring modifications to the application itself. Existing applications using OSPRay for local rendering simply be passed command line arguments to load the module and indicate that the ``mpiOffload`` device should be used for image-parallel rendering. To load the module, pass ``--osp:load-modules=mpi``, to select the MPIOffloadDevice, pass ``--osp:device=mpiOffload``. For example, the ``ospExamples`` application can be run as: - -.. code:: sh - - mpirun -n ./ospExamples --osp:load-modules=mpi --osp:device=mpiOffload - -and will automatically distribute the image rendering tasks among the corresponding ``N`` nodes. Note that in this configuration rank 0 will act as a master/application rank, and will run the user application code but not perform rendering locally. Thus, a minimum of 2 ranks are required, one master to run the application and one worker to perform the rendering. Running with 3 ranks for example would now distribute half the image rendering work to rank 1 and half to rank 2. 
- -If more control is required over the placement of ranks to nodes, or you want to run a worker rank on the master node as well you can run the application and the ``ospray_mpi_worker`` program through MPI’s MPMD mode. The ``ospray_mpi_worker`` will load the MPI module and select the offload device by default. - -.. code:: sh - - mpirun -n 1 ./ospExamples --osp:load-modules=mpi --osp:device=mpiOffload \ - : -n ./ospray_mpi_worker - -If initializing the ``mpiOffload`` device manually, or passing parameters through the command line, the following parameters can be set: - -.. table:: Parameters specific to the ``mpiOffload`` Device. - - +--------+-------------------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +========+=========================+=========+=======================================================================================================================================================================================================+ - | string | mpiMode | mpi | The mode to communicate with the worker ranks. ``mpi`` will assume you’re launching the application and workers in the same mpi command (or split launch command). 
``mpi`` is the only supported mode | - +--------+-------------------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | uint | maxCommandBufferEntries | 8192 | Set the max number of commands to buffer before submitting the command buffer to the workers | - +--------+-------------------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | uint | commandBufferSize | 512 MiB | Set the max command buffer size to allow. Units are in MiB. Max size is 1.8 GiB | - +--------+-------------------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | uint | maxInlineDataSize | 32 MiB | Set the max size of an OSPData which can be inline’d into the command buffer instead of being sent separately. Max size is half the commandBufferSize. Units are in MiB | - +--------+-------------------------+---------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -The ``maxCommandBufferEntries``, ``commandBufferSize``, and ``maxInlineDataSize`` can also be set via the environment variables: ``OSPRAY_MPI_MAX_COMMAND_BUFFER_ENTRIES``, ``OSPRAY_MPI_COMMAND_BUFFER_SIZE``, and ``OSPRAY_MPI_MAX_INLINE_DATA_SIZE``, respectively. - -The ``mpiOffload`` device does not support multiple init/shutdown cycles. 
Thus, to run ``ospBenchmark`` for this device make sure to exclude the init/shutdown test by passing ``--benchmark_filter=-ospInit_ospShutdown`` through the command line. - -MPI Distributed Rendering -------------------------- - -While MPI Offload rendering is used to transparently distribute rendering work without requiring modification to the application, MPI Distributed rendering is targeted at use of OSPRay within MPI-parallel applications. The MPI distributed device can be selected by loading the ``mpi`` module, and manually creating and using an instance of the ``mpiDistributed`` device: - -.. code:: cpp - - ospLoadModule("mpi"); - - OSPDevice mpiDevice = ospNewDevice("mpiDistributed"); - ospDeviceCommit(mpiDevice); - ospSetCurrentDevice(mpiDevice); - -Your application can either initialize MPI before-hand, ensuring that ``MPI_THREAD_SERIALIZED`` or higher is supported, or allow the device to initialize MPI on commit. Thread multiple support is required if your application will make MPI calls while rendering asynchronously with OSPRay. When using the distributed device each rank can specify independent local data using the OSPRay API, as if rendering locally. However, when calling ``ospRenderFrameAsync`` the ranks will work collectively to render the data. The distributed device supports both image-parallel, where the data is replicated, and data-parallel, where the data is distributed, rendering modes. The ``mpiDistributed`` device will by default use each rank in ``MPI_COMM_WORLD`` as a render worker; however, it can also take a specific MPI communicator to use as the world communicator. Only those ranks in the specified communicator will participate in rendering. - -.. table:: Parameters specific to the distributed ``mpiDistributed`` Device. 
- - +---------+-------------------+----------------+---------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=========+===================+================+===========================================================================+ - | void \* | worldCommunicator | MPI_COMM_WORLD | The MPI communicator which the OSPRay workers should treat as their world | - +---------+-------------------+----------------+---------------------------------------------------------------------------+ - -.. table:: Parameters specific to the distributed ``OSPWorld``. - - +----------+--------+---------+--------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +==========+========+=========+======================================================================================+ - | box3f[] | region | NULL | A list of bounding boxes which bound the owned local data to be rendered by the rank | - +----------+--------+---------+--------------------------------------------------------------------------------------+ - -.. table:: Parameters specific to the ``mpiRaycast`` renderer. 
- - +-------+------------+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | Type | Name | Default | Description | - +=======+============+===============+==========================================================================================================================================================================+ - | int | aoSamples | 0 | The number of AO samples to take per-pixel | - +-------+------------+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | float | aoDistance | 10\ :sup:`20` | The AO ray length to use. Note that if the AO ray would have crossed a rank boundary and ghost geometry is not available, there will be visible artifacts in the shading | - +-------+------------+---------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - -Image Parallel Rendering in the MPI Distributed Device -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If all ranks specify exactly the same data, the distributed device can be used for image-parallel rendering. This works identical to the offload device, except that the MPI-aware application is able to load data in parallel on each rank rather than loading on the master and shipping data out to the workers. When a parallel file system is available, this can improve data load times. Image-parallel rendering is selected by specifying the same data on each rank, and using any of the existing local renderers (e.g., ``scivis``, ``pathtracer``). See `ospMPIDistribTutorialReplicated `__ for an example. 
- -Data Parallel Rendering in the MPI Distributed Device -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The MPI Distributed device also supports data-parallel rendering with sort-last compositing. Each rank can specify a different piece of data, as long as the bounding boxes of each rank’s data are non-overlapping. The rest of the scene setup is similar to local rendering; however, for distributed rendering only the ``mpiRaycast`` renderer is supported. This renderer implements a subset of the ``scivis`` rendering features which are suitable for implementation in a distributed environment. - -By default the aggregate bounding box of the instances in the local world will be used as the bounds of that rank’s data. However, when using ghost zones for volume interpolation, geometry or ambient occlusion, each rank’s data can overlap. To clip these non-owned overlap regions out a set of regions (the ``region`` parameter) can pass as a parameter to the ``OSPWorld`` being rendered. Each rank can specify one or more non-overlapping ``box3f``\ ’s which bound the portions of its local data which it is responsible for rendering. See the `ospMPIDistribTutorialVolume `__ for an example. - -Finally, the MPI distributed device also supports hybrid-parallel rendering, where multiple ranks can share a single piece of data. For each shared piece of data the rendering work will be assigned image-parallel among the ranks. Partially-shared regions are determined by finding those ranks specifying data with the same bounds (matching regions) and merging them. See the `ospMPIDistribTutorialPartialRepl `__ for an example. - -Picking on Distributed Data in the MPI Distributed Device -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Calling ``ospPick`` in the distributed device will find and return the closest global object at the screen position on the rank that owns that object. The other ranks will report no hit. 
Picking in the distributed device takes into account data clipping applied through the ``regions`` parameter to avoid picking ghost data. - -Interaction with User Modules ------------------------------ - -The MPI Offload rendering mode trivially supports user modules, with the caveat that attempting to share data directly with the application (e.g., passing a ``void\ *`` or other tricks to the module) will not work in a distributed environment. Instead, use the ``ospNewSharedData`` API to share data from the application with OSPRay, which will in turn be copied over the network to the workers. - -The MPI Distributed device also supports user modules, as all that is required for compositing the distributed data are the bounds of each rank’s local data. - -MultiDevice Rendering -===================== - -The multidevice module is an experimental OSPRay device type that renders images by delegating off pixel tiles to a number of internal delegate OSPRay devices. Multidevice is in still in an development stage and is currently limited to automatically creating ISPCDevice delegates. - -If you wish to try it set the OSPRAY_NUM_SUBDEVICES environmental variable to the number of subdevices you want to create and tell OSPRay to both load the multidevice extension and create a multidevice for rendering instead of the default ISPCDevice. - -One example in a bash like shell is as follows: - -.. code:: sh - - OSPRAY_NUM_SUBDEVICES=6 ./ospTutorial --osp:load-modules=multidevice --osp:device=multidevice - -.. [1] - The number of items to be copied is defined by the size of the source array. - -.. [2] - For consecutive memory addresses the x-index of the corresponding voxel changes the quickest. - -.. [3] - actually a parallelogram - -.. [4] - including spheres, boxes, infinite planes, closed meshes, closed subdivisions and curves - -.. [5] - ``OSPBounds`` has essentially the same layout as the ``OSP_BOX3F`` ```OSPDataType`` <#data>`__. - -.. 
[6] - If there are multiple ambient lights then their contribution is added. - -.. [7] - respectively :math:`(127, 127, 255)` for 8 bit textures and :math:`(32767, 32767, 65535)` for 16 bit textures diff --git a/source/elements/oneART/source/ospray-studio.rst b/source/elements/oneART/source/ospray-studio.rst deleted file mode 100644 index 3411ff831..000000000 --- a/source/elements/oneART/source/ospray-studio.rst +++ /dev/null @@ -1,25 +0,0 @@ -.. SPDX-FileCopyrightText: 2019-2021 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -.. _ospray-studio-section: - -============= -OSPRay Studio -============= - -Intel OSPRay Studio is an open source and interactive visualization -and ray tracing application that leverages Intel OSPRay as its core -rendering engine. It can be used to load complex scenes requiring high -fidelity rendering or very large scenes requiring supercomputing -resources. - -The main control structure is a scene graph which allows users to -create an abstract scene in a directed acyclical graph manner. Scenes -can either be imported or created using scene graph nodes and -structure support. The scenes can then be rendered either with -OSPRay's pathtracer or scivis renderer. - -More information can be found at the `OSPRay Studio website`_. - -.. _`OSPRay Studio website`: https://github.com/ospray/ospray_studio diff --git a/source/elements/oneART/source/ospray.rst b/source/elements/oneART/source/ospray.rst deleted file mode 100644 index f6af7826f..000000000 --- a/source/elements/oneART/source/ospray.rst +++ /dev/null @@ -1,20 +0,0 @@ -.. SPDX-FileCopyrightText: 2021 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -.. _ospray-section: - -======== - OSPRay -======== - -OSPRay is a scalable, and portable ray tracing engine for -high-performance, high-fidelity visualization. - - -.. 
toctree:: - :maxdepth: 1 - - ospray-intro - ospray-spec - diff --git a/source/elements/oneART/source/overview.inc.rst b/source/elements/oneART/source/overview.inc.rst deleted file mode 100644 index 6713f7a6c..000000000 --- a/source/elements/oneART/source/overview.inc.rst +++ /dev/null @@ -1,22 +0,0 @@ -.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -Ray Tracing defines a set of ray tracing and high-fidelity rendering -and computation routines for use in a wide variety of 3D graphics uses -including, film and television photorealistic visual effects and -animation rendering, scientific visualization, high-performance -computing computations, gaming, and more. Ray Tracing is designed to allow -cooperative execution on a wide variety of computational devices: -CPUs, GPUs, FPGAs, and other accelerators, termed “XPU” -computation. The functionality is subdivided into several domains: -geometric ray tracing computations, volumetric computation and -rendering, path guided ray tracing, image denoising, and an integrated -rendering infrastructure and API utilizing all the individual kernel -capabilities integrated into a highly capable, easy to use rendering -engine. - -The individual components and their APIs are described. Other design -considerations and related components that are not necessarily part of -the Ray Tracing specification but that are worth mentioning -will be discussed in the appendix. diff --git a/source/elements/oneART/source/versions.rst b/source/elements/oneART/source/versions.rst deleted file mode 100644 index 677431f83..000000000 --- a/source/elements/oneART/source/versions.rst +++ /dev/null @@ -1,19 +0,0 @@ -.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -========== - Versions -========== - -This section describes the versions that were available at time of -publication. See the `latest specification`_ for updates. 
- -================== ========== -Version Date -================== ========== -`0.5 rev 1`_ 2/15/2021 -================== ========== - -.. _`0.5 rev 1`: https://spec.oneapi.com/oneART/versions/latest -.. _`latest specification`: https://spec.oneapi.com/versions/latest/versions.html diff --git a/source/elements/oneCCL/source/spec/collective_operations.rst b/source/elements/oneCCL/source/spec/collective_operations.rst index 3987ded88..97e947934 100644 --- a/source/elements/oneCCL/source/spec/collective_operations.rst +++ b/source/elements/oneCCL/source/spec/collective_operations.rst @@ -15,6 +15,7 @@ oneCCL specification defines the following collective communication operations: - `Broadcast`_ - `Reduce`_ - `ReduceScatter`_ +- `PointToPoint`_ These operations are collective, meaning that all participants (ranks) of oneCCL communicator should make a call. The order of collective operation calls should be the same across all ranks. @@ -398,6 +399,142 @@ return ``event`` an object to track the progress of the operation +.. _PointToPoint: + +Point-To-Point Operations +************************* + +oneCCL specification defines the following point-to-point operations: + +* Send +* Recv + +In point-to-point communication, two ranks participate in the communication, so when a process sends data to a peer rank, +the peer rank needs to post a ``recv`` call with the same datatype and count as the sending rank. + +The current specification only supports blocking ``send`` and ``recv`` and does not support multiple ``send`` +and ``recv`` operations proceeding concurrently. + +In the ``send`` operation, ``peer`` specifies the destination process, while in the ``recv`` operation, ``peer`` specifies the source process. + +As with the collective operations, the communicator can perform communication operations on host or device memory buffers +depending on the device used to create the communicator.
Additionally, communication operations accept an execution +context (stream) and may accept a vector of events on which the communication operation should depend, that is, input dependencies. +The output event object provides the ability to track the operation's progress. + +.. note:: Support for the handling of input events is optional. + +BufferType is used below to define the C++ type of elements in communication operations' data buffers +(``buf``, ``send_buf``, and ``recv_buf``). At least the following types should be supported: ``[u]int{8/16/32/64}_t, float, double``. +The explicit datatype parameter enables data types that cannot be inferred from the function arguments. +For more information, see Custom Datatypes. + +The communication operation accepts a stream object. If a communicator is created from ``native_device_type``, +then the stream translates to ``native_stream_type`` created from the corresponding device. + +The communication operation may accept attribute objects. If that parameter is omitted, then the default attribute object is used +(``default_<operation_name>_attr``). The default attribute object is provided by the library. +For more information, see Operation Attributes. + +If the arguments provided to a communication operation call do not comply with the requirements of the operation, +the behavior is undefined, unless otherwise specified. + +Send +^^^^ + +A blocking point-to-point communication operation that sends the data in a buf to a peer rank. + +.. 
code:: cpp + + template <class BufferType> + event CCL_API send(BufferType *buf, + size_t count, + int peer, + const communicator &comm, + const stream &stream, + const pt2pt_attr &attr = default_pt2pt_attr, + const vector_class<event> &deps = {}); + + event CCL_API send(void *buf, + size_t count, + datatype dtype, + int peer, + const communicator &comm, + const stream &stream, + const pt2pt_attr &attr = default_pt2pt_attr, + const vector_class<event> &deps = {}); + +buf + the buffer with count elements of ``dtype`` that contains the data to be sent +count + the number of elements of type dtype in buf +dtype + the datatype of elements in buf + must be skipped if ``BufferType`` can be inferred + otherwise must be passed explicitly +peer + the destination rank +comm + the communicator that defines a group of ranks for the operation +stream + the stream associated with the operation +attr + optional attributes to customize the operation +deps + an optional vector of the events that the operation should depend on + +return ``event`` + an object to track the progress of the operation + +Recv +^^^^ + +A blocking point-to-point communication operation that receives the data in a buf from a peer rank. + +.. code:: cpp + + template <class BufferType> + event CCL_API recv(BufferType *buf, + size_t count, + int peer, + const communicator &comm, + const stream &stream, + const pt2pt_attr &attr = default_pt2pt_attr, + const vector_class<event> &deps = {}); + + event CCL_API recv(void *buf, + size_t count, + datatype dtype, + int peer, + const communicator &comm, + const stream &stream, + const pt2pt_attr &attr = default_pt2pt_attr, + const vector_class<event> &deps = {}); + +buf [out] + the buffer with count elements of ``dtype`` in which the received data is stored +count + the number of elements of type dtype in buf +dtype + the datatype of elements in buf + must be skipped if ``BufferType`` can be inferred + otherwise must be passed explicitly +peer + the source rank +comm + the communicator that defines a group of ranks for the operation +stream + the stream associated with the operation +attr + optional attributes to customize the operation +deps + an optional vector of the events that the operation should depend on + +return ``event`` + an object to track the progress of the operation + + + ..
note:: See also: diff --git a/source/elements/oneDNN/include/dnnl_graph.hpp b/source/elements/oneDNN/include/dnnl_graph.hpp index 4b6b9b467..f99f10d66 100644 --- a/source/elements/oneDNN/include/dnnl_graph.hpp +++ b/source/elements/oneDNN/include/dnnl_graph.hpp @@ -136,6 +136,8 @@ struct logical_tensor { s8, /// 8-bit unsigned integer. u8, + /// Boolean data type. Size is C++ implementation defined. + boolean }; /// Layout type @@ -455,6 +457,7 @@ struct op { Mish , MishBackward , Multiply , + Pow , PReLU , PReLUBackward , Quantize , @@ -470,6 +473,7 @@ struct op { ReLUBackward , Reorder , Round , + Select , Sigmoid , SigmoidBackward , SoftMax , diff --git a/source/elements/oneDNN/source/data_model/data_types/#int8.rst# b/source/elements/oneDNN/source/data_model/data_types/#int8.rst# new file mode 100644 index 000000000..fefc1eedc --- /dev/null +++ b/source/elements/oneDNN/source/data_model/data_types/#int8.rst# @@ -0,0 +1,206 @@ +.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. default-domain:: cpp + +#### +Int8 +#### + +To push higher performance during inference computations, recent work has +focused on computations that use activations and weights stored at lower +precision to achieve higher throughput. Int8 computations offer improved +performance over higher-precision types because they enable packing more +computations into a single instruction, at the cost of reduced (but +acceptable) accuracy. + +******** +Workflow +******** + +oneDNN support symmetric and asymmetric quantization models. + +.. _int8-quantization-label: + +Quantization Model +================== + +For each int8 tensor, the oneDNN library allows to specify scaling +factors and zero-points (also referred to as quantization +parameters), and assumes the following mathematical relationship: + +.. 
math:: + + x_{f32}[:] = scale_{x} \cdot (x_{int8}[:] - zp_{x}) + +where :math:`scale_{x}` is a *scaling factor* in float format, +:math:`zp_{x}` is the zero point in int32 format, and +:math:`[:]` is used to denote elementwise application of the formula +to the arrays. In order to provide best performance, oneDNN does not +compute those scaling factors and zero-points as part of primitive +computation. Those should be provided by the user through the +:ref:`attribute mechanism`. + +These quantization parameters can either be computed ahead of time +using calibration tools (*static* quantization) or at runtime based on +the actual minimum and maximum values of a tensor (*dynamic* +quantization). Either method can be used in conjunction with oneDNN, as +the quantization parameters are passed to the oneDNN primitives at +execution time. + +To support int8 quantization, primitives should be created and +executed as follows: + +- during primitive creation, if one or multiple inputs are int8 + (signed or not), then the primitive will behave as a quantized + integer operation. +- still during primitive creation, the dimensionality of the scaling + factors and zero-point should be provided using masks (e.g. one + scale per tensor, one scale per channel, ...). +- finally, during primitive execution, the user must provide the + actual quantization parameters as arguments to the execute function. + Scales shall be f32 values, and zero-points shall be int32 values. + +.. note:: + + For performance reasons, each primitive implementation can support + only a subset of quantization parameter masks. For example, + convolution typically supports per-channel scales (no zero-point) + for weights, and per-tensor scaling factor and zero-point for + activation. + +.. note:: + + Some primitives might use quantization parameters in order to + dequantize/quantize intermediate values. 
This is for example the + case for the :ref:`rnn-label` primitive, which will dequantize + before applying nonlinear functions, and will requantize before + executing matrix multiplication operations. + + +Numerical behavior +__________________ + +Primitive implementations are allowed to convert int8 inputs to wider +datatypes (e.g. int16 or int32), as those conversions do not impact +accuracy. + +During execution, primitives should avoid integer overflows and +maintain integer accuracy by using wider datatypes (e.g. int32) for +intermediate values and accumulators. Those are then converted as +necessary before the result is written to the output memory objects. +During that conversion, the behavior in case of underflow/overflow is +undefined (e.g. when converting `s32` to int8). However, it is highly +encouraged for implementations to saturate values. + +When multiple operations are fused in a single primitive using the +:ref:`post-op mechanism`, those are assumed to be +computed in f32 precision. As a result the destination quantization +parameters are applied after the post-ops as follows: + +.. math:: + + \dst[:] = post\_ops(OP(src[:], weights[:], ...)) / scale_{\dst} + zp_{\dst} + +Quantizing/dequantizing values between post-operations can still be +achieved using one of :ref:`eltwise post-ops`, +:ref:`binary post-ops`, or the scale parameter +of the appropriate post-operation. + + +Example: Convolution Quantization Workflow +------------------------------------------ + +Consider a convolution without bias. 
The tensors are represented as: + +- :math:`\src_{f32}[:] = scale_{\src} \cdot (\src_{int8}[:] - zp_{\src})` +- :math:`\weights_{f32}[:] = scale_{\weights} \cdot \weights_{int8}[:]` +- :math:`\dst_{f32}[:] = scale_{\dst} \cdot (\dst_{int8}[:] - zp_{\dst})` + +Here the :math:`\src_{f32}, \weights_{f32}, \dst_{f32}` are not +computed at all, the whole work happens with int8 tensors. So the task +is to compute the :math:`\dst_{int8}` tensor, using the `\src_{int8}`, +`\weights_{int8}` tensors passed at execution time, as well as the +corresponding quantization parameters `scale_{\src}, scale_{\weights}, +scale_{\dst}` and `zp_{\src}, +zp_{\dst}`. Mathematically, the computations are: + +.. math:: + + \dst_{int8}[:] = + \operatorname{f32\_to\_int8}( + scale_{\src} \cdot scale_{\weights} \cdot + \operatorname{s32\_to\_f32}(conv_{s32}(\src_{int8}, \weights_{int8}) + - zp_{\src} \cdot comp_{s32}) / scale_{\dst} + + zp_{\dst} ) + +where + +- :math:`conv_{s32}` is just a regular convolution which takes source and + weights with int8 data type and computes the result in int32 data type (int32 + is chosen to avoid overflows during the computations); + +- :math:`comp_{s32}` is a compensation term to account for + `\src` non-zero zero point. This term is computed by the oneDNN + library and can typically be pre-computed ahead of time, for example + during weights reorder. + +- :math:`\operatorname{f32\_to\_int8}()` converts an `f32` value to int8 with + potential saturation if the values are out of the range of the int8 data + type. + +- :math:`\operatorname{s32\_to\_f32}()` converts an `s32` value to + `f32` with potential rounding. This conversion is typically + necessary to apply `f32` scaling factors. + + +Per-Channel Scaling +------------------- + +Primitives may have limited support of multiple scales for a quantized tensor. 
+The most popular use case is the :ref:`convolution-label` primitive, which +supports per-output-channel scaling factors for the weights, meaning that the +actual convolution computations would need to scale different output channels +differently. + +- :math:`\src_{f32}(n, ic, ih, iw) = scale_{\src} \cdot \src_{int8}(n, ic, ih, iw)` + +- :math:`\weights_{f32}(oc, ic, kh, kw) = scale_{\weights}(oc) \cdot \weights_{int8}(oc, ic, kh, kw)` + +- :math:`\dst_{f32}(n, oc, oh, ow) = scale_{\dst} \cdot \dst_{int8}(n, oc, oh, ow)` + +Note that now the weights' scaling factor depends on :math:`oc`. + +To compute the :math:`\dst_{int8}` we need to perform the following: + +.. math:: + + \dst_{int8}(n, oc, oh, ow) = + \operatorname{f32\_to\_int8}( + \frac{scale_{\src} \cdot scale_{\weights}(oc)}{scale_{\dst}} \cdot + conv_{s32}(\src_{int8}, \weights_{int8})|_{(n, oc, oh, ow)} + ). + +The user is responsible for preparing quantized weights accordingly. To do that, +oneDNN provides reorders that can perform per-channel scaling: + +.. math:: + + \weights_{int8}(oc, ic, kh, kw) = + \operatorname{f32\_to\_int8}( + \weights_{f32}(oc, ic, kh, kw) / scale_{\weights}(oc) + ). + +The :ref:`attributes-quantization-label` describes what kind of quantization +model oneDNN supports. + +******* +Support +******* + +oneDNN supports int8 computations for inference by allowing users to specify that +primitive input and output memory objects use int8 data types. + + +.. 
vim: ts=3 sw=3 et spell spelllang=en diff --git a/source/elements/oneDNN/source/data_model/data_types/.#int8.rst b/source/elements/oneDNN/source/data_model/data_types/.#int8.rst new file mode 120000 index 000000000..a2ef8f87d --- /dev/null +++ b/source/elements/oneDNN/source/data_model/data_types/.#int8.rst @@ -0,0 +1 @@ +rscohn1@anpfclxlin02.462074:1674669906 \ No newline at end of file diff --git a/source/elements/oneDNN/source/data_model/data_types/int8.rst b/source/elements/oneDNN/source/data_model/data_types/int8.rst index 0b08821cb..09773bb12 100644 --- a/source/elements/oneDNN/source/data_model/data_types/int8.rst +++ b/source/elements/oneDNN/source/data_model/data_types/int8.rst @@ -19,7 +19,7 @@ acceptable) accuracy. Workflow ******** -oneDNN support symmetric and assymetric quantization models. +oneDNN support symmetric and asymmetric quantization models. .. _int8-quantization-label: @@ -27,7 +27,7 @@ Quantization Model ================== For each int8 tensor, the oneDNN library allows to specify scaling -factors and zero-points (also refered to as quantization +factors and zero-points (also referred to as quantization parameters), and assumes the following mathematical relationship: .. math:: diff --git a/source/elements/oneDNN/source/graph/programming_model.rst b/source/elements/oneDNN/source/graph/programming_model.rst index 45c951441..7ff22eb93 100644 --- a/source/elements/oneDNN/source/graph/programming_model.rst +++ b/source/elements/oneDNN/source/graph/programming_model.rst @@ -177,14 +177,14 @@ tensors with the same id should be identical at the graph construction time. Once the graph is fully described, |finalize| should be called. This prevents any other operation from being added, and allows to call |get_partitions| in order to get the set of partitions for that -graph. The graph doesn’t hold any meaning to the user after +graph. The graph does not hold any meaning to the user after partitioning and should freed by the user. 
All the OPs added to the graph will be contained in one of the returned partitions. If an OP is not supported by the oneDNN Graph API implementation, the corresponding partition will be marked as “not supported”. Users can check the supporting status of a partition via the |is_supported|. Partitions -should not form cyclic dependence within the graph. If user doesn’t pass a +should not form cyclic dependence within the graph. If user does not pass a complete graph, it is the user's responsibility to detect any dependence cycle between the partitions and operations not passing to oneDNN Graph implementation. diff --git a/source/elements/oneDPL/source/common.rst b/source/elements/oneDPL/source/common.rst index ad3908974..1acb0c473 100644 --- a/source/elements/oneDPL/source/common.rst +++ b/source/elements/oneDPL/source/common.rst @@ -10,6 +10,6 @@ functionality including parallel algorithms, oneDPL execution policies, etc. For the subset of the standard C++ library for kernels, the standard class and function names are also aliased in :code:`namespace oneapi::dpl`. -oneDPL uses nested namespaces for the functionality aligned with C++ standard. +oneDPL uses nested namespaces for the functionality aligned with the C++ standard. The names of those namespaces are the same as in :code:`namespace std`. For example, oneDPL execution policies are provided in :code:`namespace oneapi::dpl::execution`. diff --git a/source/elements/oneDPL/source/index.rst b/source/elements/oneDPL/source/index.rst index 84571a504..ec941bd85 100644 --- a/source/elements/oneDPL/source/index.rst +++ b/source/elements/oneDPL/source/index.rst @@ -13,6 +13,10 @@ specified in the `C++ standard`_, with extensions to support data parallelism and offloading to devices, and with extensions to simplify its usage for implementing data parallel algorithms. +.. 
note:: + Unless specified otherwise, in this document the `C++ standard`_ refers to + ISO/IEC 14882:2017 Programming languages - C++, commonly known as C++17. + The library is comprised of the following components: - :doc:`Parallel API `: @@ -26,12 +30,12 @@ The library is comprised of the following components: - :doc:`SYCL Kernels API `: - * A subset of the `C++ standard`_ library which can be used with + * A subset of the C++ standard library which can be used with buffers and data parallel kernels. * Support of random number generation including engines and distributions. - * Various utilities in addition to C++ standard functionality. + * Various utilities in addition to the C++ standard functionality. .. toctree:: diff --git a/source/elements/oneDPL/source/parallel_api.rst b/source/elements/oneDPL/source/parallel_api.rst index 24ca5c91d..b3f96f359 100644 --- a/source/elements/oneDPL/source/parallel_api.rst +++ b/source/elements/oneDPL/source/parallel_api.rst @@ -5,18 +5,19 @@ Parallel API ------------ -oneDPL provides the set of algorithms with execution policies as defined by the `C++ Standard`_. +oneDPL provides the set of parallel algorithms as defined by the `C++ Standard`_, +including parallel algorithms added in the 6th edition known as C++20. All those algorithms work with *C++ Standard aligned execution policies* and with *DPC++ execution policies*. Additionally, oneDPL provides wrapper functions for `SYCL`_ buffers, special iterators, and -a set of non-standard parallel algortithms. +a set of non-standard parallel algorithms. C++ Standard aligned execution policies +++++++++++++++++++++++++++++++++++++++ oneDPL has the set of execution policies and related utilities that are semantically aligned -with the C++ Standard: +with the `C++ Standard`_, 6th edition (C++20): .. 
code:: cpp @@ -196,30 +197,30 @@ Buffer wrappers namespace oneapi { namespace dpl { - template < typename T, typename AllocatorT, sycl::access::mode Mode > + template < typename T, typename AllocatorT, typename TagT > /*unspecified*/ begin( sycl::buffer buf, - sycl::mode_tag_t tag = sycl::read_write ); + TagT tag = sycl::read_write ); - template < typename T, typename AllocatorT, sycl::access::mode Mode > + template < typename T, typename AllocatorT, typename TagT > /*unspecified*/ begin( sycl::buffer buf, - sycl::mode_tag_t tag, sycl::property::noinit ); + TagT tag, sycl::property::no_init ); template < typename T, typename AllocatorT > /*unspecified*/ begin( sycl::buffer buf, - sycl::property::noinit ); + sycl::property::no_init ); - template < typename T, typename AllocatorT, sycl::access::mode Mode > + template < typename T, typename AllocatorT, typename TagT > /*unspecified*/ end( sycl::buffer buf, - sycl::mode_tag_t tag = sycl::read_write ); + TagT tag = sycl::read_write ); - template < typename T, typename AllocatorT, sycl::access::mode Mode > + template < typename T, typename AllocatorT, typename TagT > /*unspecified*/ end( sycl::buffer buf, - sycl::mode_tag_t tag, sycl::property::noinit ); + TagT tag, sycl::property::no_init ); template < typename T, typename AllocatorT > /*unspecified*/ end( sycl::buffer buf, - sycl::property::noinit ); + sycl::property::no_init ); } } @@ -240,8 +241,8 @@ of an unspecified type that satisfies the following requirements: When invoking an algorithm, the buffer passed to ``begin`` should be the same as the buffer passed to ``end``. Otherwise, the behavior is undefined. -``sycl::mode_tag_t`` and ``sycl::property::noinit`` parameters allow to specify -an access mode to be used for accessing the buffer by algorithms. +SYCL deduction tags (the ``TagT`` parameters) and ``sycl::property::no_init`` +allow to specify an access mode to be used by algorithms for accessing the buffer. 
The mode serves as a hint, and can be overridden depending on semantics of the algorithm. When invoking an algorithm, the same access mode arguments should be used for ``begin`` and ``end``. Otherwise, the behavior is undefined. @@ -251,9 +252,9 @@ for ``begin`` and ``end``. Otherwise, the behavior is undefined. using namespace oneapi; auto buf_begin = dpl::begin(buf, sycl::write_only); auto buf_end_1 = dpl::end(buf, sycl::write_only); - auto buf_end_2 = dpl::end(buf, sycl::write_only, sycl::noinit); - dpl::fill(dpl::dpcpp_default, buf_begin, buf_end_1, 42); // allowed - dpl::fill(dpl::dpcpp_default, buf_begin, buf_end_2, 42); // not allowed + auto buf_end_2 = dpl::end(buf, sycl::write_only, sycl::no_init); + dpl::fill(dpl::execution::dpcpp_default, buf_begin, buf_end_1, 42); // allowed + dpl::fill(dpl::execution::dpcpp_default, buf_begin, buf_end_2, 42); // not allowed Iterators +++++++++ diff --git a/source/elements/oneDPL/source/sycl_kernels_api.rst b/source/elements/oneDPL/source/sycl_kernels_api.rst index dc1a258af..507e8a5ff 100644 --- a/source/elements/oneDPL/source/sycl_kernels_api.rst +++ b/source/elements/oneDPL/source/sycl_kernels_api.rst @@ -14,14 +14,16 @@ CPU-based platform. .. _`C++ Standard`: https://isocpp.org/std/the-standard + Random Number Generation ++++++++++++++++++++++++ oneDPL provides a subset of the standard C++ pseudo-random number generation functionality -suitable to use within SYCL kernels. The APIs are defined in the :code:`` header. +suitable to use within SYCL kernels. The APIs are defined in the :code:`` header. Supported functionality: ------------------------ + - Engine class templates: - ``linear_congruential_engine`` - ``subtract_with_carry_engine`` @@ -46,10 +48,11 @@ Supported functionality: - ``cauchy_distribution`` - ``extreme_value_distribution`` -Additionally, ``sycl::vec<>`` can be used as the result type for engines, engine adaptors, and distributions. 
+``linear_congruential_engine`` and ``subtract_with_carry_engine`` satisfy the uniform random bit generator requirements. Limitations: ------------ + The following deviations from the `C++ Standard`_ may apply: - ``random_device`` and ``seed_seq`` classes and related APIs in other classes are not required; @@ -57,6 +60,100 @@ The following deviations from the `C++ Standard`_ may apply: - specifying the size of a random number engine's state is not required; - distributions are only required to operate with floating point types applicable to supported SYCL devices. +Extensions: +----------- + +As an extension to the `C++ Standard`_, ``sycl::vec`` can be used as the data type template parameter for +engines, engine adaptors, and distributions, where ``Type`` is one of data types supported by the corresponding +class template in the standard. For such template instantiations, the ``result_type`` is also defined to +``sycl::vec``. + +Engines, engine adaptors, and distributions additionally define ``scalar_type``, equivalent to the following: + +- ``using scalar_type = typename result_type::element_type;`` if ``result_type`` is ``sycl::vec``, +- otherwise, ``using scalar_type = result_type;`` + +The ``scalar_type`` is used instead of ``result_type`` in all contexts where a scalar data type is expected, including + +- the type of configuration parameters and properties, +- the seed value type, +- the input parameters of constructors, +- the return value type of ``min()`` and ``max()`` methods, etc. + +Since ``scalar_type`` is the same as ``result_type`` except for template instantiations with ``sycl::vec``, +class templates still meet the applicable requirements of the `C++ Standard`_. + +When instantiated with ``sycl::vec``, ``linear_congruential_engine`` and ``subtract_with_carry_engine`` may not +formally satisfy the uniform random bit generator requirements defined by the `C++ Standard`_. 
Instead, the following +alternative requirements apply: for an engine object ``g`` of type ``G``, + +- ``G::scalar_type`` is an unsigned integral type same as ``sycl::vec::element_type``, +- ``G::min()`` and ``G::max()`` return a value of ``G::scalar_type``, +- for each index ``i`` in the range [``0``, ``N``), ``G::min() <= g()[i]`` and ``g()[i] <= G::max()``. + +Effectively, these engines satisfy the standard *uniform random bit generator* requirements for each element +of a ``sycl::vec`` returned by their ``operator()``. + +Similarly, for a distribution object ``d`` of a type ``D`` that is a template instantiated with ``sycl::vec``: + +- ``D::scalar_type`` is the same as ``sycl::vec::element_type``, +- ``D::min()`` and ``D::max()`` return a value of ``D::scalar_type``, and ``D::min() <= D::max()``, +- ``operator()`` of a distribution returns a ``sycl::vec`` filled with random values + in the closed interval ``[D::min(), D::max()]``; + +The following engines and engine adaptors with predefined parameters are defined: + +.. code:: cpp + + template + using minstd_rand0_vec = linear_congruential_engine, 16807, 0, 2147483647>; + + template + using minstd_rand_vec = linear_congruential_engine, 48271, 0, 2147483647>; + + template + using ranlux24_base_vec = subtract_with_carry_engine, 24, 10, 24>; + + template + using ranlux48_base_vec = subtract_with_carry_engine, 48, 5, 12>; + + template + using ranlux24_vec = discard_block_engine, 223, 23>; + + template + using ranlux48_vec = discard_block_engine, 389, 11>; + +Except for producing a ``sycl::vec`` of random values per invocation, the behavior of these engines is equivalent to +the corresponding scalar engines, as described in the following table: + +.. container:: tablenoborder + + .. 
list-table:: + :header-rows: 1 + + * - Engines and engine adaptors based on ``sycl::vec<>`` + - C++ standard analogue + - The 10000th scalar random value consecutively produced by a default-constructed object + * - ``minstd_rand0_vec`` + - ``minstd_rand0`` + - 1043618065 + * - ``minstd_rand_vec`` + - ``minstd_rand`` + - 399268537 + * - ``ranlux24_base_vec`` + - ``ranlux24_base`` + - 7937952 + * - ``ranlux48_base_vec`` + - ``ranlux48_base`` + - 61839128582725 + * - ``ranlux24_vec`` + - ``ranlux24`` + - 9901578 + * - ``ranlux48_vec`` + - ``ranlux48`` + - 1112339016 + + Function Objects ++++++++++++++++ @@ -75,5 +172,5 @@ The oneDPL function objects are defined in the :code:`` h } } -The :code:`oneapi::dpl::identity` class implements an identity operation. Its function operator +The :code:`oneapi::dpl::identity` class implements an identity operation. Its function operator receives an instance of a type and returns the argument unchanged. diff --git a/source/elements/oneMKL/source/architecture/api_design.inc.rst b/source/elements/oneMKL/source/architecture/api_design.inc.rst index 9c44584c5..00d8753cc 100644 --- a/source/elements/oneMKL/source/architecture/api_design.inc.rst +++ b/source/elements/oneMKL/source/architecture/api_design.inc.rst @@ -23,7 +23,7 @@ namespace oneMKL domain or content ``oneapi::mkl::blas`` Dense linear algebra routines from BLAS and BLAS like extensions. The oneapi::mkl::blas namespace should contain two namespaces column_major and row_major to support both matrix layouts. See :ref:`onemkl_blas` ``oneapi::mkl::lapack`` Dense linear algebra routines from LAPACK and LAPACK like extensions. See :ref:`onemkl_lapack` ``oneapi::mkl::sparse`` Sparse linear algebra routines from Sparse BLAS and Sparse Solvers. See :ref:`onemkl_sparse_linear_algebra` -``oneapi::mkl::dft`` Discrete and fast Fourier transformations. See :ref:`onemkl_dft` +``oneapi::mkl::dft`` Discrete Fourier Transforms. 
See :ref:`onemkl_dft` ``oneapi::mkl::rng`` Random number generator routines. See :ref:`onemkl_rng` ``oneapi::mkl::vm`` Vector mathematics routines, e.g. trigonometric, exponential functions acting on elements of a vector. See :ref:`onemkl_vm` ======================== ======================================================================================================= diff --git a/source/elements/oneMKL/source/domains/blas/axpby.rst b/source/elements/oneMKL/source/domains/blas/axpby.rst index 2fc7fd490..8e499c173 100644 --- a/source/elements/oneMKL/source/domains/blas/axpby.rst +++ b/source/elements/oneMKL/source/domains/blas/axpby.rst @@ -127,9 +127,9 @@ axpby (USM Version) namespace oneapi::mkl::blas::column_major { sycl::event axpby(sycl::queue &queue, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, - const T beta, + value_or_pointer beta, T *y, std::int64_t incy, const std::vector &dependencies = {}) } @@ -138,9 +138,9 @@ axpby (USM Version) namespace oneapi::mkl::blas::row_major { sycl::event axpby(sycl::queue &queue, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, - const T beta, + value_or_pointer beta, T *y, std::int64_t incy, const std::vector &dependencies = {}) } @@ -156,10 +156,10 @@ axpby (USM Version) Number of elements in vector ``x`` and ``y``. alpha - Specifies the scalar alpha. + Specifies the scalar alpha. See :ref:`value_or_pointer` for more details. beta - Specifies the scalar beta. + Specifies the scalar beta. See :ref:`value_or_pointer` for more details. x Pointer to the input vector ``x``. 
The allocated memory must be diff --git a/source/elements/oneMKL/source/domains/blas/axpy.rst b/source/elements/oneMKL/source/domains/blas/axpy.rst index deffa4474..d44d7bb9f 100644 --- a/source/elements/oneMKL/source/domains/blas/axpy.rst +++ b/source/elements/oneMKL/source/domains/blas/axpy.rst @@ -138,7 +138,7 @@ axpy (USM Version) namespace oneapi::mkl::blas::column_major { sycl::event axpy(sycl::queue &queue, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, T *y, @@ -150,7 +150,7 @@ axpy (USM Version) namespace oneapi::mkl::blas::row_major { sycl::event axpy(sycl::queue &queue, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, T *y, @@ -169,7 +169,7 @@ axpy (USM Version) Number of elements in vector ``x``. alpha - Specifies the scalar alpha. + Specifies the scalar alpha. See :ref:`value_or_pointer` for more details. x Pointer to the input vector ``x``. The array holding the vector diff --git a/source/elements/oneMKL/source/domains/blas/axpy_batch.rst b/source/elements/oneMKL/source/domains/blas/axpy_batch.rst index 68f8ffd51..cc3a8ff43 100644 --- a/source/elements/oneMKL/source/domains/blas/axpy_batch.rst +++ b/source/elements/oneMKL/source/domains/blas/axpy_batch.rst @@ -292,7 +292,7 @@ The total number of vectors in ``x`` and ``y`` are given by the ``batch_size`` p namespace oneapi::mkl::blas::column_major { sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, std::int64_t stridex, @@ -307,7 +307,7 @@ The total number of vectors in ``x`` and ``y`` are given by the ``batch_size`` p namespace oneapi::mkl::blas::row_major { sycl::event axpy_batch(sycl::queue &queue, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, std::int64_t stridex, @@ -329,7 +329,7 @@ The total number of vectors in ``x`` and ``y`` are given by the ``batch_size`` p Number of elements in ``X`` and ``Y``. 
alpha - Specifies the scalar ``alpha``. + Specifies the scalar ``alpha``. See :ref:`value_or_pointer` for more details. x Pointer to input vectors ``X`` with size ``stridex`` * ``batch_size``. diff --git a/source/elements/oneMKL/source/domains/blas/gbmv.rst b/source/elements/oneMKL/source/domains/blas/gbmv.rst index e4bc714d0..bb3e22b81 100644 --- a/source/elements/oneMKL/source/domains/blas/gbmv.rst +++ b/source/elements/oneMKL/source/domains/blas/gbmv.rst @@ -195,12 +195,12 @@ gbmv (USM Version) std::int64_t n, std::int64_t kl, std::int64_t ku, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *x, std::int64_t incx, - T beta, + value_or_pointer beta, T *y, std::int64_t incy, const std::vector &dependencies = {}) @@ -214,12 +214,12 @@ gbmv (USM Version) std::int64_t n, std::int64_t kl, std::int64_t ku, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *x, std::int64_t incx, - T beta, + value_or_pointer beta, T *y, std::int64_t incy, const std::vector &dependencies = {}) @@ -253,7 +253,7 @@ gbmv (USM Version) zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. The array holding input matrix @@ -276,7 +276,7 @@ gbmv (USM Version) Stride of vector ``x``. Must not be zero. beta - Scaling factor for vector ``y``. + Scaling factor for vector ``y``. See :ref:`value_or_pointer` for more details. y Pointer to input/output vector ``y``. 
The length ``len`` of diff --git a/source/elements/oneMKL/source/domains/blas/gemm.rst b/source/elements/oneMKL/source/domains/blas/gemm.rst index ce333f2c4..d5011fbf5 100644 --- a/source/elements/oneMKL/source/domains/blas/gemm.rst +++ b/source/elements/oneMKL/source/domains/blas/gemm.rst @@ -308,12 +308,12 @@ gemm (USM Version) std::int64_t m, std::int64_t n, std::int64_t k, - Ts alpha, + value_or_pointer alpha, const Ta *a, std::int64_t lda, const Tb *b, std::int64_t ldb, - Ts beta, + value_or_pointer beta, Tc *c, std::int64_t ldc, const std::vector &dependencies = {}) @@ -327,12 +327,12 @@ gemm (USM Version) std::int64_t m, std::int64_t n, std::int64_t k, - Ts alpha, + value_or_pointer alpha, const Ta *a, std::int64_t lda, const Tb *b, std::int64_t ldb, - Ts beta, + value_or_pointer beta, Tc *c, std::int64_t ldc, const std::vector &dependencies = {}) @@ -373,7 +373,7 @@ gemm (USM Version) alpha - Scaling factor for the matrix-matrix product. + Scaling factor for the matrix-matrix product. See :ref:`value_or_pointer` for more details. a @@ -453,7 +453,7 @@ gemm (USM Version) - ``ldb`` must be at least ``k``. beta - Scaling factor for matrix ``C``. + Scaling factor for matrix ``C``. See :ref:`value_or_pointer` for more details. c The pointer to input/output matrix ``C``. It must have a diff --git a/source/elements/oneMKL/source/domains/blas/gemm_batch.rst b/source/elements/oneMKL/source/domains/blas/gemm_batch.rst index 08b870b46..aba66c725 100644 --- a/source/elements/oneMKL/source/domains/blas/gemm_batch.rst +++ b/source/elements/oneMKL/source/domains/blas/gemm_batch.rst @@ -593,14 +593,14 @@ in ``a``, ``b`` and ``c`` are given by the ``batch_size`` parameter. 
std::int64_t m, std::int64_t n, std::int64_t k, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, std::int64_t stridea, const T *b, std::int64_t ldb, std::int64_t strideb, - T beta, + value_or_pointer beta, T *c, std::int64_t ldc, std::int64_t stridec, @@ -616,14 +616,14 @@ in ``a``, ``b`` and ``c`` are given by the ``batch_size`` parameter. std::int64_t m, std::int64_t n, std::int64_t k, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, std::int64_t stridea, const T *b, std::int64_t ldb, std::int64_t strideb, - T beta, + value_or_pointer beta, T *c, std::int64_t ldc, std::int64_t stridec, @@ -657,7 +657,7 @@ in ``a``, ``b`` and ``c`` are given by the ``batch_size`` parameter. least zero. alpha - Scaling factor for the matrix-matrix products. + Scaling factor for the matrix-matrix products. See :ref:`value_or_pointer` for more details. a Pointer to input matrices ``A`` with size ``stridea`` * ``batch_size``. @@ -704,7 +704,7 @@ in ``a``, ``b`` and ``c`` are given by the ``batch_size`` parameter. Stride between different ``B`` matrices. beta - Scaling factor for the matrices ``C``. + Scaling factor for the matrices ``C``. See :ref:`value_or_pointer` for more details. c Pointer to input/output matrices ``C`` with size ``stridec`` * ``batch_size``. 
diff --git a/source/elements/oneMKL/source/domains/blas/gemm_bias.rst b/source/elements/oneMKL/source/domains/blas/gemm_bias.rst index 3d894f599..93d1a3fa6 100644 --- a/source/elements/oneMKL/source/domains/blas/gemm_bias.rst +++ b/source/elements/oneMKL/source/domains/blas/gemm_bias.rst @@ -311,14 +311,14 @@ gemm_bias (USM Version) std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, + value_or_pointer alpha, const Ta *a, std::int64_t lda, Ta ao, const Tb *b, std::int64_t ldb, Tb bo, - float beta, + value_or_pointer beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, @@ -334,14 +334,14 @@ gemm_bias (USM Version) std::int64_t m, std::int64_t n, std::int64_t k, - float alpha, + value_or_pointer alpha, const Ta *a, std::int64_t lda, Ta ao, const Tb *b, std::int64_t ldb, Tb bo, - float beta, + value_or_pointer beta, std::int32_t *c, std::int64_t ldc, const std::int32_t *co, @@ -385,7 +385,7 @@ gemm_bias (USM Version) at least zero. alpha - Scaling factor for the matrix-matrix product. + Scaling factor for the matrix-matrix product. See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. @@ -470,7 +470,7 @@ gemm_bias (USM Version) Specifies the scalar offset value for matrix ``B``. beta - Scaling factor for matrix ``C``. + Scaling factor for matrix ``C``. See :ref:`value_or_pointer` for more details. c Pointer to input/output matrix ``C``. 
It must have a diff --git a/source/elements/oneMKL/source/domains/blas/gemmt.rst b/source/elements/oneMKL/source/domains/blas/gemmt.rst index 6f57e8128..fc27d0d69 100644 --- a/source/elements/oneMKL/source/domains/blas/gemmt.rst +++ b/source/elements/oneMKL/source/domains/blas/gemmt.rst @@ -259,12 +259,12 @@ gemmt (USM Version) onemkl::transpose transb, std::int64_t n, std::int64_t k, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *b, std::int64_t ldb, - T beta, + value_or_pointer beta, T *c, std::int64_t ldc, const std::vector &dependencies = {}) @@ -278,12 +278,12 @@ gemmt (USM Version) onemkl::transpose transb, std::int64_t n, std::int64_t k, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *b, std::int64_t ldb, - T beta, + value_or_pointer beta, T *c, std::int64_t ldc, const std::vector &dependencies = {}) @@ -323,7 +323,7 @@ gemmt (USM Version) at least zero. alpha - Scaling factor for the matrix-matrix product. + Scaling factor for the matrix-matrix product. See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. @@ -402,7 +402,7 @@ gemmt (USM Version) - ``ldb`` must be at least ``k``. beta - Scaling factor for matrix ``C``. + Scaling factor for matrix ``C``. See :ref:`value_or_pointer` for more details. c Pointer to input/output matrix ``C``. 
Must have size at least diff --git a/source/elements/oneMKL/source/domains/blas/gemv.rst b/source/elements/oneMKL/source/domains/blas/gemv.rst index 6097a15e7..3e29aff73 100644 --- a/source/elements/oneMKL/source/domains/blas/gemv.rst +++ b/source/elements/oneMKL/source/domains/blas/gemv.rst @@ -177,12 +177,12 @@ gemv (USM Version) onemkl::transpose trans, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *x, std::int64_t incx, - T beta, + value_or_pointer beta, T *y, std::int64_t incy, const std::vector &dependencies = {}) @@ -194,12 +194,12 @@ gemv (USM Version) onemkl::transpose trans, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *x, std::int64_t incx, - T beta, + value_or_pointer beta, T *y, std::int64_t incy, const std::vector &dependencies = {}) @@ -227,7 +227,7 @@ gemv (USM Version) of ``n`` must be at least zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. See :ref:`value_or_pointer` for more details. a Pointer to the input matrix ``A``. Must have a size of at @@ -251,7 +251,7 @@ gemv (USM Version) The stride of vector ``x``. Must not be zero. beta - The scaling factor for vector ``y``. + The scaling factor for vector ``y``. See :ref:`value_or_pointer` for more details. y Pointer to input/output vector ``y``. 
The length ``len`` of diff --git a/source/elements/oneMKL/source/domains/blas/gemv_batch.rst b/source/elements/oneMKL/source/domains/blas/gemv_batch.rst index 465001d71..9d95af749 100644 --- a/source/elements/oneMKL/source/domains/blas/gemv_batch.rst +++ b/source/elements/oneMKL/source/domains/blas/gemv_batch.rst @@ -380,14 +380,14 @@ total number of vectors in ``x`` and ``y`` and matrices in ``A`` are given by th onemkl::transpose trans, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, std::int64_t stridea, const T *x, std::int64_t incx, std::int64_t stridex, - T beta, + value_or_pointer beta, T *y, std::int64_t incy, std::int64_t stridey, @@ -401,14 +401,14 @@ total number of vectors in ``x`` and ``y`` and matrices in ``A`` are given by th onemkl::transpose trans, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, std::int64_t stridea, const T *x, std::int64_t incx, std::int64_t stridex, - T beta, + value_or_pointer beta, T *y, std::int64_t incy, std::int64_t stridey, @@ -435,7 +435,7 @@ total number of vectors in ``x`` and ``y`` and matrices in ``A`` are given by th Number of columns of op(``A``). Must be at least zero. alpha - Scaling factor for the matrix-vector products. + Scaling factor for the matrix-vector products. See :ref:`value_or_pointer` for more details. a Pointer to the input matrices ``A`` with size ``stridea`` * ``batch_size``. @@ -458,7 +458,7 @@ total number of vectors in ``x`` and ``y`` and matrices in ``A`` are given by th Stride between different consecutive ``X`` vectors, must be at least 0. beta - Scaling factor for the vector ``Y``. + Scaling factor for the vector ``Y``. See :ref:`value_or_pointer` for more details. y Pointer to the input/output vectors ``Y`` with size ``stridey`` * ``batch_size``. 
diff --git a/source/elements/oneMKL/source/domains/blas/ger.rst b/source/elements/oneMKL/source/domains/blas/ger.rst index 376ce6383..5179367eb 100644 --- a/source/elements/oneMKL/source/domains/blas/ger.rst +++ b/source/elements/oneMKL/source/domains/blas/ger.rst @@ -159,7 +159,7 @@ ger (USM Version) sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer<T> alpha, const T *x, std::int64_t incx, const T *y, @@ -174,7 +174,7 @@ ger (USM Version) sycl::event ger(sycl::queue &queue, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer<T> alpha, const T *x, std::int64_t incx, const T *y, @@ -198,7 +198,7 @@ ger (USM Version) Number of columns of ``A``. Must be at least zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. See :ref:`value_or_pointer` for more details. x Pointer to input vector ``x``. The array holding input vector diff --git a/source/elements/oneMKL/source/domains/blas/gerc.rst b/source/elements/oneMKL/source/domains/blas/gerc.rst index 4acb57c70..da2651358 100644 --- a/source/elements/oneMKL/source/domains/blas/gerc.rst +++ b/source/elements/oneMKL/source/domains/blas/gerc.rst @@ -161,7 +161,7 @@ gerc (USM Version) sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer<T> alpha, const T *x, std::int64_t incx, const T *y, @@ -176,7 +176,7 @@ gerc (USM Version) sycl::event gerc(sycl::queue &queue, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer<T> alpha, const T *x, std::int64_t incx, const T *y, @@ -200,7 +200,7 @@ gerc (USM Version) Number of columns of ``A``. Must be at least zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. See :ref:`value_or_pointer` for more details. x Pointer to the input vector ``x``. 
The array holding input diff --git a/source/elements/oneMKL/source/domains/blas/geru.rst b/source/elements/oneMKL/source/domains/blas/geru.rst index 8e609b3a9..b9952a75d 100644 --- a/source/elements/oneMKL/source/domains/blas/geru.rst +++ b/source/elements/oneMKL/source/domains/blas/geru.rst @@ -159,7 +159,7 @@ geru (USM Version) sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, const T *y, @@ -174,7 +174,7 @@ geru (USM Version) sycl::event geru(sycl::queue &queue, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, const T *y, @@ -198,7 +198,7 @@ geru (USM Version) Number of columns of ``A``. Must be at least zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. See :ref:`value_or_pointer` for more details. x Pointer to the input vector ``x``. The array holding input diff --git a/source/elements/oneMKL/source/domains/blas/hbmv.rst b/source/elements/oneMKL/source/domains/blas/hbmv.rst index 6ff016158..b1c42233b 100644 --- a/source/elements/oneMKL/source/domains/blas/hbmv.rst +++ b/source/elements/oneMKL/source/domains/blas/hbmv.rst @@ -169,12 +169,12 @@ hbmv (USM Version) onemkl::uplo upper_lower, std::int64_t n, std::int64_t k, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *x, std::int64_t incx, - T beta, + value_or_pointer beta, T *y, std::int64_t incy, const std::vector &dependencies = {}) @@ -186,12 +186,12 @@ hbmv (USM Version) onemkl::uplo upper_lower, std::int64_t n, std::int64_t k, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *x, std::int64_t incx, - T beta, + value_or_pointer beta, T *y, std::int64_t incy, const std::vector &dependencies = {}) @@ -215,7 +215,7 @@ hbmv (USM Version) zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. 
See :ref:`value_or_pointer` for more details. a Pointer to the input matrix ``A``. The array holding input @@ -237,7 +237,7 @@ hbmv (USM Version) Stride of vector ``x``. Must not be zero. beta - Scaling factor for vector ``y``. + Scaling factor for vector ``y``. See :ref:`value_or_pointer` for more details. y Pointer to input/output vector ``y``. The array holding diff --git a/source/elements/oneMKL/source/domains/blas/hemm.rst b/source/elements/oneMKL/source/domains/blas/hemm.rst index 0f29b815d..e59b3e264 100644 --- a/source/elements/oneMKL/source/domains/blas/hemm.rst +++ b/source/elements/oneMKL/source/domains/blas/hemm.rst @@ -210,12 +210,12 @@ hemm (USM Version) onemkl::uplo upper_lower, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *b, std::int64_t ldb, - T beta, + value_or_pointer beta, T *c, std::int64_t ldc, const std::vector &dependencies = {}) @@ -228,12 +228,12 @@ hemm (USM Version) onemkl::uplo upper_lower, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *b, std::int64_t ldb, - T beta, + value_or_pointer beta, T *c, std::int64_t ldc, const std::vector &dependencies = {}) @@ -266,7 +266,7 @@ hemm (USM Version) The value of ``n`` must be at least zero. alpha - Scaling factor for the matrix-matrix product. + Scaling factor for the matrix-matrix product. See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. Must have size at least @@ -293,7 +293,7 @@ hemm (USM Version) least ``n`` if column major layout is used to store matrices. beta - Scaling factor for matrix ``C``. + Scaling factor for matrix ``C``. See :ref:`value_or_pointer` for more details. c The pointer to input/output matrix ``C``. 
It must have a diff --git a/source/elements/oneMKL/source/domains/blas/hemv.rst b/source/elements/oneMKL/source/domains/blas/hemv.rst index 0e686af5b..fba44d1a8 100644 --- a/source/elements/oneMKL/source/domains/blas/hemv.rst +++ b/source/elements/oneMKL/source/domains/blas/hemv.rst @@ -161,12 +161,12 @@ hemv (USM Version) sycl::event hemv(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *x, std::int64_t incx, - T beta, + value_or_pointer beta, T *y, std::int64_t incy, const std::vector &dependencies = {}) @@ -177,12 +177,12 @@ hemv (USM Version) sycl::event hemv(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *x, std::int64_t incx, - T beta, + value_or_pointer beta, T *y, std::int64_t incy, const std::vector &dependencies = {}) @@ -202,7 +202,7 @@ hemv (USM Version) Number of rows and columns of ``A``. Must be at least zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. The array holding input matrix @@ -223,7 +223,7 @@ hemv (USM Version) Stride of vector ``x``. Must not be zero. beta - Scaling factor for vector ``y``. + Scaling factor for vector ``y``. See :ref:`value_or_pointer` for more details. y Pointer to input/output vector ``y``. 
The array holding diff --git a/source/elements/oneMKL/source/domains/blas/her.rst b/source/elements/oneMKL/source/domains/blas/her.rst index f5d08fa13..ffe0d4f9d 100644 --- a/source/elements/oneMKL/source/domains/blas/her.rst +++ b/source/elements/oneMKL/source/domains/blas/her.rst @@ -151,7 +151,7 @@ her (USM Version) sycl::event her(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - Treal alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, T *a, @@ -164,7 +164,7 @@ her (USM Version) sycl::event her(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - Treal alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, T *a, @@ -186,7 +186,7 @@ her (USM Version) Number of rows and columns of ``A``. Must be at least zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. See :ref:`value_or_pointer` for more details. x Pointer to input vector ``x``. The array holding input vector diff --git a/source/elements/oneMKL/source/domains/blas/her2.rst b/source/elements/oneMKL/source/domains/blas/her2.rst index 3fde2ff40..b9922e20f 100644 --- a/source/elements/oneMKL/source/domains/blas/her2.rst +++ b/source/elements/oneMKL/source/domains/blas/her2.rst @@ -160,7 +160,7 @@ her2 (USM Version) sycl::event her2(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, const T *y, @@ -175,7 +175,7 @@ her2 (USM Version) sycl::event her2(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, const T *y, @@ -199,7 +199,7 @@ her2 (USM Version) Number of columns of ``A``. Must be at least zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. See :ref:`value_or_pointer` for more details. x Pointer to input vector ``x``. 
The array holding input vector diff --git a/source/elements/oneMKL/source/domains/blas/her2k.rst b/source/elements/oneMKL/source/domains/blas/her2k.rst index 3ebdb72f0..9c158f264 100644 --- a/source/elements/oneMKL/source/domains/blas/her2k.rst +++ b/source/elements/oneMKL/source/domains/blas/her2k.rst @@ -253,12 +253,12 @@ her2k (USM Version) onemkl::transpose trans, std::int64_t n, std::int64_t k, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *b, std::int64_t ldb, - Treal beta, + value_or_pointer beta, T *c, std::int64_t ldc, const std::vector &dependencies = {}) @@ -271,12 +271,12 @@ her2k (USM Version) onemkl::transpose trans, std::int64_t n, std::int64_t k, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *b, std::int64_t ldb, - Treal beta, + value_or_pointer beta, T *c, std::int64_t ldc, const std::vector &dependencies = {}) @@ -307,7 +307,7 @@ her2k (USM Version) ``k`` must be at least equal to zero. alpha - Complex scaling factor for the rank-2k update. + Complex scaling factor for the rank-2k update. See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. @@ -387,7 +387,7 @@ her2k (USM Version) - ``ldb`` must be at least ``k``. beta - Real scaling factor for matrix ``C``. + Real scaling factor for matrix ``C``. See :ref:`value_or_pointer` for more details. c Pointer to input/output matrix ``C``. 
Must have size at least diff --git a/source/elements/oneMKL/source/domains/blas/herk.rst b/source/elements/oneMKL/source/domains/blas/herk.rst index caa1ac3f5..09a21c76b 100644 --- a/source/elements/oneMKL/source/domains/blas/herk.rst +++ b/source/elements/oneMKL/source/domains/blas/herk.rst @@ -204,10 +204,10 @@ herk (USM Version) onemkl::transpose trans, std::int64_t n, std::int64_t k, - Treal alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, - Treal beta, + value_or_pointer beta, T *c, std::int64_t ldc, const std::vector &dependencies = {}) @@ -220,10 +220,10 @@ herk (USM Version) onemkl::transpose trans, std::int64_t n, std::int64_t k, - Treal alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, - Treal beta, + value_or_pointer beta, T *c, std::int64_t ldc, const std::vector &dependencies = {}) @@ -255,7 +255,7 @@ herk (USM Version) The value of ``k`` must be at least zero. alpha - Real scaling factor for the rank-k update. + Real scaling factor for the rank-k update. See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. @@ -296,7 +296,7 @@ herk (USM Version) - ``lda`` must be at least ``n``. beta - Real scaling factor for matrix ``C``. + Real scaling factor for matrix ``C``. See :ref:`value_or_pointer` for more details. c Pointer to input/output matrix ``C``. 
Must have size at least diff --git a/source/elements/oneMKL/source/domains/blas/hpmv.rst b/source/elements/oneMKL/source/domains/blas/hpmv.rst index 07f1156e6..8e3db55de 100644 --- a/source/elements/oneMKL/source/domains/blas/hpmv.rst +++ b/source/elements/oneMKL/source/domains/blas/hpmv.rst @@ -158,11 +158,11 @@ hpmv (USM Version) sycl::event hpmv(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, const T *x, std::int64_t incx, - T beta, + value_or_pointer beta, T *y, std::int64_t incy, const std::vector &dependencies = {}) @@ -173,11 +173,11 @@ hpmv (USM Version) sycl::event hpmv(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, const T *x, std::int64_t incx, - T beta, + value_or_pointer beta, T *y, std::int64_t incy, const std::vector &dependencies = {}) @@ -197,7 +197,7 @@ hpmv (USM Version) Number of rows and columns of ``A``. Must be at least zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. The array holding input matrix @@ -218,7 +218,7 @@ hpmv (USM Version) Stride of vector ``x``. Must not be zero. beta - Scaling factor for vector ``y``. + Scaling factor for vector ``y``. See :ref:`value_or_pointer` for more details. y Pointer to input/output vector ``y``. 
The array holding diff --git a/source/elements/oneMKL/source/domains/blas/hpr.rst b/source/elements/oneMKL/source/domains/blas/hpr.rst index 503c5bf5e..4d41f7769 100644 --- a/source/elements/oneMKL/source/domains/blas/hpr.rst +++ b/source/elements/oneMKL/source/domains/blas/hpr.rst @@ -148,7 +148,7 @@ hpr (USM Version) sycl::event hpr(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - Treal alpha, + value_or_pointer<Treal> alpha, const T *x, std::int64_t incx, T *a, @@ -160,7 +160,7 @@ hpr (USM Version) sycl::event hpr(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - Treal alpha, + value_or_pointer<Treal> alpha, const T *x, std::int64_t incx, T *a, @@ -181,7 +181,7 @@ hpr (USM Version) Number of rows and columns of ``A``. Must be at least zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. See :ref:`value_or_pointer` for more details. x Pointer to input vector ``x``. The array holding input vector diff --git a/source/elements/oneMKL/source/domains/blas/hpr2.rst b/source/elements/oneMKL/source/domains/blas/hpr2.rst index 41c9351ad..8172b204f 100644 --- a/source/elements/oneMKL/source/domains/blas/hpr2.rst +++ b/source/elements/oneMKL/source/domains/blas/hpr2.rst @@ -157,7 +157,7 @@ hpr2 (USM Version) sycl::event hpr2(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer<T> alpha, const T *x, std::int64_t incx, const T *y, @@ -171,7 +171,7 @@ hpr2 (USM Version) sycl::event hpr2(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer<T> alpha, const T *x, std::int64_t incx, const T *y, @@ -194,7 +194,7 @@ hpr2 (USM Version) Number of rows and columns of ``A``. Must be at least zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. See :ref:`value_or_pointer` for more details. x Pointer to input vector ``x``. 
The array holding input vector diff --git a/source/elements/oneMKL/source/domains/blas/imatcopy.rst b/source/elements/oneMKL/source/domains/blas/imatcopy.rst index 7c9f47cb1..5c8c0ecfb 100644 --- a/source/elements/oneMKL/source/domains/blas/imatcopy.rst +++ b/source/elements/oneMKL/source/domains/blas/imatcopy.rst @@ -178,7 +178,7 @@ imatcopy (USM Version) oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, T *matrix_in_out, std::int64_t ld_in, std::int64_t ld_out, @@ -190,7 +190,7 @@ imatcopy (USM Version) oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, T *matrix_in_out, std::int64_t ld_in, std::int64_t ld_out, @@ -214,7 +214,7 @@ imatcopy (USM Version) Number of columns for the matrix ``C`` on input. Must be at least zero. alpha - Scaling factor for the matrix transpose or copy operation. + Scaling factor for the matrix transpose or copy operation. See :ref:`value_or_pointer` for more details. matrix_in_out Pointer to input/output matrix ``C``. Must have size as follows: diff --git a/source/elements/oneMKL/source/domains/blas/imatcopy_batch.rst b/source/elements/oneMKL/source/domains/blas/imatcopy_batch.rst index 7f14f5dbe..5de027211 100644 --- a/source/elements/oneMKL/source/domains/blas/imatcopy_batch.rst +++ b/source/elements/oneMKL/source/domains/blas/imatcopy_batch.rst @@ -386,7 +386,7 @@ matrices is given by the ``batch_size`` parameter. oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *matrix_array_in_out, std::int64_t ld_in, std::int64_t ld_out, @@ -400,7 +400,7 @@ matrices is given by the ``batch_size`` parameter. oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *matrix_array_in_out, std::int64_t ld_in, std::int64_t ld_out, @@ -426,7 +426,7 @@ matrices is given by the ``batch_size`` parameter. Number of columns for each matrix ``C`` on input. 
Must be at least 0. alpha - Scaling factor for the matrix transpose or copy operation. + Scaling factor for the matrix transpose or copy operation. See :ref:`value_or_pointer` for more details. matrix_array_in_out Array holding the matrices ``C``. Must have size at least diff --git a/source/elements/oneMKL/source/domains/blas/omatadd.rst b/source/elements/oneMKL/source/domains/blas/omatadd.rst index 8f135979a..953e57279 100644 --- a/source/elements/oneMKL/source/domains/blas/omatadd.rst +++ b/source/elements/oneMKL/source/domains/blas/omatadd.rst @@ -249,10 +249,10 @@ omatadd (USM Version) oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, - T beta, + value_or_pointer beta, const T *b, std::int64_t ldb, T *c, @@ -267,10 +267,10 @@ omatadd (USM Version) oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, - T beta, + value_or_pointer beta, const T *b, std::int64_t ldb, T *c, @@ -300,7 +300,7 @@ omatadd (USM Version) Number of columns for the result matrix ``C``. Must be at least zero. alpha - Scaling factor for the matrix ``A``. + Scaling factor for the matrix ``A``. See :ref:`value_or_pointer` for more details. a Array holding the input matrix ``A``. @@ -335,7 +335,7 @@ omatadd (USM Version) - ``lda`` must be at least ``m``. beta - Scaling factor for the matrices ``B``. + Scaling factor for the matrices ``B``. See :ref:`value_or_pointer` for more details. b Array holding the input matrices ``B``. 
diff --git a/source/elements/oneMKL/source/domains/blas/omatadd_batch.rst b/source/elements/oneMKL/source/domains/blas/omatadd_batch.rst index 17c9ffc52..200acbaae 100644 --- a/source/elements/oneMKL/source/domains/blas/omatadd_batch.rst +++ b/source/elements/oneMKL/source/domains/blas/omatadd_batch.rst @@ -570,11 +570,11 @@ in-place operations: oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, std::int64_t stride_a, - T beta, + value_or_pointer beta, T *b, std::int64_t ldb, std::int64_t stride_b, @@ -592,11 +592,11 @@ in-place operations: oneapi::mkl::transpose transb, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, std::int64_t stride_a, - T beta, + value_or_pointer beta, T *b, std::int64_t ldb, std::int64_t stride_b, @@ -629,7 +629,7 @@ in-place operations: Number of columns for the result matrix ``C``. Must be at least zero. alpha - Scaling factor for the matrices ``A``. + Scaling factor for the matrices ``A``. See :ref:`value_or_pointer` for more details. a Array holding the input matrices ``A``. Must have size at least ``stride_a`` * ``batch_size``. @@ -667,7 +667,7 @@ in-place operations: - ``stride_a`` must be at least ``lda*n``. beta - Scaling factor for the matrices ``B``. + Scaling factor for the matrices ``B``. See :ref:`value_or_pointer` for more details. b Array holding the input matrices ``B``. 
Must have size at least diff --git a/source/elements/oneMKL/source/domains/blas/omatcopy.rst b/source/elements/oneMKL/source/domains/blas/omatcopy.rst index bd74e24f4..b0d99f9bd 100644 --- a/source/elements/oneMKL/source/domains/blas/omatcopy.rst +++ b/source/elements/oneMKL/source/domains/blas/omatcopy.rst @@ -187,7 +187,7 @@ omatcopy (USM Version) oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, T *b, @@ -201,7 +201,7 @@ omatcopy (USM Version) oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, T *b, @@ -227,7 +227,7 @@ omatcopy (USM Version) Number of columns for the matrix ``A``. Must be at least zero. alpha - Scaling factor for the matrix transposition or copy. + Scaling factor for the matrix transposition or copy. See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. Must have size at least diff --git a/source/elements/oneMKL/source/domains/blas/omatcopy2.rst b/source/elements/oneMKL/source/domains/blas/omatcopy2.rst index 84bc96414..302f321f0 100644 --- a/source/elements/oneMKL/source/domains/blas/omatcopy2.rst +++ b/source/elements/oneMKL/source/domains/blas/omatcopy2.rst @@ -213,7 +213,7 @@ omatcopy2 (USM Version) oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, std::int64_t stridea, @@ -229,7 +229,7 @@ omatcopy2 (USM Version) oneapi::mkl::transpose trans, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, std::int64_t stridea, @@ -257,7 +257,7 @@ omatcopy2 (USM Version) Number of columns for the matrix ``A``. Must be at least zero. alpha - Scaling factor for the matrix transposition or copy. + Scaling factor for the matrix transposition or copy. See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. 
Must have size at least diff --git a/source/elements/oneMKL/source/domains/blas/omatcopy_batch.rst b/source/elements/oneMKL/source/domains/blas/omatcopy_batch.rst index 4edaae299..c361b3d30 100644 --- a/source/elements/oneMKL/source/domains/blas/omatcopy_batch.rst +++ b/source/elements/oneMKL/source/domains/blas/omatcopy_batch.rst @@ -430,7 +430,7 @@ the ``batch_size`` parameter. transpose trans, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, std::int64_t stride_a, @@ -447,7 +447,7 @@ the ``batch_size`` parameter. transpose trans, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, std::int64_t stride_a, @@ -476,7 +476,7 @@ the ``batch_size`` parameter. Number of columns for each matrix B. Must be at least 0. alpha - Scaling factor for the matrix transpose or copy operation. + Scaling factor for the matrix transpose or copy operation. See :ref:`value_or_pointer` for more details. a Array holding the matrices A. Must have size at least diff --git a/source/elements/oneMKL/source/domains/blas/rot.rst b/source/elements/oneMKL/source/domains/blas/rot.rst index b34032f01..f7e4b46a0 100644 --- a/source/elements/oneMKL/source/domains/blas/rot.rst +++ b/source/elements/oneMKL/source/domains/blas/rot.rst @@ -184,8 +184,8 @@ rot (USM Version) std::int64_t incx, T *y, std::int64_t incy, - Tc c, - Ts s, + value_or_pointer c, + value_or_pointer s, const std::vector &dependencies = {}) } .. code-block:: cpp @@ -197,8 +197,8 @@ rot (USM Version) std::int64_t incx, T *y, std::int64_t incy, - Tc c, - Ts s, + value_or_pointer c, + value_or_pointer s, const std::vector &dependencies = {}) } @@ -231,10 +231,10 @@ rot (USM Version) Stride of vector ``y``. c - Scaling factor. + Scaling factor. See :ref:`value_or_pointer` for more details. s - Scaling factor. + Scaling factor. See :ref:`value_or_pointer` for more details. 
dependencies List of events to wait for before starting computation, if any. diff --git a/source/elements/oneMKL/source/domains/blas/rotmg.rst b/source/elements/oneMKL/source/domains/blas/rotmg.rst index 556e649cb..2474a105e 100644 --- a/source/elements/oneMKL/source/domains/blas/rotmg.rst +++ b/source/elements/oneMKL/source/domains/blas/rotmg.rst @@ -173,7 +173,7 @@ rotmg (USM Version) T *d1, T *d2, T *x1, - T y1, + value_or_pointer<T> y1, T *param, const std::vector<sycl::event> &dependencies = {}) } @@ -184,7 +184,7 @@ rotmg (USM Version) T *d1, T *d2, T *x1, - T y1, + value_or_pointer<T> y1, T *param, const std::vector<sycl::event> &dependencies = {}) } @@ -208,7 +208,7 @@ rotmg (USM Version) Pointer to the ``x``-coordinate of the input vector. y1 - Scalar specifying the ``y``-coordinate of the input vector. + Scalar specifying the ``y``-coordinate of the input vector. See :ref:`value_or_pointer` for more details. dependencies List of events to wait for before starting computation, if any. diff --git a/source/elements/oneMKL/source/domains/blas/sbmv.rst b/source/elements/oneMKL/source/domains/blas/sbmv.rst index af7c5cf46..9bc2dc72c 100644 --- a/source/elements/oneMKL/source/domains/blas/sbmv.rst +++ b/source/elements/oneMKL/source/domains/blas/sbmv.rst @@ -169,12 +169,12 @@ sbmv (USM Version) onemkl::uplo upper_lower, std::int64_t n, std::int64_t k, - T alpha, + value_or_pointer<T> alpha, const T *a, std::int64_t lda, const T *x, std::int64_t incx, - T beta, + value_or_pointer<T> beta, T *y, std::int64_t incy, const std::vector<sycl::event> &dependencies = {}) @@ -186,12 +186,12 @@ sbmv (USM Version) onemkl::uplo upper_lower, std::int64_t n, std::int64_t k, - T alpha, + value_or_pointer<T> alpha, const T *a, std::int64_t lda, const T *x, std::int64_t incx, - T beta, + value_or_pointer<T> beta, T *y, std::int64_t incy, const std::vector<sycl::event> &dependencies = {}) @@ -215,7 +215,7 @@ sbmv (USM Version) zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. 
See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. The array holding input matrix @@ -236,7 +236,7 @@ sbmv (USM Version) Stride of vector ``x``. Must not be zero. beta - Scaling factor for vector ``y``. + Scaling factor for vector ``y``. See :ref:`value_or_pointer` for more details. y Pointer to input/output vector ``y``. The array holding diff --git a/source/elements/oneMKL/source/domains/blas/scal.rst b/source/elements/oneMKL/source/domains/blas/scal.rst index 83ad8f369..bef42a9e4 100644 --- a/source/elements/oneMKL/source/domains/blas/scal.rst +++ b/source/elements/oneMKL/source/domains/blas/scal.rst @@ -136,7 +136,7 @@ scal (USM Version) namespace oneapi::mkl::blas::column_major { sycl::event scal(sycl::queue &queue, std::int64_t n, - Ts alpha, + value_or_pointer alpha, T *x, std::int64_t incx, const std::vector &dependencies = {}) @@ -146,7 +146,7 @@ scal (USM Version) namespace oneapi::mkl::blas::row_major { sycl::event scal(sycl::queue &queue, std::int64_t n, - Ts alpha, + value_or_pointer alpha, T *x, std::int64_t incx, const std::vector &dependencies = {}) @@ -163,7 +163,7 @@ scal (USM Version) Number of elements in vector ``x``. alpha - Specifies the scalar ``alpha``. + Specifies the scalar ``alpha``. See :ref:`value_or_pointer` for more details. x Pointer to the input vector ``x``. 
The array must be of size at diff --git a/source/elements/oneMKL/source/domains/blas/spmv.rst b/source/elements/oneMKL/source/domains/blas/spmv.rst index 4326946af..4f747b02d 100644 --- a/source/elements/oneMKL/source/domains/blas/spmv.rst +++ b/source/elements/oneMKL/source/domains/blas/spmv.rst @@ -155,11 +155,11 @@ spmv (USM Version) sycl::event spmv(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, const T *x, std::int64_t incx, - T beta, + value_or_pointer beta, T *y, std::int64_t incy, const std::vector &dependencies = {}) @@ -170,11 +170,11 @@ spmv (USM Version) sycl::event spmv(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, const T *x, std::int64_t incx, - T beta, + value_or_pointer beta, T *y, std::int64_t incy, const std::vector &dependencies = {}) @@ -194,7 +194,7 @@ spmv (USM Version) Number of rows and columns of ``A``. Must be at least zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. The array holding input matrix @@ -212,7 +212,7 @@ spmv (USM Version) Stride of vector ``x``. Must not be zero. beta - Scaling factor for vector ``y``. + Scaling factor for vector ``y``. See :ref:`value_or_pointer` for more details. y Pointer to input/output vector ``y``. 
The array holding diff --git a/source/elements/oneMKL/source/domains/blas/spr.rst b/source/elements/oneMKL/source/domains/blas/spr.rst index af0f47e05..f0fad4598 100644 --- a/source/elements/oneMKL/source/domains/blas/spr.rst +++ b/source/elements/oneMKL/source/domains/blas/spr.rst @@ -141,7 +141,7 @@ spr (USM Version) sycl::event spr(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, T *a, @@ -153,7 +153,7 @@ spr (USM Version) sycl::event spr(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, T *a, @@ -174,7 +174,7 @@ spr (USM Version) Number of rows and columns of ``A``. Must be at least zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. See :ref:`value_or_pointer` for more details. x Pointer to input vector ``x``. The array holding input vector diff --git a/source/elements/oneMKL/source/domains/blas/spr2.rst b/source/elements/oneMKL/source/domains/blas/spr2.rst index 2c9077808..907b590f5 100644 --- a/source/elements/oneMKL/source/domains/blas/spr2.rst +++ b/source/elements/oneMKL/source/domains/blas/spr2.rst @@ -152,7 +152,7 @@ spr2 (USM Version) sycl::event spr2(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, const T *y, @@ -166,7 +166,7 @@ spr2 (USM Version) sycl::event spr2(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, const T *y, @@ -189,7 +189,7 @@ spr2 (USM Version) Number of rows and columns of ``A``. Must be at least zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. See :ref:`value_or_pointer` for more details. x Pointer to input vector ``x``. 
The array holding input vector diff --git a/source/elements/oneMKL/source/domains/blas/symm.rst b/source/elements/oneMKL/source/domains/blas/symm.rst index 72552ac5e..b81612e0b 100644 --- a/source/elements/oneMKL/source/domains/blas/symm.rst +++ b/source/elements/oneMKL/source/domains/blas/symm.rst @@ -209,12 +209,12 @@ symm (USM Version) onemkl::uplo upper_lower, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *b, std::int64_t ldb, - T beta, + value_or_pointer beta, T *c, std::int64_t ldc, const std::vector &dependencies = {}) @@ -227,12 +227,12 @@ symm (USM Version) onemkl::uplo upper_lower, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *b, std::int64_t ldb, - T beta, + value_or_pointer beta, T *c, std::int64_t ldc, const std::vector &dependencies = {}) @@ -263,7 +263,7 @@ symm (USM Version) be at least zero. alpha - Scaling factor for the matrix-matrix product. + Scaling factor for the matrix-matrix product. See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. Must have size at least @@ -290,7 +290,7 @@ symm (USM Version) least ``n`` if column major layout is used to store matrices. beta - Scaling factor for matrix ``C``. + Scaling factor for matrix ``C``. See :ref:`value_or_pointer` for more details. c The pointer to input/output matrix ``C``. It must have a diff --git a/source/elements/oneMKL/source/domains/blas/symv.rst b/source/elements/oneMKL/source/domains/blas/symv.rst index 7b9127d54..a19d89e4d 100644 --- a/source/elements/oneMKL/source/domains/blas/symv.rst +++ b/source/elements/oneMKL/source/domains/blas/symv.rst @@ -111,6 +111,9 @@ symv (Buffer Version) incx Stride of vector ``x``. Must not be zero. + beta + Scaling factor for the vector ``y``. + y Buffer holding input/output vector ``y``. The buffer must be of size at least (1 + (``n`` - 1)*abs(``incy``)). 
See :ref:`matrix-storage` @@ -160,12 +163,12 @@ symv (USM Version) sycl::event symv(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *x, std::int64_t incx, - T beta, + value_or_pointer beta, T *y, std::int64_t incy, const std::vector &dependencies = {}) @@ -176,12 +179,12 @@ symv (USM Version) sycl::event symv(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *x, std::int64_t incx, - T beta, + value_or_pointer beta, T *y, std::int64_t incy, const std::vector &dependencies = {}) @@ -201,7 +204,7 @@ symv (USM Version) Number of rows and columns of ``A``. Must be at least zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. The array holding input matrix @@ -221,6 +224,9 @@ symv (USM Version) incx Stride of vector ``x``. Must not be zero. + beta + Scaling factor for the vector ``y``. See :ref:`value_or_pointer` for more details. + y Pointer to input/output vector ``y``. The array holding input/output vector ``y`` must be of size at least (1 + (``n`` diff --git a/source/elements/oneMKL/source/domains/blas/syr.rst b/source/elements/oneMKL/source/domains/blas/syr.rst index b67a6317c..25c5bad91 100644 --- a/source/elements/oneMKL/source/domains/blas/syr.rst +++ b/source/elements/oneMKL/source/domains/blas/syr.rst @@ -149,7 +149,7 @@ syr (USM Version) sycl::event syr(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, T *a, @@ -162,7 +162,7 @@ syr (USM Version) sycl::event syr(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, T *a, @@ -184,7 +184,7 @@ syr (USM Version) Number of columns of ``A``. 
Must be at least zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. See :ref:`value_or_pointer` for more details. x Pointer to input vector ``x``. The array holding input vector diff --git a/source/elements/oneMKL/source/domains/blas/syr2.rst b/source/elements/oneMKL/source/domains/blas/syr2.rst index 228e0fbb1..1c719f247 100644 --- a/source/elements/oneMKL/source/domains/blas/syr2.rst +++ b/source/elements/oneMKL/source/domains/blas/syr2.rst @@ -161,7 +161,7 @@ syr2 (USM Version) sycl::event syr2(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, const T *y, @@ -176,7 +176,7 @@ syr2 (USM Version) sycl::event syr2(sycl::queue &queue, onemkl::uplo upper_lower, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *x, std::int64_t incx, const T *y, @@ -200,7 +200,7 @@ syr2 (USM Version) Number of columns of ``A``. Must be at least zero. alpha - Scaling factor for the matrix-vector product. + Scaling factor for the matrix-vector product. See :ref:`value_or_pointer` for more details. x Pointer to input vector ``x``. 
The array holding input vector diff --git a/source/elements/oneMKL/source/domains/blas/syr2k.rst b/source/elements/oneMKL/source/domains/blas/syr2k.rst index b509c69c3..51296804e 100644 --- a/source/elements/oneMKL/source/domains/blas/syr2k.rst +++ b/source/elements/oneMKL/source/domains/blas/syr2k.rst @@ -253,12 +253,12 @@ syr2k (USM Version) onemkl::transpose trans, std::int64_t n, std::int64_t k, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *b, std::int64_t ldb, - T beta, + value_or_pointer beta, T *c, std::int64_t ldc, const std::vector &dependencies = {}) @@ -271,12 +271,12 @@ syr2k (USM Version) onemkl::transpose trans, std::int64_t n, std::int64_t k, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, const T *b, std::int64_t ldb, - T beta, + value_or_pointer beta, T *c, std::int64_t ldc, const std::vector &dependencies = {}) @@ -307,7 +307,7 @@ syr2k (USM Version) must be at least zero. alpha - Scaling factor for the rank-2k update. + Scaling factor for the rank-2k update. See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. @@ -387,7 +387,7 @@ syr2k (USM Version) - ``ldb`` must be at least ``n``. beta - Scaling factor for matrix ``C``. + Scaling factor for matrix ``C``. See :ref:`value_or_pointer` for more details. c Pointer to input/output matrix ``C``. 
Must have size at least diff --git a/source/elements/oneMKL/source/domains/blas/syrk.rst b/source/elements/oneMKL/source/domains/blas/syrk.rst index 5ee032c5f..94223f50f 100644 --- a/source/elements/oneMKL/source/domains/blas/syrk.rst +++ b/source/elements/oneMKL/source/domains/blas/syrk.rst @@ -199,10 +199,10 @@ syrk (USM Version) onemkl::transpose trans, std::int64_t n, std::int64_t k, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, - T beta, + value_or_pointer beta, T *c, std::int64_t ldc, const std::vector &dependencies = {}) @@ -215,10 +215,10 @@ syrk (USM Version) onemkl::transpose trans, std::int64_t n, std::int64_t k, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, - T beta, + value_or_pointer beta, T *c, std::int64_t ldc, const std::vector &dependencies = {}) @@ -249,7 +249,7 @@ syrk (USM Version) least zero. alpha - Scaling factor for the rank-k update. + Scaling factor for the rank-k update. See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. @@ -290,7 +290,7 @@ syrk (USM Version) - ``lda`` must be at least ``n``. beta - Scaling factor for matrix ``C``. + Scaling factor for matrix ``C``. See :ref:`value_or_pointer` for more details. c Pointer to input/output matrix ``C``. Must have size at least diff --git a/source/elements/oneMKL/source/domains/blas/syrk_batch.rst b/source/elements/oneMKL/source/domains/blas/syrk_batch.rst index 3252641ef..b7318f314 100644 --- a/source/elements/oneMKL/source/domains/blas/syrk_batch.rst +++ b/source/elements/oneMKL/source/domains/blas/syrk_batch.rst @@ -389,11 +389,11 @@ in ``a`` and ``c`` are given by the ``batch_size`` parameter. transpose trans, std::int64_t n, std::int64_t k, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, std::int64_t stride_a, - T beta, + value_or_pointer beta, T *c, std::int64_t ldc, std::int64_t stride_c, @@ -408,11 +408,11 @@ in ``a`` and ``c`` are given by the ``batch_size`` parameter. 
transpose trans, std::int64_t n, std::int64_t k, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, std::int64_t stride_a, - T beta, + value_or_pointer beta, T *c, std::int64_t ldc, std::int64_t stride_c, @@ -446,7 +446,7 @@ in ``a`` and ``c`` are given by the ``batch_size`` parameter. Must be at least zero. alpha - Scaling factor for the rank-k updates. + Scaling factor for the rank-k updates. See :ref:`value_or_pointer` for more details. a Pointer to input matrices ``A`` with size ``stridea`` * ``batch_size``. @@ -471,7 +471,7 @@ in ``a`` and ``c`` are given by the ``batch_size`` parameter. Stride between different ``A`` matrices. beta - Scaling factor for the matrices ``C``. + Scaling factor for the matrices ``C``. See :ref:`value_or_pointer` for more details. c Pointer to input/output matrices ``C`` with size ``stridec`` * ``batch_size``. diff --git a/source/elements/oneMKL/source/domains/blas/trmm.rst b/source/elements/oneMKL/source/domains/blas/trmm.rst index 5d346adab..41e91f210 100644 --- a/source/elements/oneMKL/source/domains/blas/trmm.rst +++ b/source/elements/oneMKL/source/domains/blas/trmm.rst @@ -199,7 +199,7 @@ trmm (USM Version) onemkl::diag unit_diag, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, T *b, @@ -215,7 +215,7 @@ trmm (USM Version) onemkl::diag unit_diag, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, T *b, @@ -256,7 +256,7 @@ trmm (USM Version) must be at least zero. alpha - Scaling factor for the matrix-matrix product. + Scaling factor for the matrix-matrix product. See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. 
Must have size at least diff --git a/source/elements/oneMKL/source/domains/blas/trsm.rst b/source/elements/oneMKL/source/domains/blas/trsm.rst index 9886d2226..57995cf6b 100644 --- a/source/elements/oneMKL/source/domains/blas/trsm.rst +++ b/source/elements/oneMKL/source/domains/blas/trsm.rst @@ -198,7 +198,7 @@ trsm (USM Version) onemkl::diag unit_diag, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, T *b, @@ -215,7 +215,7 @@ trsm (USM Version) onemkl::diag unit_diag, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, T *b, @@ -255,7 +255,7 @@ trsm (USM Version) must be at least zero. alpha - Scaling factor for the solution. + Scaling factor for the solution. See :ref:`value_or_pointer` for more details. a Pointer to input matrix ``A``. Must have size at least diff --git a/source/elements/oneMKL/source/domains/blas/trsm_batch.rst b/source/elements/oneMKL/source/domains/blas/trsm_batch.rst index f025bc4b1..5d11ac365 100644 --- a/source/elements/oneMKL/source/domains/blas/trsm_batch.rst +++ b/source/elements/oneMKL/source/domains/blas/trsm_batch.rst @@ -410,7 +410,7 @@ in ``a`` and ``b`` are given by the ``batch_size`` parameter. onemkl::diag unit_diag, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, std::int64_t stridea, @@ -430,7 +430,7 @@ in ``a`` and ``b`` are given by the ``batch_size`` parameter. onemkl::diag unit_diag, std::int64_t m, std::int64_t n, - T alpha, + value_or_pointer alpha, const T *a, std::int64_t lda, std::int64_t stridea, @@ -471,7 +471,7 @@ in ``a`` and ``b`` are given by the ``batch_size`` parameter. Number of columns of the ``B`` matrices. Must be at least zero. alpha - Scaling factor for the solutions. + Scaling factor for the solutions. See :ref:`value_or_pointer` for more details. a Pointer to input matrices ``A`` with size ``stridea`` * ``batch_size``. 
diff --git a/source/elements/oneMKL/source/domains/dense_linear_algebra.inc.rst b/source/elements/oneMKL/source/domains/dense_linear_algebra.inc.rst index fa9f16890..517ce3516 100644 --- a/source/elements/oneMKL/source/domains/dense_linear_algebra.inc.rst +++ b/source/elements/oneMKL/source/domains/dense_linear_algebra.inc.rst @@ -13,11 +13,14 @@ This section contains information about dense linear algebra routines: :ref:`onemkl_blas` provides vector, matrix-vector, and matrix-matrix routines for dense matrices and vector operations. +:ref:`value_or_pointer` describes some details of how scalar parameters (such as ``alpha`` and ``beta``) are handled so that users may pass either values or pointers for these parameters. + :ref:`onemkl_lapack` provides more complex dense linear algebra routines, e.g., matrix factorization, solving dense systems of linear equations, least square problems, eigenvalue and singular value problems, and performing a number of related computational tasks. .. toctree:: :hidden: matrix-storage.rst + value_or_pointer.rst blas/blas.rst lapack/lapack.rst diff --git a/source/elements/oneMKL/source/domains/dft/compute_backward.rst b/source/elements/oneMKL/source/domains/dft/compute_backward.rst index 484e61e62..c25c104a8 100644 --- a/source/elements/oneMKL/source/domains/dft/compute_backward.rst +++ b/source/elements/oneMKL/source/domains/dft/compute_backward.rst @@ -5,53 +5,65 @@ .. _onemkl_dft_compute_backward: compute_backward -================= +================ -This function computes the backward transform defined by an instantiation of the :ref:`onemkl_dft_descriptor` class. +This function computes the backward DFT(s), as defined by an instantiation of +the :ref:`descriptor` class, on user-provided data. .. _onemkl_dft_compute_backward_description: .. rubric:: Description -The compute_backward function accepts the :ref:`onemkl_dft_descriptor` and one or more data parameters and in the case of USM data, any ``syc::event`` dependencies. 
Given a successfully configured and committed descriptor, this function computes the backward transform, that is, the :ref:`transform` with the plus sign, :math:`\delta=+1`, in the exponent. +Given a successfully committed :ref:`descriptor<onemkl_dft_descriptor>` object +whose configuration is not inconsistent with backward DFT calculations, this +function computes the backward transform defined by that object. -The configuration parameters ``config_param::COMPLEX_STORAGE``, ``config_param::REAL_STORAGE`` and ``config_param::CONJUGATE_EVEN_STORAGE`` define the layout of the input and output data and must be properly set in a call to :ref:`onemkl_dft_descriptor_set_value`. +The ``compute_backward`` function requires a successfully committed object of +the :ref:`descriptor<onemkl_dft_descriptor>` class and one, two or four "data +container" arguments (depending on the configuration of the +:ref:`descriptor<onemkl_dft_descriptor>` object). If using (pointers to) USM +allocations as data containers, this function may also be provided with an +``std::vector<sycl::event>`` object collecting dependencies to be observed by +the desired DFT calculations and return a ``sycl::event`` tracking the +progress of the DFT calculations enqueued by this function. .. note:: - The compute_backward function may need to access the internals and private/protected members of the :ref:`onemkl_dft_descriptor` class. This could be done, for instance, by labeling it as a friend function to the descriptor class. - + The compute_backward function may need to access the internals and + private/protected members of the :ref:`descriptor<onemkl_dft_descriptor>` + class. This could be done, for instance, by labeling it as a friend function + to the :ref:`descriptor<onemkl_dft_descriptor>` class. .. onemkl_dft_compute_backward_buffer: compute_backward (Buffer version) --------------------------------- -.. rubric:: Syntax (In-place transform) +.. rubric:: Syntax (in-place transform, except for complex descriptors with :ref:`onemkl_dft_complex_storage_real_real`) ..
code-block:: cpp namespace oneapi::mkl::dft { template - void compute_backward( descriptor_type &desc, - sycl::buffer &inout ); - + void compute_backward( descriptor_type &desc, + sycl::buffer &inout); } -.. rubric:: Syntax (In-place transform, using ``config_param::COMPLEX_STORAGE=config_value::REAL_REAL`` :ref:`data format` ) + +.. rubric:: Syntax (in-place transform, for complex descriptors with :ref:`onemkl_dft_complex_storage_real_real`) .. code-block:: cpp namespace oneapi::mkl::dft { - template - void compute_backward( descriptor_type &desc, - sycl::buffer &inout_re, - sycl::buffer &inout_im); + template + void compute_backward( descriptor_type &desc, + sycl::buffer &inout_re, + sycl::buffer &inout_im); } -.. rubric:: Syntax (Out-of-place transform) +.. rubric:: Syntax (out-of-place transform, except for complex descriptors with :ref:`onemkl_dft_complex_storage_real_real`) .. code-block:: cpp @@ -59,11 +71,11 @@ compute_backward (Buffer version) template void compute_backward( descriptor_type &desc, - sycl::buffer &in, + sycl::buffer &in, sycl::buffer &out); } -.. rubric:: Syntax (Out-of-place transform, using ``config_param::COMPLEX_STORAGE=config_value::REAL_REAL`` :ref:`data format` ) +.. rubric:: Syntax (out-of-place transform, for complex descriptors with :ref:`onemkl_dft_complex_storage_real_real`) .. code-block:: cpp @@ -75,35 +87,53 @@ compute_backward (Buffer version) sycl::buffer &in_im, sycl::buffer &out_re, sycl::buffer &out_im); - } + .. container:: section .. rubric:: Input Parameters :ref:`desc` - A fully configured and committed discrete Fourier transform descriptor class object, defining the type of backward transformation and data layout to be applied. At commit time, the ``sycl::queue`` has already been provided. + A fully configured and committed object of the + :ref:`descriptor` class, whose configuration is not + inconsistent with backward DFT calculations. 
inout - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house both the input and output data sequences for the in-place transformation. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. - + ``sycl::buffer`` object of sufficient capacity to store the elements + defining all the relevant data sequences, as configured by ``desc`` + (configured for in-place operations and not with + :ref:`onemkl_dft_complex_storage_real_real`, if complex). inout_re - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the real part of both the input and output data sequences for the in-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining the real parts of all the relevant data sequences, as configured + by ``desc``. Only with complex descriptors configured for in-place operations with + :ref:`onemkl_dft_complex_storage_real_real`. inout_im - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the imaginary part of both the input and output data sequences for the in-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining the imaginary parts of all the relevant data sequences, as + configured by ``desc``. 
Only with complex descriptors configured for in-place + operations with :ref:`onemkl_dft_complex_storage_real_real`. in - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the input data sequence for the out-of-place transformation. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining all the relevant backward-domain data sequences, as configured by + ``desc`` (configured for out-of-place operations and not with + :ref:`onemkl_dft_complex_storage_real_real`, if complex). in_re - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the real part of input data sequence for the out-of-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining the real parts of all the relevant backward-domain data sequences, + as configured by ``desc``. Only with complex descriptors configured for out-of-place + operations with :ref:`onemkl_dft_complex_storage_real_real`. in_im - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the imaginary part of input data sequence for the out-of-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. 
- + ``sycl::buffer`` object of sufficient capacity to store the elements + defining the imaginary parts of all the relevant backward-domain data + sequences, as configured by ``desc``. Only with complex descriptors configured for + out-of-place operations with :ref:`onemkl_dft_complex_storage_real_real`. .. container:: section @@ -111,96 +141,112 @@ compute_backward (Buffer version) .. rubric:: Output Parameters inout - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house both the input and output data sequences for the in-place transformation. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining all the relevant data sequences, as configured by ``desc`` + (configured for in-place operations and not with + :ref:`onemkl_dft_complex_storage_real_real`, if complex). inout_re - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the real part of both the input and output data sequences for the in-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining the real parts of all the relevant data sequences, as configured + by ``desc``. Only with complex descriptors configured for in-place operations with + :ref:`onemkl_dft_complex_storage_real_real`. 
inout_im - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the imaginary part of both the input and output data sequences for the in-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining the imaginary parts of all the relevant data sequences, as + configured by ``desc``. Only with complex descriptors configured for in-place + operations with :ref:`onemkl_dft_complex_storage_real_real`. out - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the output data sequence for the out-of-place transformation. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining all the relevant forward-domain data sequences, as configured by + ``desc`` (configured for out-of-place operations and not with + :ref:`onemkl_dft_complex_storage_real_real`, if complex). out_re - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the real part of output data sequence for the out-of-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining the real parts of all the relevant forward-domain data sequences, + as configured by ``desc``. 
Only with complex descriptors configured for out-of-place + operations with :ref:`onemkl_dft_complex_storage_real_real`. out_im - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the imaginary part of output data sequence for the out-of-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining the imaginary parts of all the relevant forward-domain data + sequences, as configured by ``desc``. Only with complex descriptors configured for + out-of-place operations with :ref:`onemkl_dft_complex_storage_real_real`. .. container:: section .. rubric:: Throws - The `oneapi::mkl::dft::compute_backward()` routine shall throw the following exceptions if the associated condition is detected. An implementation may throw additional implementation-specific exception(s) in case of error conditions not covered here: + The ``oneapi::mkl::dft::compute_backward`` routine shall throw the following + exceptions if the associated condition is detected. An implementation may + throw additional implementation-specific exception(s) in case of error + conditions not covered here: :ref:`oneapi::mkl::invalid_argument()` - If the provided :ref:`onemkl_dft_descriptor` class is invalid, for instance, if it is a nullptr or if the value of ``config_param::COMMIT_STATUS`` in descriptor is not ``config_param::COMMITTED``. - - - + If the provided :ref:`descriptor<onemkl_dft_descriptor>` object ``desc`` + is invalid, for instance, if its configuration value associated with + configuration parameter ``config_param::COMMIT_STATUS`` is not + ``config_value::COMMITTED``. ..
onemkl_dft_compute_backward_usm: compute_backward (USM version) ---------------------------------- +------------------------------ -.. rubric:: Syntax (In-place transform) +.. rubric:: Syntax (in-place transform, except for complex descriptors with :ref:`onemkl_dft_complex_storage_real_real`) .. code-block:: cpp namespace oneapi::mkl::dft { - + template <typename descriptor_type, typename data_type> sycl::event compute_backward( descriptor_type &desc, data_type *inout, - const std::vector<sycl::event> &dependencies = {}); + const std::vector<sycl::event> &dependencies = {}); } -.. rubric:: Syntax (In-place transform, using ``config_param::COMPLEX_STORAGE=config_value::REAL_REAL`` :ref:`data format` ) +.. rubric:: Syntax (in-place transform, for complex descriptors with :ref:`onemkl_dft_complex_storage_real_real`) .. code-block:: cpp namespace oneapi::mkl::dft { - - template <typename descriptor_type, typename data_type> - sycl::event compute_backward(descriptor_type &desc, - data_type *inout_re, - data_type *inout_im, - const std::vector<sycl::event> &dependencies = {}); + template <typename descriptor_type, typename data_type> + sycl::event compute_backward( descriptor_type &desc, + data_type *inout_re, + data_type *inout_im, + const std::vector<sycl::event> &dependencies = {}); } - -.. rubric:: Syntax (Out-of-place transform) +.. rubric:: Syntax (out-of-place transform, except for complex descriptors with :ref:`onemkl_dft_complex_storage_real_real`) .. code-block:: cpp namespace oneapi::mkl::dft { - + template <typename descriptor_type, typename input_type, typename output_type> sycl::event compute_backward( descriptor_type &desc, input_type *in, output_type *out, - const std::vector<sycl::event> &dependencies = {}); - + const std::vector<sycl::event> &dependencies = {}); } -.. rubric:: Syntax (Out-of-place transform, using ``config_param::COMPLEX_STORAGE=config_value::REAL_REAL`` :ref:`data format` ) +.. rubric:: Syntax (out-of-place transform, for complex descriptors with :ref:`onemkl_dft_complex_storage_real_real`) ..
code-block:: cpp namespace oneapi::mkl::dft { - + template <typename descriptor_type, typename input_type, typename output_type> sycl::event compute_backward( descriptor_type &desc, input_type *in_re, input_type *in_im, output_type *out_re, output_type *out_im, - const std::vector<sycl::event> &dependencies = {}); - + const std::vector<sycl::event> &dependencies = {}); } .. container:: section @@ -208,70 +254,114 @@ compute_backward (USM version) .. rubric:: Input Parameters :ref:`desc` - A fully configured and committed discrete Fourier transform descriptor class object, defining the type of backward transformation and data layout to be applied. At commit time, the ``sycl::queue`` has already been provided. + A fully configured and committed object of the + :ref:`descriptor<onemkl_dft_descriptor>` class, whose configuration is not + inconsistent with backward DFT calculations. inout - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house both the input and output data sequences for the in-place transformation. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + Pointer to USM allocation of sufficient capacity to store the elements + defining all the relevant data sequences, as configured by ``desc`` + (configured for in-place operations and not with + :ref:`onemkl_dft_complex_storage_real_real`, if complex). inout_re - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the real part of both the input and output data sequences for the in-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + Pointer to USM allocation of sufficient capacity to store the elements + defining the real parts of all the relevant data sequences, as configured + by ``desc``.
Only with complex descriptors configured for in-place operations with + :ref:`onemkl_dft_complex_storage_real_real`. inout_im - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the imaginary part of both the input and output data sequences for the in-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + Pointer to USM allocation of sufficient capacity to store the elements + defining the imaginary parts of all the relevant data sequences, as + configured by ``desc``. Only with complex descriptors configured for in-place + operations with :ref:`onemkl_dft_complex_storage_real_real`. in - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the input data sequence for the out-of-place transformation. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + Pointer to USM allocation of sufficient capacity to store the elements + defining all the relevant backward-domain data sequences, as configured by + ``desc`` (configured for out-of-place operations and not with + :ref:`onemkl_dft_complex_storage_real_real`, if complex). in_re - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the real part of the input data sequence for the out-of-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. 
- + Pointer to USM allocation of sufficient capacity to store the elements + defining the real parts of all the relevant backward-domain data sequences, + as configured by ``desc``. Only with complex descriptors configured for out-of-place + operations with :ref:`onemkl_dft_complex_storage_real_real`. + in_im - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the imaginary part of the input data sequence for the out-of-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. - + Pointer to USM allocation of sufficient capacity to store the elements + defining the imaginary parts of all the relevant backward-domain data + sequences, as configured by ``desc``. Only with complex descriptors configured for + out-of-place operations with :ref:`onemkl_dft_complex_storage_real_real`. + dependencies - A vector of ``sycl::event``'s that represent the previously enqueued tasks that must be finished before this transformation can be started. + An ``std::vector`` object collecting the events returned by + previously enqueued tasks that must be finished before this transform can + be calculated. .. container:: section .. rubric:: Output Parameters - inout - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house both the input and output data sequences for the in-place transformation. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + Pointer to USM allocation of sufficient capacity to store the elements + defining all the relevant data sequences, as configured by ``desc`` + (configured for in-place operations and not with + :ref:`onemkl_dft_complex_storage_real_real`, if complex). 
inout_re - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the real part of both the input and output data sequences for the in-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + Pointer to USM allocation of sufficient capacity to store the elements + defining the real parts of all the relevant data sequences, as + configured by ``desc``. Only with complex descriptors configured for in-place + operations with :ref:`onemkl_dft_complex_storage_real_real`. inout_im - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the imaginary part of both the input and output data sequences for the in-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. - + Pointer to USM allocation of sufficient capacity to store the elements + defining the imaginary parts of all the relevant data sequences, as + configured by ``desc``. Only with complex descriptors configured for in-place + operations with :ref:`onemkl_dft_complex_storage_real_real`. out - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the output data sequence for the out-of-place transformation. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. 
+ Pointer to USM allocation of sufficient capacity to store the elements + defining all the relevant forward-domain data sequences, as configured by + ``desc`` (configured for out-of-place operations and not with + :ref:`onemkl_dft_complex_storage_real_real`, if complex). out_re - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the real part of the output data sequence for the out-of-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. - + Pointer to USM allocation of sufficient capacity to store the elements + defining the real parts of all the relevant forward-domain data sequences, + as configured by ``desc``. Only with complex descriptors configured for out-of-place + operations with :ref:`onemkl_dft_complex_storage_real_real`. + out_im - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the imaginary part of the output data sequence for the out-of-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. - + Pointer to USM allocation of sufficient capacity to store the elements + defining the imaginary parts of all the relevant forward-domain data + sequences, as configured by ``desc``. Only with complex descriptors configured for + out-of-place operations with :ref:`onemkl_dft_complex_storage_real_real`. + .. container:: section .. rubric:: Throws - The `oneapi::mkl::dft::compute_backward()` routine shall throw the following exceptions if the associated condition is detected. 
An implementation may throw additional implementation-specific exception(s) in case of error conditions not covered here: - - :ref:`oneapi::mkl::invalid_argument()` - If the provided :ref:`onemkl_dft_descriptor` class is invalid, for instance, if it is a nullptr or if the value of ``config_param::COMMIT_STATUS`` in descriptor is not ``config_param::COMMITTED``. It will also be thrown if the input/output pointers are NULL. + The ``oneapi::mkl::dft::compute_backward()`` routine shall throw the following + exceptions if the associated condition is detected. An implementation may + throw additional implementation-specific exception(s) in case of error + conditions not covered here: + :ref:`oneapi::mkl::invalid_argument()` + If the provided :ref:`descriptor` object ``desc`` + is invalid, for instance, if its configuration value associated with + configuration parameter ``config_param::COMMIT_STATUS`` is not + ``config_value::COMMITTED``. It will also be thrown if any required + input/output pointer is ``nullptr``. .. container:: section .. rubric:: Return Values - This function returns a ``sycl::event`` that allows to track progress of this transformation, and can be passed as a dependency to other routines that may depend on the results of this transformation to be finished before proceeding with the other operations. - + This function returns a ``sycl::event`` object that allows tracking the progress + of the backward DFT, and can be passed as a dependency to other routines that + may depend on the result of the backward transform(s) before proceeding with + other operations.
**Parent topic:** :ref:`onemkl_dft` - - diff --git a/source/elements/oneMKL/source/domains/dft/compute_forward.rst b/source/elements/oneMKL/source/domains/dft/compute_forward.rst index 7476b260b..284e1f220 100644 --- a/source/elements/oneMKL/source/domains/dft/compute_forward.rst +++ b/source/elements/oneMKL/source/domains/dft/compute_forward.rst @@ -7,52 +7,63 @@ compute_forward =============== -This function computes the forward transform defined by an instantiation of the :ref:`onemkl_dft_descriptor` class. +This function computes the forward DFT(s), as defined by an instantiation of +the :ref:`descriptor` class, on user-provided data. .. _onemkl_dft_compute_forward_description: .. rubric:: Description -The compute_forward function accepts the :ref:`onemkl_dft_descriptor` and one or more data parameters and in the case of USM data, any ``syc::event`` dependencies. Given a successfully configured and committed descriptor, this function computes the forward transform, that is, the :ref:`transform` with the minus sign, :math:`\delta=-1`, in the exponent. +Given a successfully committed :ref:`descriptor` object +whose configuration is not inconsistent with forward DFT calculations, this +function computes the forward transform defined by that object. -The configuration parameters ``config_param::COMPLEX_STORAGE``, ``config_param::REAL_STORAGE`` and ``config_param::CONJUGATE_EVEN_STORAGE`` define the layout of the input and output data and must be properly set in a call to :ref:`onemkl_dft_descriptor_set_value`. +The ``compute_forward`` function requires a successfully committed object of the +:ref:`descriptor` class and one, two or four "data +container" arguments (depending on the configuration of the +:ref:`descriptor` object). 
If using (pointers to) USM +allocations as data containers, this function may also be provided with an +``std::vector<sycl::event>`` object collecting dependencies to be observed by +the desired DFT calculations and returns a ``sycl::event`` tracking the +progress of the DFT calculations enqueued by this function. .. note:: - The compute_forward function may need to access the internals and private/protected members of the :ref:`onemkl_dft_descriptor` class. This could be done, for instance, by labeling it as a friend function to the descriptor class. - + The compute_forward function may need to access the internals and + private/protected members of the :ref:`descriptor` + class. This could be done, for instance, by labeling it as a friend function + to the :ref:`descriptor` class. .. onemkl_dft_compute_forward_buffer: compute_forward (Buffer version) ---------------------------------- +-------------------------------- -.. rubric:: Syntax (In-place transform) +.. rubric:: Syntax (in-place transform, except for complex descriptors with :ref:`onemkl_dft_complex_storage_real_real`) .. code-block:: cpp namespace oneapi::mkl::dft { template - void compute_forward( descriptor_type &desc, - sycl::buffer &inout); - + void compute_forward( descriptor_type &desc, + sycl::buffer &inout); } -.. rubric:: Syntax (In-place transform, using ``config_param::COMPLEX_STORAGE=config_value::REAL_REAL`` :ref:`data format` ) +.. rubric:: Syntax (in-place transform, for complex descriptors with :ref:`onemkl_dft_complex_storage_real_real`) .. code-block:: cpp namespace oneapi::mkl::dft { template - void compute_forward( descriptor_type &desc, - sycl::buffer &inout_re, - sycl::buffer &inout_im); + void compute_forward( descriptor_type &desc, + sycl::buffer &inout_re, + sycl::buffer &inout_im); } -.. rubric:: Syntax (Out-of-place transform) +.. rubric:: Syntax (out-of-place transform, except for complex descriptors with :ref:`onemkl_dft_complex_storage_real_real`) ..
code-block:: cpp @@ -64,7 +75,7 @@ compute_forward (Buffer version) sycl::buffer &out); } -.. rubric:: Syntax (Out-of-place transform, using ``config_param::COMPLEX_STORAGE=config_value::REAL_REAL`` :ref:`data format` ) +.. rubric:: Syntax (out-of-place transform, for complex descriptors with :ref:`onemkl_dft_complex_storage_real_real`) .. code-block:: cpp @@ -76,37 +87,53 @@ compute_forward (Buffer version) sycl::buffer &in_im, sycl::buffer &out_re, sycl::buffer &out_im); - } - - .. container:: section .. rubric:: Input Parameters :ref:`desc` - A fully configured and committed discrete Fourier transform descriptor class object, defining the type of transformation and data layout to be applied. At commit time, the ``sycl::queue`` has already been provided. + A fully configured and committed object of the + :ref:`descriptor` class, whose configuration is not + inconsistent with forward DFT calculations. inout - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house both the input and output data sequences for the in-place transformation. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining all the relevant data sequences, as configured by ``desc`` + (configured for in-place operations and not with + :ref:`onemkl_dft_complex_storage_real_real`, if complex). inout_re - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the real part of both the input and output data sequences for the in-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. 
+ ``sycl::buffer`` object of sufficient capacity to store the elements + defining the real parts of all the relevant data sequences, as + configured by ``desc``. Only with complex descriptors configured for in-place + operations with :ref:`onemkl_dft_complex_storage_real_real`. inout_im - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the imaginary part of both the input and output data sequences for the in-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining the imaginary parts of all the relevant data sequences, as + configured by ``desc``. Only with complex descriptors configured for in-place + operations with :ref:`onemkl_dft_complex_storage_real_real`. in - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the input data sequence for the out-of-place transformation. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining all the relevant forward-domain data sequences, as + configured by ``desc`` (configured for out-of-place operations and not + with :ref:`onemkl_dft_complex_storage_real_real`, if complex). in_re - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the real part of input data sequence for the out-of-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. 
Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining the real parts of all the relevant forward-domain data sequences, + as configured by ``desc``. Only with complex descriptors configured for + out-of-place operations with :ref:`onemkl_dft_complex_storage_real_real`. in_im - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the imaginary part of input data sequence for the out-of-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. - + ``sycl::buffer`` object of sufficient capacity to store the elements + defining the imaginary parts of all the relevant forward-domain data + sequences, as configured by ``desc``. Only with complex descriptors configured for + out-of-place operations with :ref:`onemkl_dft_complex_storage_real_real`. .. container:: section @@ -114,176 +141,227 @@ compute_forward (Buffer version) .. rubric:: Output Parameters inout - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house both the input and output data sequences for the in-place transformation. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining all the relevant data sequences, as configured by ``desc`` + (configured for in-place operations and not with + :ref:`onemkl_dft_complex_storage_real_real`, if complex). 
inout_re - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the real part of both the input and output data sequences for the in-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining the real parts of all the relevant data sequences, as configured + by ``desc``. Only with complex descriptors configured for in-place operations with + :ref:`onemkl_dft_complex_storage_real_real`. inout_im - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the imaginary part of both the input and output data sequences for the in-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining the imaginary parts of all the relevant data sequences, as + configured by ``desc``. Only with complex descriptors configured for in-place + operations with :ref:`onemkl_dft_complex_storage_real_real`. out - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the output data sequence for the out-of-place transformation. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. 
+ ``sycl::buffer`` object of sufficient capacity to store the elements + defining all the relevant backward-domain data sequences, as configured by + ``desc`` (configured for out-of-place operations and not with + :ref:`onemkl_dft_complex_storage_real_real`, if complex). out_re - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the real part of output data sequence for the out-of-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + ``sycl::buffer`` object of sufficient capacity to store the elements + defining the real parts of all the relevant backward-domain data sequences, + as configured by ``desc``. Only with complex descriptors configured for + out-of-place operations with :ref:`onemkl_dft_complex_storage_real_real`. out_im - Sycl buffer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the imaginary part of output data sequence for the out-of-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. - + ``sycl::buffer`` object of sufficient capacity to store the elements + defining the imaginary parts of all the relevant backward-domain data + sequences, as configured by ``desc``. Only with complex descriptors configured + for out-of-place operations with :ref:`onemkl_dft_complex_storage_real_real`. .. container:: section .. rubric:: Throws - The `oneapi::mkl::dft::compute_forward` routine shall throw the following exceptions if the associated condition is detected. 
An implementation may throw additional implementation-specific exception(s) in case of error conditions not covered here: + The ``oneapi::mkl::dft::compute_forward`` routine shall throw the following + exceptions if the associated condition is detected. An implementation may + throw additional implementation-specific exception(s) in case of error + conditions not covered here: :ref:`oneapi::mkl::invalid_argument()` - If the provided :ref:`onemkl_dft_descriptor` class is invalid, for instance, if it is a nullptr or if the value of ``config_param::COMMIT_STATUS`` in descriptor is not ``config_param::COMMITTED``. - - - + If the provided :ref:`descriptor` object ``desc`` + is invalid, for instance, if its configuration value associated with + configuration parameter ``config_param::COMMIT_STATUS`` is not + ``config_value::COMMITTED``. .. onemkl_dft_compute_forward_usm: compute_forward (USM version) ---------------------------------- +----------------------------- -.. rubric:: Syntax (In-place transform) +.. rubric:: Syntax (in-place transform, except for complex descriptors with :ref:`onemkl_dft_complex_storage_real_real`) .. code-block:: cpp namespace oneapi::mkl::dft { - + template sycl::event compute_forward( descriptor_type &desc, data_type *inout, - const std::vector &dependencies = {}); + const std::vector &dependencies = {}); } -.. rubric:: Syntax (In-place transform, using ``config_param::COMPLEX_STORAGE=config_value::REAL_REAL`` :ref:`data format` ) +.. rubric:: Syntax (in-place transform, for complex descriptors with :ref:`onemkl_dft_complex_storage_real_real`) .. code-block:: cpp namespace oneapi::mkl::dft { - - template - sycl::event compute_forward(descriptor_type &desc, - data_type *inout_re, - data_type *inout_im, - const std::vector &dependencies = {}); + template + sycl::event compute_forward( descriptor_type &desc, + data_type *inout_re, + data_type *inout_im, + const std::vector &dependencies = {}); } - - - - -..
rubric:: Syntax (Out-of-place transform) +.. rubric:: Syntax (out-of-place transform, except for complex descriptors with :ref:`onemkl_dft_complex_storage_real_real`) .. code-block:: cpp namespace oneapi::mkl::dft { - + template sycl::event compute_forward( descriptor_type &desc, input_type *in, output_type *out, - const std::vector &dependencies = {}); - + const std::vector &dependencies = {}); } - -.. rubric:: Syntax (Out-of-place transform, using ``config_param::COMPLEX_STORAGE=config_value::REAL_REAL`` :ref:`data format` ) +.. rubric:: Syntax (out-of-place transform, for complex descriptors with :ref:`onemkl_dft_complex_storage_real_real`) .. code-block:: cpp namespace oneapi::mkl::dft { - + template sycl::event compute_forward( descriptor_type &desc, input_type *in_re, input_type *in_im, output_type *out_re, output_type *out_im, - const std::vector &dependencies = {}); - + const std::vector &dependencies = {}); } - - .. container:: section - .. rubric:: Input Parameter + .. rubric:: Input Parameters :ref:`desc` - A fully configured and committed discrete Fourier transform descriptor class object, defining the type of transformation and data layout to be applied. At commit time, the ``sycl::queue`` has already been provided. + A fully configured and committed object of the + :ref:`descriptor` class, whose configuration is not + inconsistent with forward DFT calculations. inout - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house both the input and output data sequences for the in-place transformation. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + Pointer to USM allocation of sufficient capacity to store the elements + defining all the relevant data sequences, as configured by ``desc`` + (configured for in-place operations and not with + :ref:`onemkl_dft_complex_storage_real_real`, if complex). 
inout_re - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the real part of both the input and output data sequences for the in-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + Pointer to USM allocation of sufficient capacity to store the elements + defining the real parts of all the relevant data sequences, as configured + by ``desc``. Only with complex descriptors configured for in-place operations with + :ref:`onemkl_dft_complex_storage_real_real`. inout_im - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the imaginary part of both the input and output data sequences for the in-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + Pointer to USM allocation of sufficient capacity to store the elements + defining the imaginary parts of all the relevant data sequences, as + configured by ``desc``. Only with complex descriptors configured for in-place + operations with :ref:`onemkl_dft_complex_storage_real_real`. in - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the input data sequence for the out-of-place transformation. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. 
+ Pointer to USM allocation of sufficient capacity to store the elements + defining all the relevant forward-domain data sequences, as configured by + ``desc`` (configured for out-of-place operations and not with + :ref:`onemkl_dft_complex_storage_real_real`, if complex). in_re - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the real part of the input data sequence for the out-of-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. - + Pointer to USM allocation of sufficient capacity to store the elements + defining the real parts of all the relevant forward-domain data sequences, + as configured by ``desc``. Only with complex descriptors configured for out-of-place + operations with :ref:`onemkl_dft_complex_storage_real_real`. + in_im - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the imaginary part of the input data sequence for the out-of-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. - + Pointer to USM allocation of sufficient capacity to store the elements + defining the imaginary parts of all the relevant forward-domain data + sequences, as configured by ``desc``. Only with complex descriptors configured for + out-of-place operations with :ref:`onemkl_dft_complex_storage_real_real`. + dependencies - A vector of ``sycl::event``'s that represent the previously enqueued tasks that must be finished before this transformation can be started. 
+ An ``std::vector`` object collecting the events returned by + previously enqueued tasks that must be finished before this transform can + be calculated. .. container:: section .. rubric:: Output Parameters - inout - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house both the input and output data sequences for the in-place transformation. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + Pointer to USM allocation of sufficient capacity to store the elements + defining all the relevant data sequences, as configured by ``desc`` + (configured for in-place operations and not with + :ref:`onemkl_dft_complex_storage_real_real`, if complex). inout_re - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the real part of both the input and output data sequences for the in-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + Pointer to USM allocation of sufficient capacity to store the elements + defining the real parts of all the relevant data sequences, as configured + by ``desc``. Only with complex descriptors configured for in-place operations with + :ref:`onemkl_dft_complex_storage_real_real`. inout_im - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the imaginary part of both the input and output data sequences for the in-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. 
+ Pointer to USM allocation of sufficient capacity to store the elements + defining the imaginary parts of all the relevant data sequences, as + configured by ``desc``. Only with complex descriptors configured for in-place + operations with :ref:`onemkl_dft_complex_storage_real_real`. out - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the output data sequence for the out-of-place transformation. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. + Pointer to USM allocation of sufficient capacity to store the elements + defining all the relevant backward-domain data sequences, as configured by + ``desc`` (configured for out-of-place operations and not with + :ref:`onemkl_dft_complex_storage_real_real`, if complex). out_re - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the real part of the output data sequence for the out-of-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. - + Pointer to USM allocation of sufficient capacity to store the elements + defining the real parts of all the relevant backward-domain data sequences, + as configured by ``desc``. Only with complex descriptors configured for out-of-place + operations with :ref:`onemkl_dft_complex_storage_real_real`. + out_im - USM pointer containing an array of length no less than is specified at the :ref:`descriptor construction` time to house the imaginary part of the output data sequence for the out-of-place transformation when using the ``config_value::REAL_REAL`` format for the ``config_param::COMPLEX_STORAGE`` configuration parameter. 
Corresponds to the choice of ``config_value::NOT_INPLACE`` for the configuration parameter ``config_param::PLACEMENT``. - + Pointer to USM allocation of sufficient capacity to store the elements + defining the imaginary parts of all the relevant backward-domain data + sequences, as configured by ``desc``. Only with complex descriptors configured for + out-of-place operations with :ref:`onemkl_dft_complex_storage_real_real`. .. container:: section .. rubric:: Throws - The `oneapi::mkl::dft::compute_forward()` routine shall throw the following exceptions if the associated condition is detected. An implementation may throw additional implementation-specific exception(s) in case of error conditions not covered here: + The ``oneapi::mkl::dft::compute_forward()`` routine shall throw the following + exceptions if the associated condition is detected. An implementation may + throw additional implementation-specific exception(s) in case of error + conditions not covered here: :ref:`oneapi::mkl::invalid_argument()` - If the provided :ref:`onemkl_dft_descriptor` class is invalid, for instance, if it is a nullptr or if the value of ``config_param::COMMIT_STATUS`` in descriptor is not ``config_param::COMMITTED``. It will also be thrown if the input/output pointers are NULL. - - - + If the provided :ref:`descriptor` object ``desc`` + is invalid, for instance, if its configuration value associated with + configuration parameter ``config_param::COMMIT_STATUS`` is not + ``config_param::COMMITTED``. It will also be thrown if any required + input/output pointer is ``nullptr``. .. container:: section .. rubric:: Return Values - This function returns a ``sycl::event`` that allows to track progress of this transformation, and can be passed as a dependency to other routines that may depend on the results of this transformation to be finished before proceeding with the other operations. 
- + This function returns a ``sycl::event`` object that allows tracking the progress + of the forward DFT, and can be passed as a dependency to other routines that + may depend on the result of the forward transform(s) before proceeding with + other operations. **Parent topic:** :ref:`onemkl_dft` - - diff --git a/source/elements/oneMKL/source/domains/dft/config_params/data_layouts.rst b/source/elements/oneMKL/source/domains/dft/config_params/data_layouts.rst new file mode 100644 index 000000000..b1b43f685 --- /dev/null +++ b/source/elements/oneMKL/source/domains/dft/config_params/data_layouts.rst @@ -0,0 +1,301 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_dft_config_data_layouts: + +Configuration of Data Layouts +----------------------------- + +The DFT interface provides the configuration parameters +``config_param::FWD_STRIDES`` (resp. ``config_param::BWD_STRIDES``) +to define the data layout locating entries of relevant data sequences in the +forward (resp. backward) domain. In case of batched transforms, *i.e.*, if +:math:`M > 1` is configured by setting ``config_param::NUMBER_OF_TRANSFORMS`` +accordingly, ``config_param::FWD_DISTANCE`` (resp. +``config_param::BWD_DISTANCE``) completes the description of the data layout by +specifying the distances between successive data sequences in the forward (resp. +backward) domain. + +Using the notations from the :ref:`introduction` and the +superscript :math:`\text{fwd}` (resp. :math:`\text{bwd}`) for data sequences +belonging to forward (resp. backward) domain, for any :math:`m` and multi-index +:math:`\left(k_1, k_2, \ldots, k_d\right)` within :ref:`valid +range`, the corresponding entry +:math:`\left(\cdot\right)^{m}_{k_{1}, k_{2}, \dots, k_d }` - or the real or +imaginary part thereof - of the relevant data sequence is located at index + +..
math:: + s^{\text{xwd}}_0 + k_1\ s^{\text{xwd}}_1 + k_2\ s^{\text{xwd}}_2 + \dots + k_d\ s^{\text{xwd}}_d + m\ l^{\text{xwd}} + :label: eq_idx_data_layout + +of the corresponding data container (``sycl::buffer`` object or +device-accessible USM allocation) provided to the compute function, the base +data type of which is (possibly implicitly re-interpreted) as documented in the +:ref:`table` +below. In the index expression :eq:`eq_idx_data_layout`, +:math:`\text{x} = \text{f}` (resp. :math:`\text{x} = \text{b}`) for entries of +forward-domain (resp. backward-domain) data sequences and + +- :math:`s^{\text{xwd}}_j`, :math:`\forall j \in \lbrace 0, \ldots, d\rbrace` + represents the :ref:`offset and generalized + strides` defining the locations of entries within + each :math:`d`-dimensional data sequence in the forward (resp. backward) + domain if :math:`\text{x} = \text{f}` (resp. if :math:`\text{x} = \text{b}`), + counted in number of elements of the relevant :ref:`implicitly-assumed + elementary data type`; +- :math:`l^{\text{xwd}}` represents the + :ref:`distance` between successive + :math:`d`-dimensional data sequences in the forward (resp. backward) domain if + :math:`\text{x} = \text{f}` (resp. if :math:`\text{x} = \text{b}`), counted + in number of elements of the relevant :ref:`implicitly-assumed elementary data + type`. + +.. note:: + All data sequences (or respective real and imaginary parts thereof if + separately stored) must belong to the same block allocation, as a + consequence of the generalized index :eq:`eq_idx_data_layout`. + +.. _onemkl_dft_config_data_implicitly_assumed_elementary_data_type: + +.. rubric:: Implicitly-assumed elementary data type + +When reading or writing an element at index :eq:`eq_idx_data_layout` of any +user-provided data container used at compute time, a +:ref:`descriptor` object may re-interpret the base data +type of that data container into an implicitly-assumed elementary data type. 
+That implicitly-assumed data type depends on the object type, *i.e.*, on the +specialization values used for the template parameters when instantiating the +:ref:`descriptor` class, and, in case of complex +descriptors, on the configuration value set for its configuration parameter +``config_param::COMPLEX_STORAGE``. The table below lists the implicitly-assumed +data type in either domain (last 2 columns) based on the object type and +its configuration value for ``config_param::COMPLEX_STORAGE`` (first 2 columns). + +.. list-table:: + :header-rows: 1 + :class: longtable + + * - Object type + - Configuration value for configuration parameter ``config_param::COMPLEX_STORAGE`` + - Implicitly-assumed elementary data type in forward domain + - Implicitly-assumed elementary data type in backward domain + * - ``descriptor<precision::SINGLE, domain::COMPLEX>`` + - ``config_value::COMPLEX_COMPLEX`` + - ``std::complex<float>`` + - ``std::complex<float>`` + * - ``descriptor<precision::DOUBLE, domain::COMPLEX>`` + - ``config_value::COMPLEX_COMPLEX`` + - ``std::complex<double>`` + - ``std::complex<double>`` + * - ``descriptor<precision::SINGLE, domain::COMPLEX>`` + - ``config_value::REAL_REAL`` + - ``float`` + - ``float`` + * - ``descriptor<precision::DOUBLE, domain::COMPLEX>`` + - ``config_value::REAL_REAL`` + - ``double`` + - ``double`` + * - ``descriptor<precision::SINGLE, domain::REAL>`` + - irrelevant + - ``float`` + - ``std::complex<float>`` + * - ``descriptor<precision::DOUBLE, domain::REAL>`` + - irrelevant + - ``double`` + - ``std::complex<double>`` + +.. _onemkl_dft_num_dft_data_layouts_batched_dfts: + +.. rubric:: Configuring data layouts for batched transforms + +The value :math:`l^{\text{xwd}}` in :eq:`eq_idx_data_layout` above is +communicated as an ``std::int64_t`` configuration value, set for the +configuration parameter ``config_param::FWD_DISTANCE`` if :math:`\text{x} = +\text{f}` (resp. ``config_param::BWD_DISTANCE`` if :math:`\text{x} = \text{b}`). +This value is irrelevant for unbatched transforms, *i.e.*, for descriptors set +to handle a number of transforms :math:`M` equal to :math:`1` (default behavior).
+ +In case of batched transforms, the number :math:`M > 1` of desired DFTs *must* +be set explicitly as an ``std::int64_t`` configuration value for the +configuration parameter ``config_param::NUMBER_OF_TRANSFORMS``. In that case, +the configuration parameters ``config_param::FWD_DISTANCE`` and +``config_param::BWD_DISTANCE`` *must also* be set explicitly since their default +configuration values of :math:`0` would break the :ref:`consistency +requirements` for any :math:`M > 1`. + +.. _onemkl_dft_fwd_bwd_strides: + +.. rubric:: Configuring strides in forward and backward domains + +The values :math:`s^{\text{xwd}}_0, s^{\text{xwd}}_1, \dots, s^{\text{xwd}}_d` +in :eq:`eq_idx_data_layout` above are communicated as elements, in that order, +of a :math:`(d+1)`-long ``std::vector`` configuration value, set +for the configuration parameter ``config_param::FWD_STRIDES`` if +:math:`\text{x} = \text{f}` (resp. ``config_param::BWD_STRIDES`` if +:math:`\text{x} = \text{b}`). The element :math:`s^{\text{xwd}}_0` represents an +absolute offset (or "displacement") in the data sets while the subsequent +elements :math:`s^{\text{xwd}}_j\ (j > 0)` are generalized strides to be +considered along dimensions :math:`j \in \lbrace 1, \ldots, d\rbrace`. + +The default values set for the forward and backward strides correspond to the +data layout configurations for unbatched, in-place transforms using unit stride +along the last dimension with no offset (and minimal padding in forward +domain in case of real descriptors, aligning with the :ref:`requirements for +in-place transforms`). 
In other words, the +default values are :math:`s^{\text{fwd}}_0 = s^{\text{bwd}}_0 = 0`, +:math:`s^{\text{fwd}}_d = s^{\text{bwd}}_d = 1` and, for :math:`d`-dimensional +DFTs with :math:`d > 1`, + +- :math:`s^{\text{fwd}}_{d-1} = s^{\text{bwd}}_{d-1} = n_{d}` for complex + descriptors; +- :math:`s^{\text{bwd}}_{d-1} = \lfloor \frac{n_{d}}{2} \rfloor + 1`, and + :math:`s^{\text{fwd}}_{d-1} = 2 s^{\text{bwd}}_{d-1}` for real descriptors; +- if :math:`d > 2`, :math:`s^{\text{xwd}}_k = n_{k+1} s^{\text{xwd}}_{k+1}` + for :math:`k \in \lbrace 1, \ldots, d - 2\rbrace` (for + :math:`\text{x} = \text{f}` and :math:`\text{x} = \text{b}`). + +.. _onemkl_dft_data_layout_requirements: + +.. rubric:: General consistency requirements + +In general, the distances and strides must be set so that every index value +:eq:`eq_idx_data_layout` corresponds to a *unique* entry of the data sequences +under consideration. In other words, there must not be one index value as +expressed in :eq:`eq_idx_data_layout` that corresponds to two different +:math:`(d+1)`-tuples :math:`(m, k_{1}, k_{2}, \dots, k_d)` that are both within +the :ref:`elementary range of indices considered by +oneMKL`. + +Additionally, for in-place transforms (configuration value +``config_value::INPLACE`` associated with configuration parameter +``config_param::PLACEMENT``), the smallest stride value must be associated with +the same dimension in forward and backward domains and the data layouts must +abide by the following "*consistency requirement*": the memory address(es) of +leading entry(ies) along the last dimension must be identical in forward and +backward domains.
Specifically, considering any :math:`(d+1)`-tuple +:math:`(m, k_{1}, k_{2}, \dots, k_{d-1}, 0)` within :ref:`valid +range`, the memory address of the +element of corresponding index value :eq:`eq_idx_data_layout` in forward domain +(considering the :ref:`implicitly assumed +type` in forward +domain) must be identical to the memory address of the element of corresponding +index value :eq:`eq_idx_data_layout` in backward domain (considering the +:ref:`implicitly assumed +type` in +backward domain). Equivalently, + +- for complex descriptors, the offset, stride(s) (and distances, if relevant) + must be equal in forward and backward domain; +- for real descriptors, offsets and strides must satisfy + :math:`s^{\text{fwd}}_{j} = 2 s^{\text{bwd}}_{j}\ \forall j \in \lbrace 0, + \ldots, d - 1\rbrace` (note that :math:`0 \leq j < d`) and distances, if + relevant, must satisfy :math:`l^{\text{fwd}} = 2 l^{\text{bwd}}`. Note that + this leads to some data padding being required in forward domain if unit + strides are used along the last dimension in forward and backward domains. + +.. _onemkl_dft_io_strides_deprecated: + +.. rubric:: Configuring strides for input and output data [deprecated, **not** recommended] + +Instead of specifying strides by domain, one may choose to specify the strides +for input and output data sequences. Let +:math:`s^{\text{x}}_{j}, \ j \in \lbrace 0, 1, \ldots, d\rbrace` be the stride +values for input (resp. output) data sequences if :math:`\text{x} = \text{i}` +(resp. :math:`\text{x} = \text{o}`). Such +:math:`s^{\text{x}}_0, s^{\text{x}}_1, \dots, s^{\text{x}}_d` values may be +communicated as elements, in that order, of a :math:`(d+1)`-long +``std::vector`` configuration value, set for the (deprecated) +configuration parameter ``config_param::INPUT_STRIDES`` if +:math:`\text{x} = \text{i}` (resp. ``config_param::OUTPUT_STRIDES`` if +:math:`\text{x} = \text{o}`). 
+ +The values of :math:`s^{\text{i}}_{j}` and :math:`s^{\text{o}}_{j}` are to be +used and considered by oneMKL if and only if +:math:`s^{\text{fwd}}_{j} = s^{\text{bwd}}_{j} = 0, \forall j \in \lbrace 0, 1, \ldots, d\rbrace`. +(This will happen automatically if ``config_param::INPUT_STRIDES`` and ``config_param::OUTPUT_STRIDES`` +are set and ``config_param::FWD_STRIDES`` and ``config_param::BWD_STRIDES`` are not. See note below.) +In such a case, :ref:`descriptor` objects must consider +the data layouts corresponding to the two compute directions separately. As +detailed above, relevant data sequence entries are accessed as elements of data +containers (``sycl::buffer`` objects or device-accessible USM allocations) +provided to the compute function, the base data type of which is (possibly +implicitly re-interpreted) as documented in :ref:`this +table`. If using +input and output strides, for any :math:`m` and multi-index +:math:`\left(k_1, k_2, \ldots, k_d\right)` within :ref:`valid +range`, the index to be used when +accessing a data sequence entry - or part thereof - in forward domain is + +.. math:: + s^{\text{x}}_0 + k_1\ s^{\text{x}}_1 + k_2\ s^{\text{x}}_2 + \dots + k_d\ s^{\text{x}}_d + m\ l^{\text{fwd}} + +where :math:`\text{x} = \text{i}` (resp. :math:`\text{x} = \text{o}`) for +forward (resp. backward) DFT(s). Similarly, the index to be used when accessing +a data sequence entry - or part thereof - in backward domain is + +.. math:: + s^{\text{x}}_0 + k_1\ s^{\text{x}}_1 + k_2\ s^{\text{x}}_2 + \dots + k_d\ s^{\text{x}}_d + m\ l^{\text{bwd}} + +where :math:`\text{x} = \text{o}` (resp. :math:`\text{x} = \text{i}`) for +forward (resp. backward) DFT(s). + +As a consequence, configuring :ref:`descriptor` objects +using these deprecated configuration parameters makes their configuration +direction-dependent when different stride values are used in +forward and backward domains. 
Since the intended compute direction is unknown +to the :ref:`descriptor` object when +:ref:`committing` it, every direction that results +in a :ref:`consistent data layout` in +forward and backward domains must be supported by successfully committed +:ref:`descriptor` objects. + +.. note:: + For :ref:`descriptor` objects with strides configured + via these deprecated configuration parameters, the :ref:`consistency + requirements` may be satisfied for only + one of the two compute directions, *i.e.*, for only one of the forward or + backward DFT(s). Such a configuration should not cause an exception to be + thrown by the descriptor's :ref:`onemkl_dft_descriptor_commit` member + function but the behavior of oneMKL is undefined if using that object for + the compute direction that does not align with the :ref:`consistency + requirements`. + +.. note:: + Setting either of ``config_param::INPUT_STRIDES`` or + ``config_param::OUTPUT_STRIDES`` triggers any default or previously-set + values for ``config_param::FWD_STRIDES`` and ``config_param::BWD_STRIDES`` + to reset to ``std::vector(d+1, 0)`` values, and vice versa. + This default behavior prevents mix-and-matching usage of either of + ``config_param::INPUT_STRIDES`` or ``config_param::OUTPUT_STRIDES`` with + either of ``config_param::FWD_STRIDES`` or ``config_param::BWD_STRIDES``, + which is **not** to be supported. If such a configuration is attempted, an + exception is to be thrown at commit time due to invalid configuration, as + the stride values that were implicitly reset surely invalidate the + :ref:`consistency requirements` for any + non-trivial DFT. + +If specifying the data layout strides using these deprecated configuration +parameters and if the strides differ in forward and backward domain, the +descriptor *must* be re-configured and re-committed for computing the DFT in +the reverse direction as shown below. + +.. code-block:: cpp + + // ... 
+ desc.set_value(config_param::INPUT_STRIDES, fwd_domain_strides); + desc.set_value(config_param::OUTPUT_STRIDES, bwd_domain_strides); + desc.commit(queue); + compute_forward(desc, ...); + // ... + desc.set_value(config_param::INPUT_STRIDES, bwd_domain_strides); + desc.set_value(config_param::OUTPUT_STRIDES, fwd_domain_strides); + desc.commit(queue); + compute_backward(desc, ...); + +The ``config_param::INPUT_STRIDES`` and ``config_param::OUTPUT_STRIDES`` +parameters are deprecated. A warning message "{IN,OUT}PUT_STRIDES are deprecated: +please use {F,B}WD_STRIDES, instead." is to be reported to applications using +these configuration parameters. + +**Parent topic** :ref:`onemkl_dft_enums` diff --git a/source/elements/oneMKL/source/domains/dft/config_params/distance.rst b/source/elements/oneMKL/source/domains/dft/config_params/distance.rst deleted file mode 100644 index 14afd2426..000000000 --- a/source/elements/oneMKL/source/domains/dft/config_params/distance.rst +++ /dev/null @@ -1,20 +0,0 @@ -.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -.. _onemkl_dft_config_distance: - -FORWARD_DISTANCE and BACKWARD_DISTANCE --------------------------------------- - -The FFT interface enables computation of multiple transforms. To compute multiple transforms, you need to specify the data distribution of the multiple sets of data. The distance between the first data elements of consecutive data sets, ``FORWARD_DISTANCE`` for forward :ref:`onemkl_dft_enum_domain` data or ``BACKWARD_DISTANCE`` for backward :ref:`onemkl_dft_enum_domain` data, specifies the distribution. The configuration setting is a value of ``std::int64_t`` data type. - -The default value for both configuration settings is one. You must set this parameter explicitly if the number of transforms is greater than one (see :ref:`onemkl_dft_config_number_of_transforms` ). 
- -The distance is counted in elements of the data type defined by the descriptor configuration (rather than by the type of the variable passed to the computation functions). Specifically, the :ref:`onemkl_dft_enum_domain` template parameter, and the ``COMPLEX_STORAGE``, ``REAL_STORAGE`` and ``CONJUGATE_EVEN_STORAGE`` configuration parameters described in :ref:`onemkl_dft_config_storage_formats` define the type of the elements as shown in the :ref:`complex_storage`, :ref:`real_storage` and :ref:`conjugate_even_storage` tables. - -For in-place transforms ( ``PLACEMENT=INPLACE`` ), the configuration set by ``FORWARD_DISTANCE`` and ``BACKWARD_DISTANCE`` should be consistent, that is, the locations of the data sets for input and output must coincide. - - -**Parent topic:** :ref:`onemkl_dft_enums` - diff --git a/source/elements/oneMKL/source/domains/dft/config_params/number_of_transforms.rst b/source/elements/oneMKL/source/domains/dft/config_params/number_of_transforms.rst deleted file mode 100644 index 42d7b4dc4..000000000 --- a/source/elements/oneMKL/source/domains/dft/config_params/number_of_transforms.rst +++ /dev/null @@ -1,21 +0,0 @@ -.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -.. _onemkl_dft_config_number_of_transforms: - -Number of Transforms --------------------- - -If you need to perform a large number of identical DFTs, you can do this in a single call to a compute* function with the value of this configuration parameter equal to the actual number of the transforms. The default value is 1. You can set this parameter to a positive integer value using the ``std::int64_t`` data type. 
- -When setting the number of transforms to a value greater than one, you also need to specify the distance between the forward data sets and the distance between the backward data sets using the ``config_param::FWD_DISTANCE`` and ``config_param::BWD_DISTANCE`` configuration parameters corresponding to the specified :ref:`onemkl_dft_enum_domain`. - -.. note:: - * The data sets must not have common elements - * All the sets of data in each domain must be located within the same memory block. - - -**Parent topic** :ref:`onemkl_dft_enums` - - diff --git a/source/elements/oneMKL/source/domains/dft/config_params/scaling_factor.rst b/source/elements/oneMKL/source/domains/dft/config_params/scaling_factor.rst deleted file mode 100644 index 32735d872..000000000 --- a/source/elements/oneMKL/source/domains/dft/config_params/scaling_factor.rst +++ /dev/null @@ -1,13 +0,0 @@ -.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -.. _onemkl_dft_config_scale: - -Forward and Backward Scale ---------------------------- - -The forward and backward transformations are each associated with a scale factor, :math:`\sigma`, having the default value of 1. For example, for a one-dimensional transform of length n , you can use the default scale of 1 for the forward transform and set the scale factor for the backward transform to be 1/ n , thus making the backward transform the inverse of the forward transform. Use real floating point data type corresponding to :ref:`onemkl_dft_enum_precision`. - - -**Parent topic** :ref:`onemkl_dft_enums` diff --git a/source/elements/oneMKL/source/domains/dft/config_params/storage_formats.rst b/source/elements/oneMKL/source/domains/dft/config_params/storage_formats.rst index 858d065b1..0dfdf214a 100644 --- a/source/elements/oneMKL/source/domains/dft/config_params/storage_formats.rst +++ b/source/elements/oneMKL/source/domains/dft/config_params/storage_formats.rst @@ -2,278 +2,200 @@ .. .. 
SPDX-License-Identifier: CC-BY-4.0 -.. _onemkl_dft_config_storage_formats: - -Storage Formats ---------------- - -Depending on the value of the :ref:`onemkl_dft_enum_domain` template value, the implementation of the DFT supports several storage schemes for input and output data. (See ``Charles Van Loan, Computational Frameworks for the Fast Fourier Transform , SIAM, Philadelphia, 1992`` for motivation of these schemes). - -The data elements are placed within contiguous memory blocks, defined with generalized strides (see :ref:`onemkl_dft_config_strides`). For multiple transforms, all sets of data should be located within the same memory block, and the data sets should be placed at the same distance from each other (see :ref:`NUMBER_OF TRANSFORMS` and ``:config_param::FWD_DISTANCE``, ``config_param::BWD_DISTANCE`` ). - -The input data and strides sizes are stored and offsets counted in terms of elements of the data type (complex or real) based on the storage format and :ref:`forward domain` as seen in :ref:`Element types for complex-to-complex transformation and COMPLEX_STORAGE `, :ref:`Element types for real-to-complex transformations and REAL_STORAGE` and :ref:`Element types for real-to-complex transformations and CONJUGATE_EVEN_STORAGE`. +.. _onemkl_dft_data_storage: +Data storage +============ +The data storage convention observed by a +:ref:`descriptor` object depends on whether it is a real +or complex descriptor and, in case of complex descriptors, on the configuration +value associated with configuration parameter ``config_param::COMPLEX_STORAGE``. .. _onemkl_dft_complex_storage: -COMPLEX_STORAGE -+++++++++++++++ - -For the :ref:`onemkl_dft_enum_domain` template parameter with value ``COMPLEX``, both input and output sequences belong to the complex domain. In this case, the configuration parameter COMPLEX_STORAGE can have one of the two values:``COMPLEX_COMPLEX`` (default) or ``REAL_REAL`` . 
+Complex descriptors +------------------- + +For a complex descriptor, the configuration parameter +``config_param::COMPLEX_STORAGE`` specifies how the entries of the complex data +sequences it consumes and produces are stored. If that configuration parameter is +associated with a configuration value ``config_value::COMPLEX_COMPLEX`` (default +behavior), those entries are accessed and stored as ``std::complex<float>`` +(resp. ``std::complex<double>``) elements of a single data container +(device-accessible USM allocation or ``sycl::buffer`` object) if the +:ref:`descriptor` object is a single-precision (resp. +double-precision) descriptor. If the configuration value +``config_value::REAL_REAL`` is used instead, the real and imaginary parts of +those entries are accessed and stored as ``float`` (resp. ``double``) elements +of two separate, non-overlapping data containers (device-accessible USM +allocations or ``sycl::buffer`` objects) if the +:ref:`descriptor` object is a single-precision (resp. +double-precision) descriptor. + +These two behaviors are further specified and illustrated below. .. _onemkl_dft_complex_storage_complex_complex: -.. rubric:: COMPLEX_COMPLEX - -With the ``config_value::COMPLEX_COMPLEX`` storage, complex-valued data sequences are stored in a single complex container (array/``sycl::buffer``), AZ, so that a complex-valued element :math:`z_{k_1, k_2,\dots ,k_d}` of the m-th d-dimensional sequence is accessed at :math:`AZ[m*\text{distance} + \text{stride}_0 + k_1*\text{stride}_1 + k_2*\text{stride}_2 + \dots + k_d *\text{stride}_d ]` as a structure consisting of the real and imaginary parts. This code illustrates the use of ``config_value::COMPLEX_COMPLEX`` storage with three dimensions (:math:`n_1,n_2,n_3`) and m batches: +..
rubric:: ``config_value::COMPLEX_COMPLEX`` for ``config_param::COMPLEX_STORAGE`` + +For complex descriptors with parameter ``config_param::COMPLEX_STORAGE`` set to +``config_value::COMPLEX_COMPLEX``, each of forward- and backward-domain data +sequences must belong to a single data container (device-accessible USM +allocation or ``sycl::buffer`` object). Any relevant entry +:math:`\left(\cdot\right)^{m}_{k_1, k_2,\dots ,k_d}` is accessed/stored from/in +a data container provided at compute time at the index value expressed in eq. +:eq:`eq_idx_data_layout` (from :ref:`this page`) +of that data container, whose elementary data type is (possibly implicitly +re-interpreted as) ``std::complex`` (resp. ``std::complex``) for +single-precision (resp. double-precision) descriptors. + +The same unique data container is to be used for forward- and backward-domain +data sequences for in-place transforms (for +:ref:`descriptor` objects with configuration value +``config_value::INPLACE`` for configuration parameter +``config_param::PLACEMENT``). Two separate data containers sharing no common +elements are to be used for out-of-place transforms (for +:ref:`descriptor` objects with configuration value +``config_value::NOT_INPLACE`` for configuration parameter +``config_param::PLACEMENT``). + +The following snippet illustrates the usage of ``config_value::COMPLEX_COMPLEX`` +for configuration parameter ``config_param::COMPLEX_STORAGE``, in the +context of in-place, single-precision (fp32) calculations of :math:`M` +three-dimensional :math:`n_1 \times n_2 \times n_3` complex transforms, using +identical (default) strides and distances in forward and backward domains, with +USM allocations. .. code-block:: cpp - std::complex * AZ; // 2*sizeof(datatype)*n1*n2*n3*m - std::vector ios; // length 4 of sizes - std::int64_t iodist; - - // ... 
- - // on input: Z(k1,k2,k3,m) - // = AZ[ ios[0] + k1*ios[1] + k2*ios[2] + k3*ios[3] + m*iodist ] - compute_forward(descr, AZ); // complex-to-complex in-place FFT - // on output: Z{k1,k2,k3,m} - // = AZ[ ios[0] + k1*ios[1] + k2*ios[2] + k3*ios[3] + m*iodist ] - - - + namespace dft = oneapi::mkl::dft; + dft::descriptor desc({n1, n2, n3}); + std::vector strides({0, n2*n3, n3, 1}); + std::int64_t dist = n1*n2*n3; + std::complex *Z = (std::complex *) malloc_device(2*sizeof(float)*n1*n2*n3*M, queue); + desc.set_value(dft::config_param::FWD_STRIDES, strides); + desc.set_value(dft::config_param::BWD_STRIDES, strides); + desc.set_value(dft::config_param::FWD_DISTANCE, dist); + desc.set_value(dft::config_param::BWD_DISTANCE, dist); + desc.set_value(dft::config_param::NUMBER_OF_TRANSFORMS, M); + desc.set_value(dft::config_param::COMPLEX_STORAGE, dft::config_value::COMPLEX_COMPLEX); + desc.commit(queue); + + // initialize forward-domain data such that entry {m;k1,k2,k3} + // = Z[ strides[0] + k1*strides[1] + k2*strides[2] + k3*strides[3] + m*dist ] + compute_forward(desc, Z); // complex-to-complex in-place DFT + // in backward domain: entry {m;k1,k2,k3} + // = Z[ strides[0] + k1*strides[1] + k2*strides[2] + k3*strides[3] + m*dist ] .. _onemkl_dft_complex_storage_real_real: -.. rubric:: REAL_REAL - -With the ``config_value::REAL_REAL`` storage, complex-valued data sequences are stored by two real containers (arrays/``sycl::buffer``'s), AR and AI, so that a complex-valued element :math:`z_{k_1, k_2, \dots, k_d}` of the m-th d-dimensional sequence has real part :math:`AR[m*\text{distance} + \text{stride}_0 + k_1*\text{stride}_1 + k_2*\text{stride}_2 + \dots + k_d*\text{stride}_d ]` and imaginary part :math:`AI[m*\text{distance} + \text{stride}_0 + k_1*\text{stride}_1 + k_2*\text{stride}_2 + \dots + k_d*\text{stride}_d ]`. This code illustrates the use of ``config_value::REAL_REAL`` storage with three dimensions (:math:`n_1,n_2,n_3`) and m batches: +.. 
rubric:: ``config_value::REAL_REAL`` for ``config_param::COMPLEX_STORAGE`` + +For complex descriptors with parameter ``config_param::COMPLEX_STORAGE`` set to +``config_value::REAL_REAL``, forward- and backward-domain data sequences are +read/stored from/in two different, non-overlapping data containers +(device-accessible USM allocations or ``sycl::buffer`` objects) encapsulating +the real and imaginary parts of the relevant entries separately. The real and +imaginary parts of any relevant complex entry +:math:`\left(\cdot\right)^{m}_{k_1, k_2,\dots ,k_d}` are both stored at the index value +expressed in eq. :eq:`eq_idx_data_layout` (from :ref:`this +page`) of their respective data containers, whose elementary +data type is (possibly implicitly re-interpreted as) ``float`` (resp. +``double``) for single-precision (resp. double-precision) descriptors. + +The same two data containers are to be used for real and imaginary parts of +forward- and backward-domain data sequences for in-place transforms (for +:ref:`descriptor` objects with configuration value +``config_value::INPLACE`` for configuration parameter +``config_param::PLACEMENT``). Four separate data containers sharing no common +elements are to be used for out-of-place transforms (for +:ref:`descriptor` objects with configuration value +``config_value::NOT_INPLACE`` for configuration parameter +``config_param::PLACEMENT``). + +The following snippet illustrates the usage of ``config_value::REAL_REAL`` +set for configuration parameter ``config_param::COMPLEX_STORAGE``, in the +context of in-place, single-precision (fp32) calculation of :math:`M` +three-dimensional :math:`n_1 \times n_2 \times n_3` complex transforms, using +identical (default) strides and distances in forward and backward domains, with +USM allocations. .. code-block:: cpp - datatype * AR; // sizeof(datatype)*n1*n2*n3*m - datatype * AI; // sizeof(datatype)*n1*n2*n3*m - std::vector ios; // length 4 of strides - std::int64_t iodist; - - // ... 
- - // on input: Z(k1,k2,k3,m) - // = AR[ ios[0] + k1*ios[1] + k2*ios[2] + k3*ios[3] + m*iodist ] - // + i*AI[ ios[0] + k1*ios[1] + k2*ios[2] + k3*ios[3] + m*iodist ] - compute_forward(descr, AR, AI); // complex-to-complex in-place FFT - // on output: Z{k1,k2,k3,m} - // = AR[ ios[0] + k1*ios[1] + k2*ios[2] + k3*ios[3] + m*iodist ] - // + i*AI[ ios[0] + k1*ios[1] + k2*ios[2] + k3*ios[3] + m*iodist ] - + namespace dft = oneapi::mkl::dft; + dft::descriptor<dft::precision::SINGLE, dft::domain::COMPLEX> desc({n1, n2, n3}); + std::vector<std::int64_t> strides({0, n2*n3, n3, 1}); + std::int64_t dist = n1*n2*n3; + float *ZR = (float *) malloc_device(sizeof(float)*n1*n2*n3*M, queue); // data container for real parts + float *ZI = (float *) malloc_device(sizeof(float)*n1*n2*n3*M, queue); // data container for imaginary parts + desc.set_value(dft::config_param::FWD_STRIDES, strides); + desc.set_value(dft::config_param::BWD_STRIDES, strides); + desc.set_value(dft::config_param::FWD_DISTANCE, dist); + desc.set_value(dft::config_param::BWD_DISTANCE, dist); + desc.set_value(dft::config_param::NUMBER_OF_TRANSFORMS, M); + desc.set_value(dft::config_param::COMPLEX_STORAGE, dft::config_value::REAL_REAL); + desc.commit(queue); + + // initialize forward-domain data such that the real part of entry {m;k1,k2,k3} + // = ZR[ strides[0] + k1*strides[1] + k2*strides[2] + k3*strides[3] + m*dist ] + // and the imaginary part of entry {m;k1,k2,k3} + // = ZI[ strides[0] + k1*strides[1] + k2*strides[2] + k3*strides[3] + m*dist ] + compute_forward(desc, ZR, ZI); // complex-to-complex in-place DFT + // in backward domain: the real part of entry {m;k1,k2,k3} + // = ZR[ strides[0] + k1*strides[1] + k2*strides[2] + k3*strides[3] + m*dist ] + // and the imaginary part of entry {m;k1,k2,k3} + // = ZI[ strides[0] + k1*strides[1] + k2*strides[2] + k3*strides[3] + m*dist ] .. _onemkl_dft_real_storage: -REAL_STORAGE -++++++++++++ - -For the :ref:`onemkl_dft_enum_domain` template parameter with value ``REAL``, only the value of ``REAL_REAL`` is supported. - -..
_onemkl_dft_real_storage_real_real: - -.. rubric:: REAL_REAL - -With the ``REAL_REAL`` storage, real-valued data sequences in a real domain are stored by one real container (array/``sycl::buffer``), AR, so that a real-valued element :math:`r_{k_1, k_2, \dots, k_d}` of the m-th d-dimensional sequence is accessed as :math:`AR[m*\text{distance} + \text{stride}_0 + k_1*\text{stride}_1 + k_2*\text{stride}_2 + \dots + k_d*\text{stride}_d ]`. This code illustrates the use of ``config_value::REAL_REAL`` storage with three dimensions (:math:`n_1,n_2,n_3`) and m batches: +Real descriptors +---------------- + +Real descriptors observe only one type of data storage. Any relevant (real) +entry :math:`\left(\cdot\right)^{m}_{k_1, k_2,\dots ,k_d}` of a data sequence +in forward domain is accessed and stored as a ``float`` (resp. ``double``) +element of a single data container (device-accessible USM allocation or +``sycl::buffer`` object) if the :ref:`descriptor` object +is a single-precision (resp. double-precision) descriptor. Any relevant +(complex) entry :math:`\left(\cdot\right)^{m}_{k_1, k_2,\dots ,k_d}` of a data +sequence in backward domain is accessed and stored as a ``std::complex`` +(resp. ``std::complex``) element of a single data container +(device-accessible USM allocation or ``sycl::buffer`` object) if the +:ref:`descriptor` object is a single-precision (resp. +double-precision) descriptor. + +The following snippet illustrates the usage of a real, single-precision +descriptor (and the corresponding data storage) for the in-place, +single-precision (fp32), calculation of :math:`M` three-dimensional +:math:`n_1 \times n_2 \times n_3` real transforms, using default strides in +forward and backward domains, with USM allocations. .. code-block:: cpp - datatype * AR; // sizeof(datatype)*n1*n2*n3*m - datatype * AI; // sizeof(datatype)*n1*n2*n3*m - std::vector ios; // length 4 of strides - std::int64_t iodist; - - // ... 
- - // on input: R(k1,k2,k3,m) - // = AR[ ios[0] + k1*ios[1] + k2*ios[2] + k3*ios[3] + m*iodist ] - compute_forward(descr, AR, AI); // real-to-complex in-place FFT - // on output: Z{k1,k2,k3,m} - // = AR[ ios[0] + k1*ios[1] + k2*ios[2] + k3*ios[3] + m*iodist ] - // + i*AI[ ios[0] + k1*ios[1] + k2*ios[2] + k3*ios[3] + m*iodist ] - - - - -.. _onemkl_dft_conjugate_even_storage: - -CONJUGATE_EVEN_STORAGE -++++++++++++++++++++++ - -For the :ref:`onemkl_dft_enum_domain` template parameter with value ``REAL`` and considered as a conjugate-even domain, the value of ``config_value::COMPLEX_COMPLEX`` is supported. The conjugate-even symmetry of the data enables storing only about a half of the whole mathematical result, so that one part of it can be directly referenced in the memory while the other part can be reconstructed depending on the selected storage configuration. The ``config_param::PACKED_FORMAT`` configuration parameter defines how the data is packed. Possible values for ``config_param::PACKED_FORMAT`` depend on the values of the ``config_param::CONJUGATE_EVEN_STORAGE`` configuration parameter. - -.. tabularcolumns:: l|c| - -.. list-table:: - :header-rows: 1 - :class: longtable - - * - CONJUGATE_EVEN_STORAGE - - Supported PACKED_FORMATS - * - :ref:`onemkl_dft_conjugate_even_storage_complex_complex` - - ``config_value::CCE_FORMAT`` can be used with transforms of any dimension. - - - -.. _onemkl_dft_conjugate_even_storage_complex_complex: - -.. rubric:: COMPLEX_COMPLEX - -There is only one ``config_param::PACKED_FORMAT`` supported by the ``config_value::COMPLEX_COMPLEX`` value for ``config_param::CONJUGATE_EVEN_STORAGE``, mainly the ``config_value::CCE_FORMAT``. The complex-valued data sequence consists of one complex container (array/``sycl::buffer``), AZ, so that a complex-valued element :math:`z_{k_1, k_2, \dots, k_d}` of the m-th d-dimensional sequence can be accessed or reconstructed as follows: - -Consider a d-dimensional real-to-complex transform. 
- -Because the input sequence, R, is real-valued, the mathematical result, Z, has conjugate-even symmetry: -:math:`z_{k_1, k_2, \dots, k_d} = \text{conjugate}( z_{n_1-k_1, n_2-k_2, \dots, n_d-k_d} )`, -where index arithmetic is performed modulo the length of the respective dimension. Obviously, the first element of the result is real-valued: -:math:`z_{0, 0, \dots, 0} = \text{conjugate}( z_{0, 0, \dots, 0} )`. - -For dimensions with even lengths, some of the other elements are real-valued as well. For example, if :math:`n_s` is even, then -:math:`z_{0, 0, \dots, \frac{n_s}{2}, 0, \dots, 0} = \text{conjugate}( z_{0, 0, \dots, \frac{n_s}{2}, 0, \dots, 0} )`. -With the conjugate-even symmetry, approximately a half of the result suffices to fully reconstruct it. For an arbitrary dimension, :math:`h` , it suffices to store elements :math:`z_{k_1, \dots, k_h , \dots, k_d}` for the following indices: - -* :math:`k_h = 0, \dots, \left[ \frac{n_h}{2}\right]` -* :math:`k_i = 0, \dots, n_i-1`, where :math:`i = 1,\dots, d` and :math:`i \neq h` - -and assuming that integer division rounds down. - -The symmetry property enables reconstructing the remaining elements: for :math:`k_h = \left[ \frac{n_h}{2}\right] + 1, \dots , n_h - 1`. The halved dimension is always assumed to be the dimension for which storage is contiguous in memory (see strides), for example in a 2D row-major format, it is the last dimension and for 2D column-major format it is the first dimension. - -.. _onemkl_dft_complex_complex_cce_1d_even_or_odd: - -.. rubric:: Packed complex domain formats for a 1D real-to-complex transformation considered as a conjugate-even-domain with :ref:`onemkl_dft_conjugate_even_storage_complex_complex` storage and :math:`n=2L` (even size) or :math:`n=2L+1` (odd size). - -.. tabularcolumns:: l|c|c|c|c|c|c|c| - -.. 
list-table:: - :header-rows: 1 - :class: longtable - - * - :math:`k=` - - 0 - - 1 - - 2 - - :math:`\dots` - - L-2 - - L-1 - - L - * - CCE - - :math:`Z_0` - - :math:`Z_1` - - :math:`Z_2` - - :math:`\dots` - - :math:`Z_{L-2}` - - :math:`Z_{L-1}` - - :math:`Z_{L}` - - -.. _onemkl_dft_complex_complex_cce_2d_even_even: - -.. rubric:: Packed complex domain formats for a 2D :math:`n_1\times n_2` real-to-complex transformations considered as a conjugate-even-domain with :ref:`onemkl_dft_conjugate_even_storage_complex_complex` storage and :math:`n_1=2K` (even size) and :math:`n_2=2L` (even size) using row-major input data. - -.. tabularcolumns:: |c|c|c|c|c|c|c| - -.. list-table:: - :header-rows: 1 - :stub-columns: 1 - :class: longtable - - * - :math:`k_1\backslash k_2` - - :math:`0` - - 1 - - 2 - - :math:`\dots` - - L-1 - - L - * - :math:`0` - - :math:`Z_{0,0}` - - :math:`Z_{0,1}` - - :math:`Z_{0,2}` - - :math:`\dots` - - :math:`Z_{0,L-1}` - - :math:`Z_{0,L}` - * - 1 - - :math:`Z_{1,0}` - - :math:`Z_{1,1}` - - :math:`Z_{1,2}` - - :math:`\dots` - - :math:`Z_{1,L-1}` - - :math:`Z_{1,L}` - * - 2 - - :math:`Z_{2,0}` - - :math:`Z_{2,1}` - - :math:`Z_{2,2}` - - :math:`\dots` - - :math:`Z_{2,L-1}` - - :math:`Z_{2,L}` - * - :math:`\dots` - - :math:`\dots` - - :math:`\dots` - - :math:`\dots` - - :math:`\dots` - - :math:`\dots` - - :math:`\dots` - * - :math:`n_1-2` - - :math:`Z_{n_1-2,0}` - - :math:`Z_{n_1-2,1}` - - :math:`Z_{n_1-2,2}` - - :math:`\dots` - - :math:`Z_{n_1-2,L-1}` - - :math:`Z_{n_1-2,L}` - * - :math:`n_1-1` - - :math:`Z_{n_1-1,0}` - - :math:`Z_{n_1-1,1}` - - :math:`Z_{n_1-1,2}` - - :math:`\dots` - - :math:`Z_{n_1-1,L-1}` - - :math:`Z_{n_1-1,L}` - - - -The following code illustrates usage of the ``config_value::COMPLEX_COMPLEX`` storage for a two-dimensional conjugate-even domain with row-major input data: - -.. 
code-block:: cpp - - datatype * AR; // sizeof(datatype)*n1*n2*m - std::complex * AZ; // sizeof(datatype)*n1*n2*m - std::vector is; // length 3 of input strides - std::vector os; // length 3 of output strides - std::int64_t idist, odist; - - // ... - - // on input: R(k1,k2,m) - // = AR[ is[0] + k1*is[1] + k2*is[2] + m*idist ] - compute_forward(descr, AR, AZ); // real-to-complex out-of-place FFT - // on output: - // for k2=0,n2/2: Z{k1,k2,m} = AZ[os[0] + k1*os[1] + k2*os[2] + m*odist] - // for k2=n2/2+1,n2-1: Z{k1,k2,m} = conj(AZ[os[0] + (n1-k1)%n1*os[1] - // + (n2-k2)%n2*os[2] + m*odist]) - -For the backward transform, the input and output parameters and layouts exchange roles. Set the strides describing the layout in the backward/forward domain as input/output strides, respectively. For example: - -.. code-block:: cpp - - // ... - descr.set_value(config_param::INPUT_STRIDES, fwd_domain_strides); - descr.set_value(config_param::OUTPUT_STRIDES, bwd_domain_strides); - descr.commit(queue); - compute_forward(descr, ...); - // ... 
- descr.set_value(config_param::INPUT_STRIDES, bwd_domain_strides); - descr.set_value(config_param::OUTPUT_STRIDES, fwd_domain_strides); - descr.commit(queue); - compute_backward(descr, ...); - - - + namespace dft = oneapi::mkl::dft; + dft::descriptor<dft::precision::SINGLE, dft::domain::REAL> desc({n1, n2, n3}); + // Note: integer divisions here below + std::vector<std::int64_t> fwd_strides({0, 2*n2*(n3/2 + 1), 2*(n3/2 + 1), 1}); + std::vector<std::int64_t> bwd_strides({0, n2*(n3/2 + 1), (n3/2 + 1), 1}); + std::int64_t fwd_dist = 2*n1*n2*(n3/2 + 1); + std::int64_t bwd_dist = n1*n2*(n3/2 + 1); + float *data = (float *) malloc_device(sizeof(float)*fwd_dist*M, queue); // data container + desc.set_value(dft::config_param::FWD_STRIDES, fwd_strides); + desc.set_value(dft::config_param::BWD_STRIDES, bwd_strides); + desc.set_value(dft::config_param::FWD_DISTANCE, fwd_dist); + desc.set_value(dft::config_param::BWD_DISTANCE, bwd_dist); + desc.set_value(dft::config_param::NUMBER_OF_TRANSFORMS, M); + desc.commit(queue); + + // initialize forward-domain data such that real entry {m;k1,k2,k3} + // = data[ fwd_strides[0] + k1*fwd_strides[1] + k2*fwd_strides[2] + k3*fwd_strides[3] + m*fwd_dist ] + compute_forward(desc, data); // real-to-complex in-place DFT + // in backward domain, the implicitly-assumed type is complex so, considering + // std::complex<float>* complex_data = reinterpret_cast<std::complex<float>*>(data); + // we have entry {m;k1,k2,k3} + // = complex_data[ bwd_strides[0] + k1*bwd_strides[1] + k2*bwd_strides[2] + k3*bwd_strides[3] + m*bwd_dist ] + // for 0 <= k3 <= n3/2. + // Note: if n3/2 < k3 < n3, entry {m;k1,k2,k3} = std::conj(entry {m;(n1-k1)%n1,(n2-k2)%n2,n3-k3}) **Parent topic** :ref:`onemkl_dft_enums` - - diff --git a/source/elements/oneMKL/source/domains/dft/config_params/strides.rst b/source/elements/oneMKL/source/domains/dft/config_params/strides.rst deleted file mode 100644 index c20cbe070..000000000 --- a/source/elements/oneMKL/source/domains/dft/config_params/strides.rst +++ /dev/null @@ -1,76 +0,0 @@ -.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation -..
-.. SPDX-License-Identifier: CC-BY-4.0 - -.. _onemkl_dft_config_strides: - -INPUT_STRIDES and OUTPUT_STRIDES --------------------------------- - -The FFT interface provides configuration parameters that define the layout of multidimensional data in the computer memory. For d-dimensional data set, :math:`X`, defined by dimensions :math:`n_1\times n_2 \times\dots\times n_d` , the layout describes where a particular element :math:`X(k_1, k_2, \dots, k_d )` of the data set is located. The memory address of the element :math:`X(k_1, k_2, \dots, k_d )` is expressed by the formula: -:math:`X(k_1, k_2, \dots, k_d ) =` the :math:`+ s_0 + k_1*s_1 + k_2*s_2 + \dots + k_d*s_d`-th element of the container (``sycl::buffer`` or USM pointer) provided to the compute function, -where :math:`s_0` is the displacement and :math:`s_1 , \dots, s_d` are generalized strides. The configuration parameters ``config_param::INPUT_STRIDES`` and ``config_param::OUTPUT_STRIDES`` enable you to get and set these values. The configuration value is a :math:`d+1`` lengthed ``std::vector`` of values :math:`(s_0, s_1, \dots, s_d )`. - -The offset is counted in elements of the data type (complex or real) defined by the descriptor configuration as tabulated below. - -The computation functions take containers(``sycl::buffer`` or USM pointer) which are typed according to the descriptor configuration parameters. Specifically, the :ref:`forward domain` which defines the type of transformation and the :ref:`storage format` configuration parameters: ``config_param::COMPLEX_STORAGE``, ``config_param::REAL_STORAGE`` and ``config_param::CONJUGATE_EVEN_STORAGE`` define the type of the elements as shown here: - -.. container:: section - - .. _onemkl_dft_config_data_element_types_complex: - - .. rubric:: Assumed Element Types using complex-to-complex transform and ``config_param::COMPLEX_STORAGE``: - - .. 
list-table:: - :header-rows: 1 - :class: longtable - - * - COMPLEX_STORAGE - - Element type of forward data - - Element type of backward data - * - :ref:`onemkl_dft_complex_storage_complex_complex` - - Complex - - Complex - * - :ref:`onemkl_dft_complex_storage_real_real` - - Real - - Real - - .. _onemkl_dft_config_data_element_types_real: - - .. rubric:: Assumed Element Types using real-to-complex transform and ``config_param::REAL_STORAGE``: - - .. list-table:: - :header-rows: 1 - :class: longtable - - * - REAL_STORAGE - - Element type of forward data - - Element type of backward data - * - :ref:`onemkl_dft_real_storage_real_real` - - Real - - Real - - .. _onemkl_dft_config_data_element_types_conjugate_even: - - .. rubric:: Assumed Element Types using real-to-complex transform and ``config_param::CONJUGATE_EVEN_STORAGE``: - - .. list-table:: - :header-rows: 1 - :class: longtable - - * - CONJUGATE_EVEN_STORAGE - - Element type of forward data - - Element type of backward data - * - :ref:`onemkl_dft_conjugate_even_storage_complex_complex` - - Real - - Complex - - -The ``config_param::INPUT_STRIDES`` configuration parameter defines the layout of the input data, while the element type is defined by the forward domain for the :ref:`onemkl_dft_compute_forward` function and by the backward domain for the :ref:`onemkl_dft_compute_backward` function. The ``config_param::OUTPUT_STRIDES`` configuration parameter defines the layout of the output data, while the element type is defined by the backward domain for the :ref:`onemkl_dft_compute_forward` function and by the forward domain for :ref:`onemkl_dft_compute_backward` function. - -For in-place transforms ( ``config_param::PLACEMENT=config_value::INPLACE`` ), the configuration set by ``config_param::OUTPUT_STRIDES`` is ignored when the element types in the forward and backward domains are the same. If they are different, set ``config_param::OUTPUT_STRIDES`` explicitly (even though the transform is in-place). 
Ensure a consistent configuration for in-place transforms, that is, the locations of the first elements on input and output must coincide in each dimension. - - -**Parent topic** :ref:`onemkl_dft_enums` - - diff --git a/source/elements/oneMKL/source/domains/dft/descriptor.rst b/source/elements/oneMKL/source/domains/dft/descriptor.rst index ea3defe88..57f6b1005 100644 --- a/source/elements/oneMKL/source/domains/dft/descriptor.rst +++ b/source/elements/oneMKL/source/domains/dft/descriptor.rst @@ -4,27 +4,48 @@ .. _onemkl_dft_descriptor: -descriptor -========== +The ``descriptor`` class +======================== -The descriptor class defines a discrete Fourier transform problem to be computed. +Objects of the ``descriptor`` class define DFT(s) to be computed. .. rubric:: Description -The discrete Fourier transform problem is defined through the use of the ``oneapi::mkl::dft::descriptor`` class which lives in the ``oneapi::mkl::dft::`` namespace. The enum and config_param values associated with the descriptor class can be found in :ref:`onemkl_dft_enums` including :ref:`onemkl_dft_enum_precision`, :ref:`onemkl_dft_enum_domain` and :ref:`onemkl_dft_enum_config_param`. The descriptor class allows to set several configuration parameters using set_value (and query using get_value) and then upon call to :ref:`onemkl_dft_descriptor_commit` with a ``sycl::queue``, is ready to be used in computations on the specified device. - -This class is then passed to a :ref:`onemkl_dft_compute_forward` or :ref:`onemkl_dft_compute_backward` function along with the data for the actual transformation to be applied. +Any desired (batched) DFT is to be fully determined by an object of the +``oneapi::mkl::dft::descriptor`` class, defined in the ``oneapi::mkl::dft`` +namespace. 
The scoped enumeration types :ref:`onemkl_dft_enum_precision`, +:ref:`onemkl_dft_enum_domain`, :ref:`onemkl_dft_enum_config_param` and +:ref:`onemkl_dft_enum_config_value` defined in the same namespace (and the +corresponding ranges of values) are relevant to the definition and +configurations of objects of the ``descriptor`` class. The ``descriptor`` class +allows the user to set several (resp. query all) configuration parameters for (resp. +from) any of its instances by using their +:ref:`onemkl_dft_descriptor_set_value` (resp. +:ref:`onemkl_dft_descriptor_get_value`) member function. + +Invoking the member function :ref:`onemkl_dft_descriptor_commit` of an object of +the ``descriptor`` class effectively commits that object to the desired DFT +calculations, as configured and determined by that very object, on the specified +device encapsulated by the ``sycl::queue`` object required by that function. + +The desired forward (resp. backward) DFT calculations may then be computed by +passing such a committed ``descriptor`` object to the +:ref:`onemkl_dft_compute_forward` (resp. :ref:`onemkl_dft_compute_backward`) +function (defined in the ``oneapi::mkl::dft`` namespace as well), along with the +relevant data containers (``sycl::buffer`` object(s) or pointer(s) to +device-accessible USM allocation(s)) for the desired DFT(s). This function makes +the ``descriptor`` object enqueue the operations relevant for the desired +calculations to the ``sycl::queue`` object it was given when committing it. .. note:: - The :ref:`onemkl_dft_compute_forward` and :ref:`onemkl_dft_compute_backward` functions may need to be able to access the internals of the descriptor to apply the transform, this could be done for instance, by labeling them as friend functions of the descriptor class.
- - -descriptor class ----------------- + The :ref:`onemkl_dft_compute_forward` and :ref:`onemkl_dft_compute_backward` + functions may need to be able to access the internals of the ``descriptor`` + object to compute the desired transform(s), this could be done for instance, + by labeling them as friend functions of the ``descriptor`` class. .. rubric:: Syntax -The descriptor class lives in the ``oneapi::mkl::dft`` namespace. +The ``descriptor`` class is defined in the ``oneapi::mkl::dft`` namespace. .. code-block:: cpp @@ -34,11 +55,11 @@ The descriptor class lives in the ``oneapi::mkl::dft`` namespace. class descriptor { public: - // Syntax for 1-dimensional DFT - descriptor(std::int64_t length); + // Constructor for 1-dimensional DFT + descriptor(std::int64_t length); // d = 1; - // Syntax for d-dimensional DFT - descriptor(std::vector dimensions); + // Constructor for d-dimensional DFT + descriptor(std::vector lengths); // d = lengths.size(); descriptor(const descriptor&); @@ -51,9 +72,9 @@ The descriptor class lives in the ``oneapi::mkl::dft`` namespace. ~descriptor(); - void set_value(config_param param, ...); + void set_value(oneapi::mkl::dft::config_param param, ...); - void get_value(config_param param, ...); + void get_value(oneapi::mkl::dft::config_param param, ...); void commit(sycl::queue &queue); @@ -63,58 +84,73 @@ The descriptor class lives in the ``oneapi::mkl::dft`` namespace. } -.. container:: section - - .. rubric:: Descriptor class template parameters - - :ref:`onemkl_dft_enum_precision` prec - Specifies the floating-point precision in which the transform is to be carried out. - - :ref:`onemkl_dft_enum_domain` dom - Specifies the forward domain for the transformations. - -.. container:: section - - .. _onemkl_dft_descriptor_member_table: - - .. rubric:: Descriptor class member functions - - .. 
list-table:: - :header-rows: 1 - - * - Routines - - Description - * - :ref:`constructors` - - Initialize descriptor for 1-dimensional or N-dimensional transformations - * - :ref:`assignment operators` - - Assignment operator. - * - :ref:`onemkl_dft_descriptor_set_value` - - Sets one particular configuration parameter with the specified configuration value. - * - :ref:`onemkl_dft_descriptor_get_value` - - Gets the configuration value of one particular configuration parameter. - * - :ref:`onemkl_dft_descriptor_commit` - - Performs all initialization for the actual FFT computation. - +.. _onemkl_dft_descriptor_template_parameters: + +.. rubric:: Descriptor class template parameters + +:ref:`onemkl_dft_enum_precision` prec + Specifies the floating-point precision in which the user-provided data is to + be provided, the transform is to be carried out and the results are to be + returned. The possible specialization values are + ``oneapi::mkl::dft::precision::SINGLE`` and + ``oneapi::mkl::dft::precision::DOUBLE``. Objects of the ``descriptor`` class + specialized with :ref:`onemkl_dft_enum_precision` template parameter ``prec`` + as value ``oneapi::mkl::dft::precision::SINGLE`` (resp. + ``oneapi::mkl::dft::precision::DOUBLE``) are referred to as "single-precision + descriptors" (resp. "double-precision descriptors"). + +:ref:`onemkl_dft_enum_domain` dom + Specifies the forward domain of the transform. The possible specialization + values are ``oneapi::mkl::dft::domain::COMPLEX`` and + ``oneapi::mkl::dft::domain::REAL``. Objects of the ``descriptor`` class + specialized with :ref:`onemkl_dft_enum_domain` template parameter ``dom`` as + value ``oneapi::mkl::dft::domain::COMPLEX`` (resp. + ``oneapi::mkl::dft::domain::REAL``) are referred to as "complex + descriptors" (resp. "real descriptors"). + +.. _onemkl_dft_descriptor_member_table: + +.. rubric:: Descriptor class member functions + +..
list-table:: + :header-rows: 1 + :widths: 30 70 + + * - Routines + - Description + * - :ref:`constructors` + - Creates and default-initializes a ``descriptor`` object for a + :math:`d`-dimensional DFT of user-defined length(s) + :math:`\lbrace n_1, \ldots, n_d\rbrace`. + * - :ref:`assignment operators` + - Performs a deep copy of or moves the argument. + * - :ref:`onemkl_dft_descriptor_set_value` + - Sets a configuration value for a specific configuration parameter. + * - :ref:`onemkl_dft_descriptor_get_value` + - Queries the configuration value associated with a particular + configuration parameter. + * - :ref:`onemkl_dft_descriptor_commit` + - Commits the ``descriptor`` object to enqueue the operations relevant + to the (batched) DFT(s) it determines to a given, user-provided + ``sycl::queue`` object; completes all initialization work relevant to + and required by the chosen, device-compliant implementation for the + particular DFT, as defined by the ``descriptor`` object. .. _onemkl_dft_descriptor_constructor: Descriptor class constructors ++++++++++++++++++++++++++++++ -The constructors for the discrete Fourier transform ``descriptor`` class with default -configuration settings for a given precision, forward :ref:`onemkl_dft_enum_domain` type -and dimension of the transform. +The constructors for the ``descriptor`` object instantiate +it with all the relevant default configuration settings (which may depend on the +specialization values used for the :ref:`onemkl_dft_enum_precision` template +parameter ``prec`` and for the :ref:`onemkl_dft_enum_domain` template parameter +``dom``). The constructors do not perform any significant initialization work as +changes in the object's configuration(s) may be operated thereafter (via its +:ref:`onemkl_dft_descriptor_set_value` member function) and modify significantly +the nature of that work. 
-The constructors allocate memory for the descriptor data -structure and instantiate it with all the default -configuration settings for the precision, (forward) :ref:`onemkl_dft_enum_domain`, and -dimensions of the transform. The constructors do not perform any -significant computational work, such as computation of twiddle -factors. The function :ref:`onemkl_dft_descriptor_commit` does this work -after use of the function :ref:`onemkl_dft_descriptor_set_value` to set values -of all necessary parameters. - -The copy constructor performs a deep copy of the descriptor. +The copy constructor performs a deep copy of ``descriptor`` objects. .. rubric:: Syntax (one-dimensional transform) @@ -128,14 +164,14 @@ The copy constructor performs a deep copy of the descriptor. } -.. rubric:: Syntax (multi-dimensional transform) +.. rubric:: Syntax (:math:`d`-dimensional transform with :math:`d > 0`) .. code-block:: cpp namespace oneapi::mkl::dft { template - descriptor(std::vector dimensions); + descriptor(std::vector lengths); } @@ -167,25 +203,34 @@ The copy constructor performs a deep copy of the descriptor. .. rubric:: Input Parameters length - dimension(length) of data for a 1-dimensional transform. + Length :math:`n_1 > 0` of the data sequence(s) for one-dimensional + transform(s). - dimensions - vector of :math:`d\geq 0` dimensions(lengths) of data for a d-dimensional transform. + lengths + Vector of :math:`d > 0` lengths :math:`\lbrace n_1, \ldots, n_d\rbrace` + of the data sequence(s) for :math:`d`-dimensional transform(s). The values + are to be provided in that order and such that + :math:`n_j > 0,\ \forall j \in \lbrace 1, \ldots, d \rbrace`. other - another descriptor of the same type to copy or move + Another ``descriptor`` object of the same type to copy or move. .. container:: section .. rubric:: Throws - The `descriptor()` constructor shall throw the following exceptions if the associated condition is detected. 
An implementation may throw additional implementation-specific exception(s) in case of error conditions not covered here: + The ``descriptor::descriptor()`` constructors shall throw the following + exceptions if the associated condition is detected. An implementation may + throw additional implementation-specific exception(s) in case of error + conditions not covered here: :ref:`oneapi::mkl::host_bad_alloc()` - If any memory allocations on host have failed, for instance due to insufficient memory. + If any memory allocations on host have failed, for instance due to + insufficient memory. :ref:`oneapi::mkl::unimplemented()` - If length of ``dimensions`` vector is larger than is supported by the library implementation. + If the dimension :math:`d`, *i.e.*, the size of vector ``lengths``, is + larger than what is supported by the library implementation. **Descriptor class member table:** :ref:`onemkl_dft_descriptor_member_table` @@ -226,37 +271,42 @@ The copy assignment operator results in a deep copy. .. rubric:: Input Parameters other - The descriptor to copy or move from. + The ``descriptor`` object to copy or move from. .. container:: section .. rubric:: Throws - The assignment opererator shall throw the following exceptions if the associated condition is detected. An implementation may throw additional implementation-specific exception(s) in case of error conditions not covered here: + The assignment operators shall throw the following exceptions if the + associated condition is detected. An implementation may throw additional + implementation-specific exception(s) in case of error conditions not covered + here: :ref:`oneapi::mkl::host_bad_alloc()` - If any memory allocations on host have failed, for instance due to insufficient memory. - + If any memory allocations on host have failed, for instance due to + insufficient memory. **Descriptor class member table:** :ref:`onemkl_dft_descriptor_member_table` ..
_onemkl_dft_descriptor_set_value: set_value -++++++++++ - -Sets DFT configuration values before :ref:`onemkl_dft_descriptor_commit`. - - -.. rubric:: Description - -This function sets one particular configuration parameter with -the specified configuration value. Each configuration parameter -is a named constant, and the configuration value must have the -corresponding type, which can be a named constant or a native -type. For available configuration parameters and the -corresponding configuration values, see :ref:`onemkl_dft_enum_config_param`. -All calls to ``set_param`` must be done before :ref:`onemkl_dft_descriptor_commit`. ++++++++++ + +The ``set_value`` member function of the ``descriptor`` class sets a +configuration value corresponding to a (read-write) configuration parameter for +the DFT(s) that a ``descriptor`` object defines. This function is to be used as +many times as required for all the necessary configuration parameters to be set +prior to committing the ``descriptor`` object (by calling its member function +:ref:`onemkl_dft_descriptor_commit`). + +This function requires and expects exactly **two** arguments: it sets the +configuration value (second argument) corresponding to the configuration +parameter (first argument) ``param`` of type ``oneapi::mkl::dft::config_param``. +The type of the configuration value (second argument) to be set depends on the +value of ``param``: it can be ``oneapi::mkl::dft::config_value`` or a native +type like ``std::int64_t`` or ``float`` (more details available +:ref:`here`). .. rubric:: Syntax @@ -265,7 +315,7 @@ All calls to ``set_param`` must be done before :ref:`onemkl_dft_descriptor_commi namespace oneapi::mkl::dft { template - void descriptor::set_value(config_param param, ...); + void descriptor::set_value(oneapi::mkl::dft::config_param param, ...); } @@ -274,23 +324,31 @@ All calls to ``set_param`` must be done before :ref:`onemkl_dft_descriptor_commi .. 
rubric:: Input Parameters param - The enum value of :ref:`onemkl_dft_enum_config_param` to be set. + One of the possible values of type :ref:`onemkl_dft_enum_config_param` + representing the (writable) configuration parameter to be set. ... - The corresponding value or container corresponding to the specific parameter. Defined in :ref:`onemkl_dft_enum_config_param`. + An element of the appropriate type for the configuration value + corresponding to the targeted configuration + parameter ``param`` (appropriate type defined + :ref:`here`). - .. container:: section .. rubric:: Throws - The `descriptor::set_value()` routine shall throw the following exceptions if the associated condition is detected. An implementation may throw additional implementation-specific exception(s) in case of error conditions not covered here: + The ``descriptor::set_value()`` routine shall throw the following exceptions + if the associated condition is detected. An implementation may throw + additional implementation-specific exception(s) in case of error conditions + not covered here: :ref:`oneapi::mkl::invalid_argument()` - If the provided :ref:`onemkl_dft_enum_config_param` or config_value is not valid. + If the provided :ref:`onemkl_dft_enum_config_param` and/or configuration + value is not valid. :ref:`oneapi::mkl::unimplemented()` - If the provided :ref:`onemkl_dft_enum_config_param` or config_value is valid, but not supported by the library implementation. + If the provided :ref:`onemkl_dft_enum_config_param` and configuration + value are valid, but not supported by the library implementation. **Descriptor class member table:** :ref:`onemkl_dft_descriptor_member_table` @@ -299,18 +357,26 @@ All calls to ``set_param`` must be done before :ref:`onemkl_dft_descriptor_commi .. _onemkl_dft_descriptor_get_value: get_value -++++++++++ ++++++++++ -Retrieves current DFT configuration values.
+The ``get_value`` member function of the ``descriptor`` class queries the +configuration value corresponding to any configuration parameter for the DFT +that a ``descriptor`` object defines. -.. rubric:: Description +This function requires and expects exactly **two** arguments: it returns the +configuration value (into the element pointed by the second argument) +corresponding to the queried configuration parameter (first argument) ``param`` +of type ``oneapi::mkl::dft::config_param``. The type of the second argument +depends on the value of ``param``: it is a pointer to a writable element of +type ``oneapi::mkl::dft::domain``, ``oneapi::mkl::dft::precision``, +``oneapi::mkl::dft::config_value`` or a native type like ``std::int64_t`` or +``float`` (more details available :ref:`here`). -This function gets one particular configuration parameter with -the specified configuration value. Each configuration parameter -is a named constant, and the configuration value must have the -corresponding type, which can be a named constant or a native -type. For available configuration parameters and the -corresponding configuration values, see :ref:`onemkl_dft_enum_config_param`. +.. note:: + The value returned by ``get_value`` corresponds to the latest value set for + the corresponding configuration parameter being queried or the + corresponding default value if that parameter was not set or if it is not + writable, even if that value was set after the descriptor was committed. .. rubric:: Syntax @@ -319,7 +385,7 @@ corresponding configuration values, see :ref:`onemkl_dft_enum_config_param`. namespace oneapi::mkl::dft { template - void descriptor::get_value(config_param param, ...); + void descriptor::get_value(oneapi::mkl::dft::config_param param, ...); } @@ -328,50 +394,49 @@ corresponding configuration values, see :ref:`onemkl_dft_enum_config_param`. .. rubric:: Input Parameters param - The enum value of :ref:`onemkl_dft_enum_config_param` to be retrieved. 
+ One of the possible values of type :ref:`onemkl_dft_enum_config_param` + representing the configuration parameter being queried. ... - The corresponding value or container corresponding to the specific parameter. Defined in :ref:`onemkl_dft_enum_config_param`. + A pointer to a writable element of the appropriate type for the + configuration value corresponding to the queried configuration + parameter ``param`` (appropriate type of pointed element defined + :ref:`here`). .. container:: section .. rubric:: Throws - The `descriptor::get_value()` routine shall throw the following exceptions if the associated condition is detected. An implementation may throw additional implementation-specific exception(s) in case of error conditions not covered here: + The ``descriptor::get_value()`` routine shall throw the following exceptions + if the associated condition is detected. An implementation may throw + additional implementation-specific exception(s) in case of error conditions + not covered here: :ref:`oneapi::mkl::invalid_argument()` - If the requested :ref:`onemkl_dft_enum_config_param` is not correct. - - + If the requested :ref:`onemkl_dft_enum_config_param` is not valid. **Descriptor class member table:** :ref:`onemkl_dft_descriptor_member_table` - - .. _onemkl_dft_descriptor_commit: commit -+++++++ - -Finalizes DFT descriptor after all configuration parameters have been set. - -.. rubric:: Description - -This function completes initialization of a previously created -descriptor, which is required before the descriptor can be used -for FFT computations. Typically, committing the -descriptor performs all initialization that is required for the -actual FFT computation on the device specified through input queue. -The initialization performed by the function may involve exploring different -factorizations of the input length to find the optimal -computation method. 
- -All calls to the :ref:`onemkl_dft_descriptor_set_value` function to change configuration -parameters of a descriptor need to happen after the constructor call for -the :ref:`onemkl_dft_descriptor` class and before a call to :ref:`onemkl_dft_descriptor_commit`. -Typically, a commit function call is immediately followed by a computation -function call (see :ref:`onemkl_dft_compute_forward` or :ref:`onemkl_dft_compute_backward`) - +++++++ + +The ``commit`` member function commits a ``descriptor`` object to the DFT +calculations it defines consistently with its configuration settings, by +completing all the initialization work (*e.g.*, algorithm selection, algorithm +tuning, choice of factorization, memory allocations, calculation of twiddle +factors, etc.) required by the chosen implementation for the desired DFT(s) on +the targeted device. Objects of the ``descriptor`` class **must** be committed +prior to using them in any call to :ref:`onemkl_dft_compute_forward` or +:ref:`onemkl_dft_compute_backward` (which trigger actual DFT calculations). + +As specified :ref:`above`, all required +configuration parameters must be set before this function is called. Any change +in configuration operated on a ``descriptor`` object via a call to its +:ref:`onemkl_dft_descriptor_set_value` member function *after* it was committed +results in an undefined state not suitable for computation until this ``commit`` member +function is called again. .. rubric:: Syntax @@ -381,7 +446,6 @@ function call (see :ref:`onemkl_dft_compute_forward` or :ref:`onemkl_dft_compute template void descriptor::commit(sycl::queue& queue); - } .. container:: section @@ -389,31 +453,29 @@ function call (see :ref:`onemkl_dft_compute_forward` or :ref:`onemkl_dft_compute .. rubric:: Input Parameters queue - Valid DPC++ queue specifying the device and context on which the transformation will be executed. 
+ Valid ``sycl::queue`` object to which the operations relevant to the + desired DFT(s) are to be enqueued. .. container:: section .. rubric:: Throws - The following oneMKL exceptions may be thrown in this function: - - The `descriptor::commit()` routine shall throw the following exceptions if the associated condition is detected. An implementation may throw additional implementation-specific exception(s) in case of error conditions not covered here: + The ``descriptor::commit()`` routine shall throw the following exceptions if + the associated condition is detected. An implementation may throw additional + implementation-specific exception(s) in case of error conditions not covered + here (if the ``descriptor`` object's configuration was found to be + inconsistent, for instance): :ref:`oneapi::mkl::invalid_argument()` If the queue is found to be invalid in any way. :ref:`oneapi::mkl::host_bad_alloc()` - If any host side only memory allocations fail, for instance due to lack of memory. + If any host side only memory allocations fail, for instance due to lack of + memory. :ref:`oneapi::mkl::device_bad_alloc()` If any device or shared memory allocation fail. - - **Descriptor class member table:** :ref:`onemkl_dft_descriptor_member_table` - **Parent topic:** :ref:`onemkl_dft` - - - diff --git a/source/elements/oneMKL/source/domains/dft/dft.rst b/source/elements/oneMKL/source/domains/dft/dft.rst index 792480479..efd1b3ad0 100644 --- a/source/elements/oneMKL/source/domains/dft/dft.rst +++ b/source/elements/oneMKL/source/domains/dft/dft.rst @@ -5,23 +5,181 @@ .. _onemkl_dft: Discrete Fourier Transform Functions -------------------------------------- +------------------------------------ -The general form of the d-dimensional discrete Fourier transform(DFT) is +oneMKL provides a DPC++ interface to :math:`d`-dimensional :math:`\left(d \in +\mathbb{Z}_{>0}\right)` Discrete Fourier Transforms (DFTs). -.. _onemkl_dft_formula: +.. _onemkl_dft_definitions: -.. 
math:: +Definitions ++++++++++++ - z_{k_1, k_2,\dots, k_d} = \sigma \displaystyle\sum_{j_d=0}^{n_d-1}\dots\displaystyle\sum_{j_2=0}^{n_2-1}\displaystyle\sum_{j_1=0}^{n_1-1} w_{j_1, j_2,\dots,j_d} \exp \left[ \delta 2\pi i\left( \sum_{\ell=1}^{d} \frac{j_{\ell}k_{\ell}}{n_{\ell}} \right) \right] . +Let :math:`w^{m}_{k_1, k_2, \ldots, k_d}` be the entry of multi-index +:math:`\left(k_1, k_2, \ldots, k_d\right) \in \mathbb{Z}^d` in the +:math:`m`-th sequence of a set :math:`w` of :math:`M` :math:`d`-dimensional +periodic discrete sequences of period(s) (or "length(s)") +:math:`n_1 \times n_2 \times \dots \times n_d` +(:math:`M \in \mathbb{Z}_{>0}`, :math:`m \in \lbrace 0, 1, \ldots, M-1\rbrace` +and +:math:`n_{\ell} \in \mathbb{Z}_{>0}, \forall \ell \in \lbrace 1, \ldots, d \rbrace`). -for :math:`k_{\ell} = 0,\dots, n_{\ell}-1` and :math:`\ell\in\{1, \dots, d\}`, where :math:`\sigma` is a scale factor, :math:`\delta=-1` for the forward transform, and :math:`\delta=+1` for the backward(inverse) transform. In the forward transform, the input sequence :math:`\left(w_{j_1, j_2, \dots, j_d}\right)` belongs to the set of complex-valued sequences or real-valued sequences. Respective domains for the backward transform are represented by complex-valued sequences or complex conjugate-even sequences. +For every :math:`m \in \lbrace 0, 1, \ldots, M - 1 \rbrace`, the DFT of sequence +:math:`w^{m}` is the :math:`d`-dimensional +:math:`n_1 \times n_2 \times \dots \times n_d` periodic discrete sequence +:math:`z^{m}` whose entries are defined as -The discrete Fourier transform to be performed is defined by the creation of a :ref:`onemkl_dft_descriptor` class, with the associated configuration parameters, described in :ref:`onemkl_dft_enums`. Once the descriptor class is defined and :ref:`onemkl_dft_descriptor_commit` is called and provided with a ``sycl::queue`` to define the device and context, it can be used for computing the forward and/or backward transformations. 
The available data storage formats for the various configurations are described in :ref:`onemkl_dft_config_storage_formats`. +.. _onemkl_dft_formula: +.. math:: + z^{m}_{k_1, k_2,\ldots, k_d} = \sigma \displaystyle\sum_{j_d=0}^{n_d-1}\dots\displaystyle\sum_{j_2=0}^{n_2-1}\displaystyle\sum_{j_1=0}^{n_1-1} w^{m}_{j_1, j_2,\dots,j_d} \exp \left[ \delta 2\pi \imath \left( \sum_{\ell=1}^{d} \frac{j_{\ell}k_{\ell}}{n_{\ell}} \right) \right] \ \forall \left(k_1, \ldots, k_d\right) \in \mathbb{Z}^{d} + :label: eq_dft_definition + +where :math:`\imath^2 = -1` and :math:`\sigma` is a scale factor. +In :eq:`eq_dft_definition`, :math:`\delta` determines one of the two +"directions" of the DFT: :math:`\delta=-1` defines the "forward DFT" while +:math:`\delta=+1` defines the "backward DFT". + +The domain of input (resp. output) discrete sequences for a forward (resp. +backward) DFT is referred to as "forward domain". Conversely, the domain of +output (resp. input) discrete sequences for forward (resp. backward) DFT is +referred to as "backward domain". + +oneMKL supports single-precision (fp32) and double-precision (fp64) +floating-point arithmetic for the calculation of DFTs, using two types of +forward domains: + +- the set of complex :math:`d`-dimensional periodic sequences, referred to as + "complex forward domain"; +- the set of real :math:`d`-dimensional periodic sequences, referred to as + "real forward domain". + +Similarly, we refer to DFTs of complex (resp. real) forward domain as "complex +DFTs" (resp. "real DFTs"). Regardless of the type of forward domain, the +backward domain's data sequences are always complex. + +The calculation of the same DFT for several, *i.e.*, :math:`M > 1`, data sets of +the same type of forward domain, using the same precision is referred to as a +"batched DFT". + +.. 
_onemkl_dft_finite_range_of_indices: + +Finite range of indices ++++++++++++++++++++++++ + +In general, given the periodicity of the discrete data considered in any DFT, +ranges of indices :math:`\left(k_1, \ldots, k_d\right) \in \mathbb{Z}^{d}` such +that :math:`0\leq k_{\ell} < n_{\ell}, \forall \ell \in \lbrace 1, \ldots, d +\rbrace` suffice to determine any relevant :math:`d`-dimensional sequence +unambiguously (for any valid :math:`m`). In case of real DFTs, the data +sequences in backward domain can be fully determined from a smaller range of +indices. Indeed, if all entries of :math:`w` are real in +:eq:`eq_dft_definition`, then the entries of :math:`z` are complex and, for any +valid :math:`m`, +:math:`\left(z^{m}_{k_1, k_2, \dots, k_d}\right)^{*} = z^{m}_{n_1 - k_1, n_2 - k_2, \dots, n_d - k_d}` +:math:`\forall \left(k_1, k_2, \ldots, k_d\right) \in \mathbb{Z}^{d}` where +:math:`\lambda^{*}` represents the conjugate of complex number :math:`\lambda`. +This conjugate symmetry relation makes roughly half the data redundant in +backward domain: in case of real DFTs, the data sequences in backward domain can +be fully determined even if one of the :math:`d` indices :math:`k_{\ell}` is +limited to the range +:math:`0\leq k_{\ell} \leq \lfloor \frac{n_{\ell}}{2}\rfloor`. In oneMKL, the +index :math:`k_d`, *i.e.*, the last dimension's index, is restricted as such for +capturing an elementary set of non-redundant entries of data sequences belonging +to the backward domain of real DFTs. + +.. 
_onemkl_dft_elementary_range_of_indices: + +Elementary range of indices +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In other words, oneMKL expects and produces a set of :math:`M` +:math:`d`-dimensional *finite* data sequences +:math:`\left(\cdot \right)^{m}_{k_1, k_2,\ldots, k_d}` with integer indices +:math:`m` and +:math:`k_{\ell}\ \left(\ell \in \lbrace 1, \ldots, d \rbrace\right)` in the +elementary range + +- :math:`0 \leq m < M`; +- :math:`0 \leq k_j < n_j,\ \forall j \in \lbrace1, \ldots, d - 1\rbrace`, if :math:`d > 1`; +- :math:`0 \leq k_d < n_d`, except for backward domain's data sequences of real DFTs; +- :math:`0 \leq k_d \leq \lfloor\frac{n_d}{2}\rfloor`, for backward domain's data sequences of real DFTs. + +.. _onemkl_dft_additional_constraints_in_bwd_domain_for_real_dft: + +Additional constraints for data in backward domain of real DFTs +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Finally, note that the conjugate symmetry relation further constrains some of +the entries (or pairs thereof) in the backward domain's data sequences for real +DFTs. Specifically, for any of the :math:`M` sequences, + +- the imaginary part must be :math:`0` for any entry of multi-index + :math:`\left(k_1, k_2, \ldots, k_d\right)` such that + :math:`k_{\ell} \equiv \left(n_{\ell} - k_{\ell}\right) \pmod {n_{\ell}}, \forall \ell \in \lbrace{1, \ldots, d\rbrace}`, + *e.g.*, entry of multi-index :math:`\left(0, 0, \ldots, 0\right)`; +- pairs of entries of multi-indices :math:`\left(k_1, k_2, \ldots, k_d\right)` + and :math:`\left(j_1, j_2, \ldots, j_d\right)` such that + :math:`k_{\ell} \equiv \left(n_{\ell} - j_{\ell}\right) \pmod {n_{\ell}}, \forall \ell \in \lbrace{1, \ldots, d\rbrace}` + must be complex conjugates of one another, *e.g.*, entries of multi-indices + :math:`\left(1, 0, \ldots, 0\right)` and + :math:`\left(n_1 - 1, 0, \ldots, 0\right)` must be complex conjugates (note + that this case falls back to the above constraint if :math:`n_1 = 2`). + +.. 
note:: + The behavior of oneMKL is undefined for real backward DFT if the input data + does not satisfy those constraints. oneMKL considers it the user's + responsibility to guarantee that these constraints are satisfied by the input + data for real backward DFTs. + +.. _onemkl_dft_recommended_usage: + +Recommended usage ++++++++++++++++++ + +The desired (batched) DFT to be computed is entirely defined by an object +``desc`` of the :ref:`descriptor` class. The desired precision +and type of forward domain are determined at ``desc``'s construction time +by the specialization values chosen for the self-explanatory template parameters +``prec`` (of type :ref:`onemkl_dft_enum_precision`) and ``dom`` (of type +:ref:`onemkl_dft_enum_domain`), respectively. The transform size +:math:`n_1 \times n_2 \times \dots \times n_d` is also set at construction time +as a required argument to the class constructor. Other configuration details for +the (batched) DFT under consideration may be specified by invoking the +:ref:`onemkl_dft_descriptor_set_value` member function of ``desc`` for every +relevant configuration setting (*e.g.*, the number :math:`M` of sequences to +consider in case of a batched DFT). Once configured as desired, the +:ref:`onemkl_dft_descriptor_commit` member function of ``desc``, requiring a +``sycl::queue`` object ``Q``, may be invoked. The successful completion of the +latter makes ``desc`` committed to the desired (batched) DFT *as configured*, for +the particular device and context encapsulated by ``Q``. The +:ref:`onemkl_dft_compute_forward` (resp. :ref:`onemkl_dft_compute_backward`) +function may then be called and provided with ``desc`` to enqueue operations +relevant to the desired forward (resp. backward) DFT calculations with +user-provided, device-accessible data. + +.. 
note:: + Objects of the :ref:`descriptor` class + + - must be successfully committed prior to providing them to any compute + function; + - must be re-committed to account for any change in configuration after + it was already successfully committed; + - deliver best performance for DFT calculations when created, configured and + committed outside applications' hotpath(s) that use them multiple times for + identically-configured (batched) DFTs. :ref:`onemkl_dft_compute_forward` + and/or :ref:`onemkl_dft_compute_backward` should be the only oneMKL DFT-related + routines invoked in programs' hotpaths. + +Summary table +~~~~~~~~~~~~~ + +The table below summarizes the object and functions relevant to computing DFTs +(all defined in the ``oneapi::mkl::dft`` namespace). + +.. _onemkl_dft_summary_table: .. container:: - The routines and objects associated with computing a discrete Fourier transform. .. container:: tablenoborder @@ -33,18 +191,32 @@ The discrete Fourier transform to be performed is defined by the creation of a : * - Routines and Objects - Description - * - :ref:`onemkl_dft_descriptor` - - A class to define the specific discrete Fourier transform problem to be applied. + * - :ref:`descriptor` + - A class whose instances define a specific (batched) DFT(s) to + be calculated. * - :ref:`descriptor::set_value` - - A member function of descriptor class to set non-default configuration parameters and define the DFT transformation to be applied. + - A member function of the + :ref:`descriptor` class to set (writable) + :ref:`configuration parameters` + for an instance of that class. * - :ref:`descriptor::get_value` - - A member function of descriptor class to query configuration parameters that define the DFT transformation to be applied. + - A member function of the + :ref:`descriptor` class to query + :ref:`configuration parameters` + from any instance of that class. 
* - :ref:`descriptor::commit` - - A member function of descriptor class to finalize the DFT descriptor before computations. + - A member function of the + :ref:`descriptor` class to commit an + instance of that class to the (batched) DFT calculations it + defines, on a given queue. * - :ref:`onemkl_dft_compute_forward` - - Computes the in-place/out-of-place forward transformation. + - A function requiring a successfully-committed object of the + :ref:`descriptor` class to compute a + forward (batched) DFT, as defined by that object. * - :ref:`onemkl_dft_compute_backward` - - Computes the in-place/out-of-place backward transformation. + - A function requiring a successfully-committed object of the + :ref:`descriptor` class to compute a + backward (batched) DFT, as defined by that object. **Parent topic:** :ref:`onemkl_domains` @@ -52,8 +224,8 @@ The discrete Fourier transform to be performed is defined by the creation of a : .. toctree:: :hidden: - enums_and_config_params descriptor + enums_and_config_params compute_forward compute_backward diff --git a/source/elements/oneMKL/source/domains/dft/enums_and_config_params.rst b/source/elements/oneMKL/source/domains/dft/enums_and_config_params.rst index fb597d341..8bc248210 100644 --- a/source/elements/oneMKL/source/domains/dft/enums_and_config_params.rst +++ b/source/elements/oneMKL/source/domains/dft/enums_and_config_params.rst @@ -4,34 +4,50 @@ .. _onemkl_dft_enums: -Configuration Parameters and Enums +DFT-related scoped enumeration types ------------------------------------ -The following enum classes are defined in the ``oneapi::mkl::dft`` namespace which are used for configuring the discrete Fourier transform problem in the :ref:`onemkl_dft_descriptor` class prior to a call to :ref:`onemkl_dft_descriptor_commit`. 
+The following scoped enumeration types, defined in the ``oneapi::mkl::dft`` +namespace, are used for constructing and configuring objects of the +:ref:`descriptor` class consistently with the DFT(s) they +are meant to define. .. list-table:: :header-rows: 1 + :widths: 24 73 - * - enum class + * - Scoped enumeration type - Description * - :ref:`onemkl_dft_enum_precision` - - The floating-point precision in which the transform is carried out. Used as a template argument for :ref:`onemkl_dft_descriptor` class. + - Represents the precision of the floating-point data format and of + the floating-point arithmetic to be used for the desired DFT + calculations. A template parameter ``prec`` of this type is used for + the :ref:`descriptor` class. * - :ref:`onemkl_dft_enum_domain` - - The forward domain data type for dft transformation. Used as a template argument for :ref:`onemkl_dft_descriptor` class. + - Represents the type of forward domain for the desired DFT(s). A + template parameter ``dom`` of this type is used for the + :ref:`descriptor` class. * - :ref:`onemkl_dft_enum_config_param` - - The configuration parameters to specify the DFT transformation desired. These can be set and retrieved via the :ref:`onemkl_dft_descriptor_set_value` and :ref:`onemkl_dft_descriptor_get_value` functions. + - Represents configuration parameters for objects of the + :ref:`descriptor` class. The configuration + values associated with the configuration parameters + can be retrieved (resp. set, for writable parameters) via the object's + :ref:`onemkl_dft_descriptor_get_value` (resp. + :ref:`onemkl_dft_descriptor_set_value`) member function. * - :ref:`onemkl_dft_enum_config_value` - - Some possible enum values that the :ref:`onemkl_dft_enum_config_param` configuration parameters can take on. - - - + - Represents the possible configuration values for some of the + :ref:`configuration parameters` that + may take only a few determined, non-numeric values. .. 
_onemkl_dft_enum_precision: precision +++++++++ -The floating-point precision in which the transform is to be carried out. The data must be presented in this precision, the computation is carried out in this precision, and the result is delivered in this precision. +This scoped enumeration type represents the precision of the floating-point +format to be used for the desired DFT(s). The same precision is to be used for +the user-provided data, the computation being carried out by oneMKL and the +results delivered by oneMKL. .. container:: section @@ -46,21 +62,25 @@ The floating-point precision in which the transform is to be carried out. The d .. list-table:: :header-rows: 1 + :widths: 24 73 * - Value - Description * - SINGLE - - data and transforms are executed using single(fp32) precision + - Single-precision floating-point format (FP32) is used for data + representation and arithmetic operations. * - DOUBLE - - data and transforms are executed using double(fp64) precision - + - Double-precision floating-point format (FP64) is used for data + representation and arithmetic operations. .. _onemkl_dft_enum_domain: domain ++++++ -The discrete Fourier transform supports forward transformations on input sequences of two domains, from the forward domain to the backward domain. The backward transformation operates on input sequences from the backward domain to the forward domain. This ``domain`` value defines the forward domain and the backward domain is always implied to be complex-valued. +This scoped enumeration type represents the type of forward domain for the +desired DFTs (as explained in the :ref:`introduction`, +the backward domain type is always complex). .. container:: section @@ -75,19 +95,16 @@ The discrete Fourier transform supports forward transformations on input sequenc .. 
list-table:: :header-rows: 1 + :widths: 24 73 * - Value - - Forward domain - - Backward domain - Description * - REAL - - real-valued - - complex-valued - - Forward transformation is real-to-complex, backward transform is complex-to-real. + - The forward domain is the set of real :math:`d`-dimensional periodic + sequences. * - COMPLEX - - complex-valued - - complex-valued - - Forward and backward transformations are complex-to-complex. + - The forward domain is the set of complex :math:`d`-dimensional + periodic sequences. .. _onemkl_dft_enum_config_param: @@ -95,141 +112,188 @@ The discrete Fourier transform supports forward transformations on input sequenc config_param ++++++++++++ +This scoped enumeration type represents configuration parameters for objects of +the :ref:`descriptor` class. + .. container:: section .. code:: cpp enum class config_param { - + // read-only parameters: FORWARD_DOMAIN, DIMENSION, LENGTHS, PRECISION, - + COMMIT_STATUS, + // writable parameters: FORWARD_SCALE, BACKWARD_SCALE, NUMBER_OF_TRANSFORMS, COMPLEX_STORAGE, - REAL_STORAGE, - CONJUGATE_EVEN_STORAGE, PLACEMENT, - INPUT_STRIDES, - OUTPUT_STRIDES, + FWD_STRIDES, + BWD_STRIDES, + INPUT_STRIDES, // deprecated + OUTPUT_STRIDES, // deprecated FWD_DISTANCE, - BWD_DISTANCE, - - WORKSPACE, - ORDERING, - TRANSPOSE, - PACKED_FORMAT, - COMMIT_STATUS + BWD_DISTANCE }; - Many of the config_param enum's will take values in :ref:`onemkl_dft_enum_config_value` or other ``std::int64_t``, ``std::vector``, or floating-point :ref:`onemkl_dft_enum_precision` values as specified in the following table. + Configuration parameters represented by ``config_param::FORWARD_DOMAIN`` and + ``config_param::PRECISION`` are associated with configuration values of type + :ref:`domain` and + :ref:`precision` respectively. 
Other + configuration parameters are associated with configuration values of type + :ref:`onemkl_dft_enum_config_value` or of a native type like + ``std::int64_t``, ``std::vector``, ``float`` or ``double``. + This is further specified in the following table. .. list-table:: :header-rows: 1 - - * - Value - - Description + :widths: 10 50 40 + + * - | Value of ``config_param`` + | + - | Represented configuration parameter(s) + | + - | Type of associated configuration value + | [default value] * - FORWARD_DOMAIN - - Read-only value of forward :ref:`onemkl_dft_enum_domain` set at :ref:`onemkl_dft_descriptor` construction time. + - Type of forward domain, set at construction time as the + specialization value of :ref:`onemkl_dft_enum_domain` template + parameter ``dom``. This parameter is read-only. + - | :ref:`onemkl_dft_enum_domain` + | [``dom``] * - DIMENSION - - Read-only value of the dimension of the transformation. Value is a positive integer of type ``std::int64_t`` set at :ref:`onemkl_dft_descriptor` construction. + - Value of the dimension :math:`d` of the desired DFTs, set at + construction time. This parameter is read-only. + - | ``std::int64_t`` + | [:math:`d`] * - LENGTHS - - For a one-dimensional transform, the transform length is specified by a positive integer value represented in an integer scalar (``std::int64_t``). For multi-dimensional (:math:`\geq 2`) transform, the lengths of each of the dimensions are supplied in an integer vector (``std::vector``) at :ref:`onemkl_dft_descriptor` construction time. + - Values :math:`\lbrace n_1, \ldots, n_d\rbrace` of the periods (or + "lengths") of the desired DFT, set at construction time. This + parameter is read-only. + - | ``std::vector`` of size :math:`d` or, if :math:`d = 1`, ``std::int64_t`` + | [``std::vector({n_1,...,n_d})``] * - PRECISION - - Read-only value of :ref:`onemkl_dft_enum_precision` set at :ref:`onemkl_dft_descriptor` construction time. 
- * - :ref:`FORWARD_SCALE` - - The forward transform is associated with a scale factor, :math:`\sigma`, of real floating-point type :ref:`onemkl_dft_enum_precision`, the default value is 1.0. - * - :ref:`BACKWARD_SCALE` - - The backward transform is associated with a scale factor, :math:`\sigma`, of real floating-point type :ref:`onemkl_dft_enum_precision`, the default value is 1.0. - * - :ref:`NUMBER_OF_TRANSFORMS` - - If you need to perform a large number of identical DFTs, you can do this in a single call to a compute_forward function with the value of this equal to the actual number of the transforms. Takes a value of ``std::int64_t`` with default value of 1. - * - :ref:`onemkl_dft_complex_storage` - - Specifies the data storage format for :ref:`onemkl_dft_enum_domain` with value of ``COMPLEX``. - * - :ref:`onemkl_dft_real_storage` - - Specifies the data storage format for :ref:`onemkl_dft_enum_domain` with value of ``REAL``. - * - :ref:`onemkl_dft_conjugate_even_storage` - - Specifies the data storage format using conjugate-even symmetry of the data which allows to store only half of the mathematical results. - * - PLACEMENT - - Choose between in-place(value is ``config_value::INPLACE``) and out-of-place (value is ``config_value::NOT_INPLACE``) transformations. For in-place transformation, the computational functions overwrite the input data with the output results. The default is ``config_value::INPLACE``. When the configuration parameter is set to ``config_value::NOT_INPLACE``, the input and output data sets must have no common elements. - * - :ref:`INPUT_STRIDES` - - Defines the layout of multi-dimensional input data in computer memory. The value for a d-dimensional dataset is a d-dimensional vector of type ``std::vector`` representing offsets of elements of the appropriate data type as specified in :ref:`onemkl_dft_config_strides`. - * - :ref:`OUTPUT_STRIDES` - - Defines the layout of multi-dimensional output data in computer memory. 
The value for a d-dimensional dataset is a d-dimensional vector of type ``std::vector`` representing offsets of elements of the appropriate data type as specified in :ref:`onemkl_dft_config_strides`. - * - :ref:`FWD_DISTANCE` - - If computing multiple(batched) transforms, this parameter specifies the distance (in elements) between the first data elements of consecutive data sets in the forward domain. Provided in type ``std::int64_t``, the default value is 1. - * - :ref:`BWD_DISTANCE` - - If computing multiple(batched) transforms, this parameter specifies the distance (in elements) between the first data elements of consecutive data sets in the backward domain. Provided in type ``std::int64_t``, the default value is 1. - * - WORKSPACE - - Some FFT algorithm computation steps require a scratch space for permutations or other purposes. To manage the use of auxiliary storage, set to ``config_value::ALLOW`` to permit the use of auxiliary storage and ``config_value::AVOID`` to avoid using auxiliary storage if possible. - * - ORDERING - - Some FFT algorithms apply an explicit permutation stage that can be time consuming. The value of ``config_value::ORDERED`` (default) applies the data ordering for all transformations. The value of ``config_value::BACKWARD_SCRAMBLED`` applies ordering for forward transform, but allows backward transform to have scrambled data if it gives a performance advantage. - * - TRANSPOSE - - A boolean value to indicate providing the transposition of output results (for multi-dimensional transforms). Default value is ``false``. - * - :ref:`PACKED_FORMAT` - - Packing format for complex domain data storage of finite conjugate-even sequences from real-to-complex or complex-to-real transformations. + - Floating-point precision to be considered by and used for the DFT + calculation(s), set at construction time as the specialization value + of :ref:`onemkl_dft_enum_precision` template parameter ``prec``. + This parameter is read-only. 
+ - | :ref:`onemkl_dft_enum_precision` + | [``prec``] * - COMMIT_STATUS - - Read-only value indicates whether the descriptor is ready for computation after a successful :ref:`onemkl_dft_descriptor_commit`. Value of ``config_value::COMMITTED`` indicates a successful call to :ref:`onemkl_dft_descriptor_commit`. A value of ``config_value::UNCOMMITTED`` (default) is set after descriptor constructor call and before successful call to :ref:`onemkl_dft_descriptor_commit`. + - Status flag indicating whether the object is ready for computations + after a successful call to :ref:`onemkl_dft_descriptor_commit`. This + parameter is read-only. + - | :ref:`onemkl_dft_enum_config_value` (possible values are self-explanatory ``config_value::COMMITTED`` or ``config_value::UNCOMMITTED``). + | [``config_value::UNCOMMITTED``] + * - FORWARD_SCALE + - Value of :math:`\sigma` for the forward DFT. + - | ``float`` (resp. ``double``) for single-precision (resp. double-precision) descriptors + | [1.0] + * - BACKWARD_SCALE + - Value of :math:`\sigma` for the backward DFT. + - | ``float`` (resp. ``double``) for single-precision (resp. double-precision) descriptors + | [1.0] + * - :ref:`NUMBER_OF_TRANSFORMS` + - Value of :math:`M`. This is relevant (and *must* be set) for + batched DFT(s), *i.e.*, if :math:`M > 1`. + - | ``std::int64_t`` + | [1] + * - :ref:`COMPLEX_STORAGE` + - Data storage type used (relevant for complex descriptors only). + - | :ref:`onemkl_dft_enum_config_value` (possible values are ``config_value::COMPLEX_COMPLEX`` or ``config_value::REAL_REAL``) + | [``config_value::COMPLEX_COMPLEX``] + * - PLACEMENT + - | Parameter specifying whether the DFT calculations should be done in-place (results overwriting the input data) or out-of-place (input and output in separate data containers having no common elements). + | Note: even for out-of-place configurations, some implementations may not preserve the original input data. 
+ - | :ref:`onemkl_dft_enum_config_value` (possible values are self-explanatory ``config_value::INPLACE`` or ``config_value::NOT_INPLACE``) + | [``config_value::INPLACE``] + * - :ref:`FWD_STRIDES` + - Offset and strides defining the layout within a given data sequence + in the forward domain. + - | ``std::vector`` of size :math:`(d+1)` + | [defined :ref:`here`] + * - :ref:`BWD_STRIDES` + - Offset and strides defining the layout within a given data sequence + in the backward domain. + - | ``std::vector`` of size :math:`(d+1)` + | [defined :ref:`here`] + * - :ref:`INPUT_STRIDES` (deprecated) + - Offset and strides defining the layout within a given *input* data + sequence. + - | ``std::vector`` of size :math:`(d+1)` + | [``std::vector(d+1, 0)``] + * - :ref:`OUTPUT_STRIDES` (deprecated) + - Offset and strides defining the layout within a given *output* data + sequence. + - | ``std::vector`` of size :math:`(d+1)` + | [``std::vector(d+1, 0)``] + * - :ref:`FWD_DISTANCE` + - Distance in number of elements of + :ref:`implicitly-assumed data type` + between forward-domain entries + :math:`\left(\cdot\right)^{m}_{k_1, k_2, \ldots, k_d}` and + :math:`\left(\cdot\right)^{m + 1}_{k_1, k_2, \ldots, k_d}` for all + :math:`0\leq m < M - 1` and + :math:`\left(k_1, k_2, \ldots, k_d\right)` in + :ref:`valid range`. This is + relevant (and *must* be set) for batched DFT(s), *i.e.*, if + :math:`M > 1`. + - | ``std::int64_t`` + | [0] + * - :ref:`BWD_DISTANCE` + - Distance in number of elements of + :ref:`implicitly-assumed data type` + between backward-domain entries + :math:`\left(\cdot\right)^{m}_{k_1, k_2, \ldots, k_d}` and + :math:`\left(\cdot\right)^{m + 1}_{k_1, k_2, \ldots, k_d}` for all + :math:`0\leq m < M - 1` and + :math:`\left(k_1, k_2, \ldots, k_d\right)` in + :ref:`valid range`. This is + relevant (and *must* be set) for batched DFT(s), *i.e.*, if + :math:`M > 1`. + - | ``std::int64_t`` + | [0] .. 
_onemkl_dft_enum_config_value: config_value ++++++++++++ -These are some of the non-integer/floating-point values that the :ref:`onemkl_dft_enum_config_param` configuration parameters can take on. +This scoped enumeration type represents possible non-numeric configuration +values associated with some +:ref:`configuration parameters`. .. container:: section .. code:: cpp enum class config_value { - // for config_param::COMMIT_STATUS COMMITTED, UNCOMMITTED, // for config_param::COMPLEX_STORAGE, - // config_param::REAL_STORAGE and - // config_param::CONJUGATE_EVEN_STORAGE COMPLEX_COMPLEX, - REAL_COMPLEX, REAL_REAL, // for config_param::PLACEMENT INPLACE, - NOT_INPLACE, - - // for config_param::ORDERING - ORDERED, - BACKWARD_SCRAMBLED, - - // Allow/avoid certain usages - ALLOW, - AVOID, - NONE, - - // for config_param::PACKED_FORMAT for storing conjugate-even finite sequence in real containers - CCE_FORMAT - + NOT_INPLACE }; - - **Parent topic:** :ref:`onemkl_dft` .. toctree:: :hidden: - config_params/scaling_factor - config_params/number_of_transforms + config_params/data_layouts config_params/storage_formats - config_params/strides - config_params/distance - diff --git a/source/elements/oneMKL/source/domains/lapack/ormtr.rst b/source/elements/oneMKL/source/domains/lapack/ormtr.rst index c5595437e..babd3b380 100644 --- a/source/elements/oneMKL/source/domains/lapack/ormtr.rst +++ b/source/elements/oneMKL/source/domains/lapack/ormtr.rst @@ -100,7 +100,7 @@ lda The leading dimension of ``a`` :math:`(\max(1, r) \le \text{lda})`. tau - The buffer ``tau`` as returned bya :ref:`onemkl_lapack_sytrd`. The + The buffer ``tau`` as returned by a :ref:`onemkl_lapack_sytrd`. The dimension of ``tau`` must be at least :math:`\max(1, r-1)`. 
c diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-distributions-template-parameter-method.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-distributions-template-parameter-method.rst new file mode 100644 index 000000000..20f93b299 --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-distributions-template-parameter-method.rst @@ -0,0 +1,52 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_distributions_method: + +Distributions Template Parameter Method +======================================= + +.. tabularcolumns:: |\Y{0.4}|\Y{0.2}|\Y{0.4}| + +.. list-table:: + :header-rows: 1 + :class: longtable + + * - Method Type + - Distributions + - Math Description + + * - ``uniform_method::standard`` + ``uniform_method::accurate`` + - ``uniform`` + - Standard method. ``uniform_method::accurate`` checks for additional ``float`` and ``double`` data types. + For ``integer`` data types, it uses ``double`` as a ``BRNG`` data type (``float`` ``BRNG`` data type is used in + ``uniform_method::standard`` method on GPU). + * - ``gaussian_method::box_muller2`` + - ``gaussian`` + - Generates normally distributed random numbers `x1` and `x2` through the pair of uniformly distributed numbers `u1` and `u2` according to + the formulas: :math:`x_1 = \sqrt{-2 \ln u_1} \sin {2 \pi u_2}`\ :math:`x_2 = \sqrt{-2 \ln u_1} \cos {2 \pi u_2}`\ + * - ``exponential_method::icdf`` + ``exponential_method::icdf_accurate`` + - ``exponential`` + - Inverse cumulative distribution function (ICDF) method. + * - ``lognormal_method::box_muller2`` + - ``lognormal`` + - Normally distributed random numbers `x1` and `x2` are produced through the pair of uniformly distributed numbers `u1` and `u2` according to the formulas: + :math:`x_1 = \sqrt{-2 \ln u_1} \sin {2 \pi u_2}`\ \ :math:`x_2 = \sqrt{-2 \ln u_1} \cos {2 \pi u_2}`\ + + Then `x1` and `x2` are converted to lognormal distribution.
+ * - ``bernoulli_method::icdf`` + - ``bernoulli`` + - Inverse cumulative distribution function (ICDF) method. + * - ``poisson_method::devroye`` + - ``poisson`` + - Acceptance/rejection method for :math:`\lambda \geq 27` with decomposition into four regions: + + * Two parallelograms + * Triangle + * Left exponential tail + * Right exponential tail + +`NOTE:` Methods provided for exposition purposes. diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-distributions.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-distributions.rst new file mode 100644 index 000000000..836d02f2f --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-distributions.rst @@ -0,0 +1,92 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_distributions: + +Device Distributions +==================== + +oneMKL RNG routines are used to generate random numbers with different types of distributions. Each function group is +introduced below by the type of underlying distribution and contains a short description of its functionality, as well +as specifications of the call sequence and the explanation of input and output parameters. The Device Continuous +Distribution Generators table and Device Discrete Distribution Generators table mention random number generator routines +with data types and output distributions, and sets correspondence between data types of the generator routines and the +basic random number generators. + +**Device Continuous Distribution Generators** + +.. 
list-table:: + :header-rows: 1 + + * - Type of Distribution + - Data Types + - BRNG Data Type + - Description + * - :ref:`onemkl_device_rng_uniform_continuous` + - float, double + - float, double + - Uniform continuous distribution on the interval [``a,b``) + * - :ref:`onemkl_device_rng_gaussian` + - float, double + - float, double + - Normal (Gaussian) distribution + * - :ref:`onemkl_device_rng_exponential` + - float, double + - float, double + - Exponential distribution + * - :ref:`onemkl_device_rng_lognormal` + - float, double + - float, double + - Lognormal distribution + + +**Device Discrete Distribution Generators** + +.. list-table:: + :header-rows: 1 + + * - Type of Distribution + - Data Types + - BRNG Data Type + - Description + * - :ref:`onemkl_device_rng_uniform_discrete` + - integer + - float + - Uniform discrete distribution on the interval [``a,b``) + * - :ref:`onemkl_device_rng_bits` + - integer + - integer + - Bits of underlying BRNG integer sequence + * - :ref:`onemkl_device_rng_uniform_bits` + - integer + - integer + - Uniformly distributed bits in 32/64-bit chunks + * - :ref:`onemkl_device_rng_poisson` + - integer + - integer + - Poisson distribution + * - :ref:`onemkl_device_rng_bernoulli` + - integer + - integer + - Bernoulli distribution + +`NOTE:` In case of ``integer`` check desired distribution for supported data types. + +**Parent topic:** :ref:`onemkl_device_rng_routines` + +.. 
toctree:: + :maxdepth: 1 + :hidden: + + device-distributions-template-parameter-method.rst + device-rng-uniform-continuous.rst + device-rng-gaussian.rst + device-rng-lognormal.rst + device-rng-exponential.rst + device-rng-uniform-discrete.rst + device-rng-bits.rst + device-rng-uniform-bits.rst + device-rng-poisson.rst + device-rng-bernoulli.rst + diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-engines.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-engines.rst new file mode 100644 index 000000000..794f089f5 --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-engines.rst @@ -0,0 +1,42 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_engines: + +Device Engines (Basic Random Number Generators) +=============================================== + +oneMKL RNG provides following device pseudorandom number generators: + +.. tabularcolumns:: |\Y{0.4}|\Y{0.6}| + +.. list-table:: + :header-rows: 1 + :class: longtable + + * - Routine + - Description + + * - :ref:`onemkl_device_rng_mrg32k3a` + - The combined multiple recursive pseudorandom number generator ``MRG32k3a`` [:ref:`L'Ecuyer99 `] + + * - :ref:`onemkl_device_rng_philox4x32x10` + - Philox4x32-10 counter-based pseudorandom number generator with a period of :math:`2^{128}` ``PHILOX4X32X10`` [:ref:`Salmon11 `] + + * - :ref:`onemkl_device_rng_mcg31m1` + - The 31-bit multiplicative congruential pseudorandom number generator MCG(:math:`1132489760, 2^{32}-1`) :ref:`[L'Ecuyer99a] `. + + * - :ref:`onemkl_device_rng_mcg59` + - The 59-bit multiplicative congruential pseudorandom number generator MCG(:math:`13^{13}, 2^{59}`) from NAG Numerical Libraries :ref:`[NAG] `. + +**Parent topic:** :ref:`onemkl_device_rng_routines` + +.. 
toctree:: + :maxdepth: 1 + :hidden: + + device-rng-mrg32k3a.rst + device-rng-philox4x32x10.rst + device-rng-mcg31m1.rst + device-rng-mcg59.rst \ No newline at end of file diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-rng-bernoulli.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-bernoulli.rst new file mode 100644 index 000000000..4bff82aaf --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-bernoulli.rst @@ -0,0 +1,185 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_bernoulli: + +bernoulli +========= + +Generates Bernoulli distributed random values. + +.. rubric:: Description + +The ``bernoulli`` class object is used in the ``generate`` and function +to provide Bernoulli distributed random numbers with probability ``p`` of a single trial success, +where :math:`p \in R; 0 \leq p \leq 1`. + +The probability distribution is given by: + +.. math:: + + P(X = 1) = p + +.. math:: + + P(X = 0) = 1 - p + +The cumulative distribution function is as follows: + +.. math:: + + F_p(x) = + \begin{cases} + 0, & x < 0 \\ + 1 - p, & 0 \leq x < 1, x \in R \\ + 1, & x \geq 1 + \end{cases} + + +class bernoulli +--------------- + +.. rubric:: Syntax + +.. code-block:: cpp + + namespace oneapi::mkl::rng::device { + template + class bernoulli { + public: + using method_type = Method; + using result_type = IntType; + + bernoulli(); + explicit bernoulli(float p); + + float p() const; + }; + } + + +.. container:: section + + .. rubric:: Template parameters + + .. container:: section + + typename IntType + Type of the produced values. Supported types: + * ``std::int32_t`` + * ``std::uint32_t`` + + .. container:: section + + typename Method = oneapi::mkl::rng::bernoulli_method::by_default + Transformation method, which will be used for generation. 
Supported types: + + * ``oneapi::mkl::rng::bernoulli_method::by_default`` + * ``oneapi::mkl::rng::bernoulli_method::icdf`` + + See description of the methods in :ref:`Distributions methods template parameter`. + +.. container:: section + + .. rubric:: Class Members + + .. list-table:: + :header-rows: 1 + + * - Routine + - Description + * - `bernoulli()`_ + - Default constructor + * - `explicit bernoulli(float p)`_ + - Constructor with parameters + * - `float p() const`_ + - Method to obtain probability `p` + +.. container:: section + + .. rubric:: Member types + + .. container:: section + + .. code-block:: cpp + + bernoulli::method_type = Method + + .. container:: section + + .. rubric:: Description + + The type which defines transformation method for generation. + + .. container:: section + + .. code-block:: cpp + + bernoulli::result_type = IntType + + .. container:: section + + .. rubric:: Description + + The type which defines type of generated random numbers. + +.. container:: section + + .. rubric:: Constructors + + .. container:: section + + .. _`bernoulli()`: + + .. code-block:: cpp + + bernoulli::bernoulli() + + .. container:: section + + .. rubric:: Description + + Default constructor for distribution, parameters set as `p` = 0.5f. + + .. container:: section + + .. _`explicit bernoulli(float p)`: + + .. code-block:: cpp + + explicit bernoulli::bernoulli(float p) + + .. container:: section + + .. rubric:: Description + + Constructor with parameters. `p` is a probability. + + .. container:: section + + .. rubric:: Throws + + oneapi::mkl::invalid_argument + Exception is thrown when `p > 1`, or `p < 0` + +.. container:: section + + .. rubric:: Characteristics + + .. container:: section + + .. _`float p() const`: + + .. code-block:: cpp + + float bernoulli::p() const + + .. container:: section + + .. rubric:: Return Value + + Returns the distribution parameter `p` - probability. 
+ +**Parent topic:** :ref:`onemkl_device_rng_distributions` + diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-rng-bits.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-bits.rst new file mode 100644 index 000000000..bee0775dd --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-bits.rst @@ -0,0 +1,61 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_bits: + +bits +==== + +Generates bits of underlying engine (BRNG) integer sequence. + +.. rubric:: Description + +The ``bits`` class object is used in the ``generate`` function to provide integer +random values. Each integer can be treated as a vector of several bits. In pseudorandom generators +this randomness can be violated. See :ref:`VS Notes` for details. + + +class bits +---------- + +.. rubric:: Syntax + +.. code-block:: cpp + + namespace oneapi::mkl::rng::device { + template + class bits { + using result_type = UIntType; + }; + } + + +.. container:: section + + .. rubric:: Template parameters + + .. container:: section + + typename UIntType + Type of the produced values. Supported types: + * ``std::uint32_t`` for philox4x32x10, mrg32k3a and mcg31m1 engines. + * ``std::uint64_t`` for mcg59. + +.. container:: section + + .. rubric:: Member types + + .. container:: section + + .. code-block:: cpp + + bits::result_type = UIntType + + .. container:: section + + .. rubric:: Description + + The type which defines type of generated random numbers. + +**Parent topic:** :ref:`onemkl_device_rng_distributions` diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-rng-exponential.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-exponential.rst new file mode 100644 index 000000000..2d3fda860 --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-exponential.rst @@ -0,0 +1,207 @@ +..
SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_exponential: + +exponential +=========== + + +Generates exponentially distributed random numbers. + +.. rubric:: Description + +The ``exponential`` class object is used in the ``generate`` function to provide +random numbers with exponential distribution that has displacement :math:`a` and scalefactor :math:`\beta`, +where :math:`a, \beta \in R ; \beta > 0`. + + +The probability density function is given by: + +.. math:: + + f_{a, \beta}(x) = + \begin{cases} + \frac{1}{\beta} \exp (-\frac{(x-a)}{\beta}), & x \geq a \\ + 0, & x < a + \end{cases}, + - \infty < x < + \infty + +The cumulative distribution function is as follows: + +.. math:: + + F_{a, \beta}(x) = + \begin{cases} + 1 - \exp (-\frac{(x-a)}{\beta}), & x \geq a \\ + 0, & x < a + \end{cases}, + - \infty < x < + \infty + + +class exponential +----------------- + +.. rubric:: Syntax + +.. code-block:: cpp + + namespace oneapi::mkl::rng::device { + template + class exponential { + public: + using method_type = Method; + using result_type = RealType; + + exponential(); + explicit exponential(RealType a, RealType beta); + + RealType a() const; + RealType beta() const; + }; + } + + +.. container:: section + + .. rubric:: Template parameters + + .. container:: section + + typename RealType + Type of the produced values. Supported types: + + * ``float`` + * ``double`` + + .. container:: section + + typename Method = oneapi::mkl::rng::exponential_method::by_default + Generation method. The specific values are as follows: + + * ``oneapi::mkl::rng::device::exponential_method::by_default`` + * ``oneapi::mkl::rng::device::exponential_method::icdf`` + * ``oneapi::mkl::rng::device::exponential_method::icdf_accurate`` + + See description of the methods in :ref:`Distributions methods template parameter`. + + +.. container:: section + + .. rubric:: Class Members + + .. 
list-table:: + :header-rows: 1 + + * - Routine + - Description + * - `exponential()`_ + - Default constructor + * - `explicit exponential(RealType a, RealType beta)`_ + - Constructor with parameters + * - `RealType a() const`_ + - Method to obtain displacement value + * - `RealType beta() const`_ + - Method to obtain scalefactor + +.. container:: section + + .. rubric:: Member types + + .. container:: section + + .. code-block:: cpp + + exponential::method_type = Method + + .. container:: section + + .. rubric:: Description + + The type which defines transformation method for generation. + + .. container:: section + + .. code-block:: cpp + + exponential::result_type = RealType + + .. container:: section + + .. rubric:: Description + + The type which defines type of generated random numbers. + +.. container:: section + + .. rubric:: Constructors + + .. container:: section + + .. _`exponential()`: + + .. code-block:: cpp + + exponential::exponential() + + .. container:: section + + .. rubric:: Description + + Default constructor for distribution, parameters set as `a` = 0.0, `beta` = 1.0. + + .. container:: section + + .. _`explicit exponential(RealType a, RealType beta)`: + + .. code-block:: cpp + + explicit exponential::exponential(RealType a, RealType beta) + + .. container:: section + + .. rubric:: Description + + Constructor with parameters. `a` is a displacement, `beta` is a scalefactor. + + .. container:: section + + .. rubric:: Throws + + oneapi::mkl::invalid_argument + Exception is thrown when :math:`beta \leq 0` + +.. container:: section + + .. rubric:: Characteristics + + .. container:: section + + .. _`RealType a() const`: + + .. code-block:: cpp + + RealType exponential::a() const + + .. container:: section + + .. rubric:: Return Value + + Returns the distribution parameter `a` - displacement. + + .. container:: section + + .. _`RealType beta() const`: + + .. code-block:: cpp + + RealType exponential::beta() const + + .. container:: section + + ..
rubric:: Return Value + + Returns the distribution parameter `beta` - scalefactor value. + +**Parent topic:** :ref:`onemkl_device_rng_distributions` diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-rng-gaussian.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-gaussian.rst new file mode 100644 index 000000000..f809fce6b --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-gaussian.rst @@ -0,0 +1,213 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_gaussian: + +gaussian +======== + +Generates normally distributed random numbers. + +.. rubric:: Description + +The ``gaussian`` class object is used in the ``generate`` function to provide +random numbers with normal (Gaussian) distribution with mean (``a``) and standard deviation +(``stddev``, :math:`\sigma` ), where :math:`a, \sigma \in \mathbb{R}; \sigma > 0` + +The probability density function is given by: + +.. math:: + + f_{a, \sigma} (x) = + \frac{1}{\sigma \sqrt{2 \pi}} + \exp + \left( + - \frac{(x-a)^2}{2\sigma^2} + \right), + - \infty < x < + \infty + +The cumulative distribution function is as follows: + +.. math:: + + F_{a, \sigma} (x) = + \int_{-\infty}^{x} + \frac{1}{\sigma \sqrt{2 \pi}} + \exp + \left( + - \frac{(y-a)^2}{2\sigma^2} + \right) dy, + - \infty < x < + \infty + + +The cumulative distribution function :math:`F_{a, \sigma}(x)` can be expressed +in terms of standard normal distribution :math:`\phi(x)` as + +.. math:: + + F_{a,\sigma}(x) = \phi((x - a)/\sigma) + + +class gaussian +-------------- + +.. rubric:: Syntax + +.. code-block:: cpp + + namespace oneapi::mkl::rng::device { + template + class gaussian { + public: + using method_type = Method; + using result_type = RealType; + + gaussian(); + explicit gaussian(RealType mean, RealType stddev); + + RealType mean() const; + RealType stddev() const; + }; + } + +.. container:: section + + ..
rubric:: Template parameters + + .. container:: section + + typename RealType + Type of the produced values. Supported types: + + * ``float`` + * ``double`` + + .. container:: section + + typename Method + Generation method. The specific values are as follows: + + * ``oneapi::mkl::rng::device::gaussian_method::by_default`` + * ``oneapi::mkl::rng::device::gaussian_method::box_muller2`` + + See description of the methods in :ref:`Distributions methods template parameter` + +.. container:: section + + .. rubric:: Class Members + + .. list-table:: + :header-rows: 1 + + * - Routine + - Description + * - `gaussian()`_ + - Default constructor + * - `explicit gaussian(RealType mean, RealType stddev)`_ + - Constructor with parameters + * - `RealType mean() const`_ + - Method to obtain mean value + * - `RealType stddev() const`_ + - Method to obtain standard deviation value + +.. container:: section + + .. rubric:: Member types + + .. container:: section + + .. code-block:: cpp + + gaussian::method_type = Method + + .. container:: section + + .. rubric:: Description + + The type which defines transformation method for generation. + + .. container:: section + + .. code-block:: cpp + + gaussian::result_type = RealType + + .. container:: section + + .. rubric:: Description + + The type which defines type of generated random numbers. + +.. container:: section + + .. rubric:: Constructors + + .. container:: section + + .. _`gaussian()`: + + .. code-block:: cpp + + gaussian::gaussian() + + .. container:: section + + .. rubric:: Description + + Default constructor for distribution, parameters set as `mean` = 0.0, `stddev` = 1.0. + + .. container:: section + + .. _`explicit gaussian(RealType mean, RealType stddev)`: + + .. code-block:: cpp + + explicit gaussian::gaussian(RealType mean, RealType stddev) + + .. container:: section + + .. rubric:: Description + + Constructor with parameters. `mean` is a mean value, `stddev` is a standard deviation value. + + .. container:: section + + ..
rubric:: Throws + + oneapi::mkl::invalid_argument + Exception is thrown when `stddev` :math:`\leq 0` + +.. container:: section + + .. rubric:: Characteristics + + .. container:: section + + .. _`RealType mean() const`: + + .. code-block:: cpp + + RealType gaussian::mean() const + + .. container:: section + + .. rubric:: Return Value + + Returns the distribution parameter `mean` - mean value. + + .. container:: section + + .. _`RealType stddev() const`: + + .. code-block:: cpp + + RealType gaussian::stddev() const + + .. container:: section + + .. rubric:: Return Value + + Returns the distribution parameter `stddev` - standard deviation value. + +**Parent topic:** :ref:`onemkl_device_rng_distributions` diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-rng-generate-routines.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-generate-routines.rst new file mode 100644 index 000000000..8b60e7429 --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-generate-routines.rst @@ -0,0 +1,18 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_generate_routines: + +Device Generate Routines +======================== + + +Use the :ref:`onemkl_device_rng_generate` routine to obtain random numbers from a given engine with proper +statistics of a given distribution. + +.. toctree:: + :maxdepth: 1 + :hidden: + + device-rng-generate.rst \ No newline at end of file diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-rng-generate.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-generate.rst new file mode 100644 index 000000000..02e62390c --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-generate.rst @@ -0,0 +1,52 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_generate: + +generate +======== + +.. 
rubric:: Description + +Entry point to obtain random numbers from a given engine with proper statistics of a given distribution. + +.. rubric:: Syntax + +.. code-block:: cpp + + namespace oneapi::mkl::rng::device { + template + auto generate(Distr& distr, Engine& engine) -> + typename std::conditional>::type + } + +.. container:: section + + .. rubric:: Template Parameters + + Distr + Type of distribution which is used for random number generation. + + Engine + Type of engine which is used for random number generation. + +.. container:: section + + .. rubric:: Input Parameters + + distr + Distribution object. See :ref:`onemkl_device_rng_distributions` for details. + + engine + Engine object. See :ref:`onemkl_device_rng_engines` for details. + +.. container:: section + + .. rubric:: Return Value + + Returns `Distr::result_type` if `Engine::vec_size == 1` or `sycl::vec` + with generated random numbers. + +**Parent topic:** :ref:`onemkl_device_rng_generate_routines` diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-rng-lognormal.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-lognormal.rst new file mode 100644 index 000000000..d9e065a5a --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-lognormal.rst @@ -0,0 +1,242 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_lognormal: + +lognormal +========= + + +Generates lognormally distributed random numbers. + +.. rubric:: Description + +The ``lognormal`` class object is used in the ``generate`` and function to provide +random numbers with average of distribution (``m``, ``a``) and standard deviation (``s``, :math:`\sigma`) of +subject normal distribution, displacement (``displ``, ``b``), and scalefactor (``scale``, :math:`\beta`), where +:math:`a, \sigma, b, \beta \in \mathbb{R}; \sigma > 0, \beta > 0`. + +The probability density function is given by: + +.. 
math:: + + f_{a, \sigma, b, \beta} (x) = + \begin{cases} + \frac{1}{\sigma (x - b) \sqrt {2\pi}} + \exp \left( + -\frac{\left(\ln( \frac{x - b}{\beta}) - a\right)^2}{2\sigma^2} + \right), & x > b \\ + 0, & x \leq b + \end{cases} + + +The cumulative distribution function is as follows: + +.. math:: + + F_{a, \sigma, b, \beta} (x) = + \begin{cases} + \Phi \left(\frac{\ln( \frac{x - b}{\beta}) - a}{\sigma}\right), & x > b \\ + 0, & x \leq b + \end{cases} + + +class lognormal +--------------- + +.. rubric:: Syntax + +.. code-block:: cpp + + namespace oneapi::mkl::rng::device { + template + class lognormal { + public: + using method_type = Method; + using result_type = RealType; + + lognormal(); + explicit lognormal(RealType m, RealType s, RealType displ = (RealType)0.0, RealType scale = (RealType)1.0); + + RealType m() const; + RealType s() const; + RealType displ() const; + RealType scale() const; + }; + } + +.. container:: section + + .. rubric:: Template parameters + + .. container:: section + + typename RealType + Type of the produced values. Supported types: + + * ``float`` + * ``double`` + + .. container:: section + + typename Method = oneapi::mkl::rng::lognormal_method::by_default + Transformation method, which will be used for generation. Supported types: + + * ``oneapi::mkl::rng::device::lognormal_method::by_default`` + * ``oneapi::mkl::rng::device::lognormal_method::box_muller2`` + + See description of the methods in :ref:`Distributions methods template parameter`. + + +.. container:: section + + .. rubric:: Class Members + + ..
list-table:: + :header-rows: 1 + + * - Routine + - Description + * - `lognormal()`_ + - Default constructor + * - `explicit lognormal(RealType m, RealType s, RealType displ = (RealType)0.0, RealType scale = (RealType)1.0)`_ + - Constructor with parameters + * - `RealType m() const`_ + - Method to obtain mean value + * - `RealType s() const`_ + - Method to obtain standard deviation value + * - `RealType displ() const`_ + - Method to obtain displacement value + * - `RealType scale() const`_ + - Method to obtain scalefactor value + + +.. container:: section + + .. rubric:: Member types + + .. container:: section + + .. code-block:: cpp + + lognormal::method_type = Method + + .. container:: section + + .. rubric:: Description + + The type which defines transformation method for generation. + + .. container:: section + + .. code-block:: cpp + + lognormal::result_type = RealType + + .. container:: section + + .. rubric:: Description + + The type which defines type of generated random numbers. + +.. container:: section + + .. rubric:: Constructors + + .. container:: section + + .. _`lognormal()`: + + .. code-block:: cpp + + lognormal::lognormal() + + .. container:: section + + .. rubric:: Description + + Default constructor for distribution, parameters set as `m` = 0.0, `s` = 1.0, `displ` = 0.0, `scale` = 1.0. + + .. container:: section + + .. _`explicit lognormal(RealType m, RealType s, RealType displ = (RealType)0.0, RealType scale = (RealType)1.0)`: + + .. code-block:: cpp + + explicit lognormal::lognormal(RealType m, RealType s, RealType displ = (RealType)0.0, RealType scale = (RealType)1.0) + + .. container:: section + + .. rubric:: Description + + Constructor with parameters. `m` is a mean value, `s` is a standard deviation value, `displ` is a displacement value, `scale` is a scalefactor value. + + .. container:: section + + .. rubric:: Throws + + oneapi::mkl::invalid_argument + Exception is thrown when :math:`s \leq 0`, or :math:`scale \leq 0` + +.. 
container:: section + + .. rubric:: Characteristics + + .. container:: section + + .. _`RealType m() const`: + + .. code-block:: cpp + + RealType lognormal::m() const + + .. container:: section + + .. rubric:: Return Value + + Returns the distribution parameter `m` - mean value. + + .. container:: section + + .. _`RealType s() const`: + + .. code-block:: cpp + + RealType lognormal::s() const + + .. container:: section + + .. rubric:: Return Value + + Returns the distribution parameter `s` - standard deviation value. + + .. container:: section + + .. _`RealType displ() const`: + + .. code-block:: cpp + + RealType lognormal::displ() const + + .. container:: section + + .. rubric:: Return Value + + Returns the distribution parameter `displ` - displacement value. + + .. container:: section + + .. _`RealType scale() const`: + + .. code-block:: cpp + + RealType lognormal::scale() const + + .. container:: section + + .. rubric:: Return Value + + Returns the distribution parameter `scale` - scale factor value. + +**Parent topic:** :ref:`onemkl_device_rng_distributions` diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-rng-mcg31m1.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-mcg31m1.rst new file mode 100644 index 000000000..54bfdbe34 --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-mcg31m1.rst @@ -0,0 +1,121 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_mcg31m1: + +mcg31m1 +======= + +The 31-bit multiplicative congruential pseudorandom number generator MCG(:math:`1132489760, 2^{31}-1`) :ref:`[L'Ecuyer99a] `. + +.. rubric:: Description + +The mcg31m1 engine is a 31-bit multiplicative congruential generator :ref:`[L'Ecuyer99] `. +The mcg31m1 generator belongs to linear congruential generators with the period length of approximately :math:`2^{31}`.
+Such generators are still used as default random number generators in various software systems, mainly due to the +simplicity of the portable versions implementation, speed, and compatibility with the earlier systems versions. +However, their period length does not meet the requirements for modern basic generators. Still, the mcg31m1 generator +possesses good statistic properties and you may successfully use it to generate random numbers of different +distributions for small samplings. + +.. container:: section + + .. rubric:: Generation algorithm + + :math:`x_n=ax_{n-1}(mod \ m)` + + :math:`u_n = x_n / m` + + :math:`a = 1132489760, m=2^{31} - 1` + +class mcg31m1 +------------- + +.. rubric:: Syntax + +.. code-block:: cpp + + namespace oneapi::mkl::rng::device { + template + class mcg31m1 { + public: + static constexpr std::uint32_t default_seed = 1; + static constexpr std::int32_t vec_size = VecSize; + + mcg31m1(); + mcg31m1(std::uint32_t seed, std::uint64_t offset = 0); + mcg31m1(std::initializer_list seed, std::uint64_t offset = 0); + }; + } + +.. container:: section + + .. rubric:: Class Template Parameters + + VecSize + Describes the size of vector which will be produced by generate function by this engine. VecSize values + may be 1, 2, 3, 4, 8, 16 as ``sycl::vec`` class size. By default VecSize = 1, for this case, a single + random number is returned by the ``generate`` function. + +.. container:: section + + .. rubric:: Class Members + + .. list-table:: + :header-rows: 1 + + * - Routine + - Description + * - `mcg31m1()`_ + - Default constructor + * - `mcg31m1(std::uint32_t seed, std::uint64_t offset = 0)`_ + - Constructor for common seed initialization of the engine and common number of skipped elements + * - `mcg31m1(std::initializer_list seed, std::uint64_t offset = 0)`_ + - Constructor for extended seed initialization of the engine and common number of skipped elements + +.. container:: section + + .. rubric:: Constructors + + .. _`mcg31m1()`: + + .. 
code-block:: cpp + + mcg31m1::mcg31m1() + + .. _`mcg31m1(std::uint32_t seed, std::uint64_t offset = 0)`: + + .. code-block:: cpp + + mcg31m1::mcg31m1(std::uint32_t seed, std::uint64_t offset = 0) + + .. container:: section + + .. rubric:: Input Parameters + + seed + The initial conditions of the generator state, assume :math:`x_0 = seed \ mod \ 0x7FFFFFFF`, + if :math:`x_0 = 0`, assume :math:`x_0 = 1`. + + offset + Number of skipped elements. + + .. _`mcg31m1(std::initializer_list seed, std::uint64_t offset = 0)`: + + .. code-block:: cpp + + mcg31m1::mcg31m1(std::initializer_list seed, std::uint64_t offset = 0) + + .. container:: section + + .. rubric:: Input Parameters + + seed + The initial conditions of the generator state, assume :math:`x_0 = seed \ mod \ 0x7FFFFFFF`, + if :math:`x_0 = 0`, assume :math:`x_0 = 1`. + + offset + Number of skipped elements. + +**Parent topic:** :ref:`onemkl_device_rng_engines` diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-rng-mcg59.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-mcg59.rst new file mode 100644 index 000000000..33dbd2e3d --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-mcg59.rst @@ -0,0 +1,117 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_mcg59: + +mcg59 +===== + +The 59-bit multiplicative congruential pseudorandom number generator MCG(:math:`13^{13}, 2^{59}`) +from NAG Numerical Libraries. + +.. rubric:: Description + +The mcg59 engine is a 59-bit multiplicative congruential generator from NAG Numerical Libraries :ref:`NAG `. +The mcg59 generator belongs to linear congruential generators with the period length of approximately :math:`2^{57}`. + +.. container:: section + + .. rubric:: Generation algorithm + + :math:`x_n=ax_{n-1}(mod \ m)` + + :math:`u_n = x_n / m` + + :math:`a = 13^{13}, m=2^{59}` + +class mcg59 +----------- + +.. rubric:: Syntax + +.. 
code-block:: cpp + + namespace oneapi::mkl::rng::device { + template + class mcg59 { + public: + static constexpr std::uint32_t default_seed = 1; + static constexpr std::int32_t vec_size = VecSize; + + mcg59(); + mcg59(std::uint32_t seed, std::uint64_t offset = 0); + mcg59(std::initializer_list seed, std::uint64_t offset = 0); + }; + } + +.. container:: section + + .. rubric:: Class Template Parameters + + VecSize + Describes the size of vector which will be produced by generate function by this engine. VecSize values + may be 1, 2, 3, 4, 8, 16 as ``sycl::vec`` class size. By default VecSize = 1, for this case, a single + random number is returned by the ``generate`` function. + +.. container:: section + + .. rubric:: Class Members + + .. list-table:: + :header-rows: 1 + + * - Routine + - Description + * - `mcg59()`_ + - Default constructor + * - `mcg59(std::uint32_t seed, std::uint64_t offset = 0)`_ + - Constructor for common seed initialization of the engine and common number of skipped elements + * - `mcg59(std::initializer_list seed, std::uint64_t offset = 0)`_ + - Constructor for extended seed initialization of the engine and common number of skipped elements + +.. container:: section + + .. rubric:: Constructors + + .. _`mcg59()`: + + .. code-block:: cpp + + mcg59::mcg59() + + .. _`mcg59(std::uint32_t seed, std::uint64_t offset = 0)`: + + .. code-block:: cpp + + mcg59::mcg59(std::uint32_t seed, std::uint64_t offset = 0) + + .. container:: section + + .. rubric:: Input Parameters + + seed + The initial conditions of the generator state, assume :math:`x_0 = seed \ mod \ 2^{59}`, + if :math:`x_0 = 0`, assume :math:`x_0 = 1`. + + offset + Number of skipped elements. + + .. _`mcg59(std::initializer_list seed, std::uint64_t offset = 0)`: + + .. code-block:: cpp + + mcg59::mcg59(std::initializer_list seed, std::uint64_t offset = 0) + + .. container:: section + + .. 
rubric:: Input Parameters + + seed + The initial conditions of the generator state, assume :math:`x_0 = seed \ mod \ 2^{59}`, + if :math:`x_0 = 0`, assume :math:`x_0 = 1`. + + offset + Number of skipped elements. + +**Parent topic:** :ref:`onemkl_device_rng_engines` diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-rng-mrg32k3a.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-mrg32k3a.rst new file mode 100644 index 000000000..ffd312519 --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-mrg32k3a.rst @@ -0,0 +1,213 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_mrg32k3a: + +mrg32k3a +======== + +The combined multiple recursive pseudorandom number generator MRG32k3a. + +.. rubric:: Description + +MRG32k3a engine is a 32-bit combined multiple recursive generator with two components of order 3 +[:ref:`L'Ecuyer99a`]. MRG32k3a combined generator meets the requirements for +modern RNGs, such as good multidimensional uniformity, or a long period (:math:`p \approx 2^{191}`). + + +.. container:: section + + .. rubric:: Generation algorithm + + + :math:`x_n=a_{11} x_{n-1} + a_{12} x_{n-2} + a_{13} x_{n-3}(mod \ m_{1})` + + :math:`y_n = a_{21} y_{n-1} + a_{22} y_{n-2} + a_{23} y_{n-3} (mod \ m_2)` + + :math:`z_n = x_n - y_n (mod \ m_{1})` + + :math:`u_n = z_n / m_1` + + :math:`a_{11} = 0, a_{12} = 1403580, a_{13} = -810728, m_1 = 2^{32} - 209` + + :math:`a_{21} = 527612, a_{22} = 0, a_{23} = -1370589, m_2 = 2^{32} - 22853` + + +class mrg32k3a +-------------- + +.. rubric:: Syntax + +..
code-block:: cpp + + namespace oneapi::mkl::rng::device { + template + class mrg32k3a { + public: + static constexpr std::uint32_t default_seed = 1; + static constexpr std::int32_t vec_size = VecSize; + + mrg32k3a(); + mrg32k3a(std::uint32_t seed, std::uint64_t offset = 0); + mrg32k3a(std::initializer_list seed, std::uint64_t offset = 0); + mrg32k3a(std::uint32_t seed, std::initializer_list offset); + mrg32k3a(std::initializer_list seed, std::initializer_list offset); + }; + } + + +.. container:: section + + .. rubric:: Class Template Parameters + + VecSize + Describes the size of vector which will be produced by generate function by this engine. VecSize values + may be 1, 2, 3, 4, 8, 16 as ``sycl::vec`` class size. By default VecSize = 1, for this case, a single + random number is returned by the ``generate`` function. + +.. container:: section + + .. rubric:: Class Members + + .. list-table:: + :header-rows: 1 + + * - Routine + - Description + * - `mrg32k3a()`_ + - Default constructor + * - `mrg32k3a(std::uint32_t seed, std::uint64_t offset = 0)`_ + - Constructor for common seed initialization of the engine and common number of skipped elements + * - `mrg32k3a(std::initializer_list seed, std::uint64_t offset = 0)`_ + - Constructor for extended seed initialization of the engine and common number of skipped elements + * - `mrg32k3a(std::uint32_t seed, std::initializer_list offset)`_ + - Constructor for common seed initialization of the engine and extended number of skipped elements + * - `mrg32k3a(std::initializer_list seed, std::initializer_list offset)`_ + - Constructor for extended seed initialization of the engine and extended number of skipped elements + +.. container:: section + + .. rubric:: Constructors + + .. _`mrg32k3a()`: + + .. code-block:: cpp + + mrg32k3a::mrg32k3a() + + .. _`mrg32k3a(std::uint32_t seed, std::uint64_t offset = 0)`: + + .. code-block:: cpp + + mrg32k3a::mrg32k3a(std::uint32_t seed, std::uint64_t offset = 0) + + .. 
container:: section + + .. rubric:: Input Parameters + + seed + The initial conditions of the generator state, assume + if :math:`n = 0: x_{-3} = x_{-2} = x_{-1} = y_{-3} = y_{-2} = y_{-1} = 1` + + if :math:`n = 1: x_{-3} = seed[0] \ mod \ m_1, x_{-2} = x_{-1} = y_{-3} = y_{-2} = y_{-1} = 1` + + if :math:`n = 2: x_{-3} = seed[0] \ mod \ m_1, x_{-2} = seed[1] \ mod \ m_1, x_{-1} = y_{-3} = y_{-2} = y_{-1} = 1` + + if :math:`n = 3: x_{-3} = seed[0] \ mod \ m_1, x_{-2} = seed[1] \ mod \ m_1, x_{-1} = seed[2] \ mod \ m_1` + + :math:`y_{-3} = y_{-2} = y_{-1} = 1` + + if :math:`n = 4: x_{-3} = seed[0] \ mod \ m_1, x_{-2} = seed[1] \ mod \ m_1, x_{-1} = seed[2] \ mod \ m_1` + + :math:`y_{-3} = seed[3] \ mod \ m_2, y_{-2} = y_{-1} = 1` + + if :math:`n = 5: x_{-3} = seed[0] \ mod \ m_1, x_{-2} = seed[1] \ mod \ m_1, x_{-1} = seed[2] \ mod \ m_1` + + :math:`y_{-3} = seed[3] \ mod \ m_2, y_{-2} = seed[4] \ mod \ m_2, y_{-1} = 1` + + if :math:`n \geqslant 6: x_{-3} = seed[0] \ mod \ m_1, x_{-2} = seed[1] \ mod \ m_1, x_{-1} = seed[2] \ mod \ m_1` + + :math:`y_{-3} = seed[3] \ mod \ m_2, y_{-2} = seed[4] \ mod \ m_2, y_{-1} = seed[5] \ mod \ m_2` + + if the values prove to be :math:`x_{-3} = x_{-2} = x_{-1} = 0`, assume :math:`x_{-3} = 1` + + if the values prove to be :math:`y_{-3} = y_{-2} = y_{-1} = 0`, assume :math:`y_{-3} = 1`. + + offset + Number of skipped elements. + + .. _`mrg32k3a(std::initializer_list seed, std::uint64_t offset = 0)`: + + .. code-block:: cpp + + mrg32k3a::mrg32k3a(std::initializer_list seed, std::uint64_t offset = 0) + + .. container:: section + + .. rubric:: Input Parameters + + seed + Initial conditions of the engine state. + + offset + Number of skipped elements. + + .. _`mrg32k3a(std::uint32_t seed, std::initializer_list offset)`: + + .. code-block:: cpp + + mrg32k3a::mrg32k3a(std::uint32_t seed, std::initializer_list offset) + + .. container:: section + + .. 
rubric:: Input Parameters + + seed + The initial conditions of the generator state, assume + if :math:`n = 0: x_{-3} = x_{-2} = x_{-1} = y_{-3} = y_{-2} = y_{-1} = 1` + + if :math:`n = 1: x_{-3} = seed[0] \ mod \ m_1, x_{-2} = x_{-1} = y_{-3} = y_{-2} = y_{-1} = 1` + + if :math:`n = 2: x_{-3} = seed[0] \ mod \ m_1, x_{-2} = seed[1] \ mod \ m_1, x_{-1} = y_{-3} = y_{-2} = y_{-1} = 1` + + if :math:`n = 3: x_{-3} = seed[0] \ mod \ m_1, x_{-2} = seed[1] \ mod \ m_1, x_{-1} = seed[2] \ mod \ m_1` + + :math:`y_{-3} = y_{-2} = y_{-1} = 1` + + if :math:`n = 4: x_{-3} = seed[0] \ mod \ m_1, x_{-2} = seed[1] \ mod \ m_1, x_{-1} = seed[2] \ mod \ m_1` + + :math:`y_{-3} = seed[3] \ mod \ m_2, y_{-2} = y_{-1} = 1` + + if :math:`n = 5: x_{-3} = seed[0] \ mod \ m_1, x_{-2} = seed[1] \ mod \ m_1, x_{-1} = seed[2] \ mod \ m_1` + + :math:`y_{-3} = seed[3] \ mod \ m_2, y_{-2} = seed[4] \ mod \ m_2, y_{-1} = 1` + + if :math:`n \geqslant 6: x_{-3} = seed[0] \ mod \ m_1, x_{-2} = seed[1] \ mod \ m_1, x_{-1} = seed[2] \ mod \ m_1` + + :math:`y_{-3} = seed[3] \ mod \ m_2, y_{-2} = seed[4] \ mod \ m_2, y_{-1} = seed[5] \ mod \ m_2` + + if the values prove to be :math:`x_{-3} = x_{-2} = x_{-1} = 0`, assume :math:`x_{-3} = 1` + + if the values prove to be :math:`y_{-3} = y_{-2} = y_{-1} = 0`, assume :math:`y_{-3} = 1`. + + offset + Number of skipped elements. Offset is calculated as: :math:`offset[0] + offset[1] \cdot 2^{64} + offset[2] \cdot 2^{128} + \ldots + offset[n - 1] \cdot 2^{64 (n - 1)}`, where `n` is the number of elements in the `offset` list. + + .. _`mrg32k3a(std::initializer_list seed, std::initializer_list offset)`: + + .. code-block:: cpp + + mrg32k3a::mrg32k3a(std::initializer_list seed, std::initializer_list offset) + + .. container:: section + + .. rubric:: Input Parameters + + seed + Initial conditions of the engine state. + + offset + Number of skipped elements.
Offset is calculated as: :math:`offset[0] + offset[1] \cdot 2^{64} + offset[2] \cdot 2^{128} + \ldots + offset[n - 1] \cdot 2^{64 (n - 1)}`, where `n` is the number of elements in the `offset` list. + +**Parent topic:** :ref:`onemkl_device_rng_engines` diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-rng-philox4x32x10.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-philox4x32x10.rst new file mode 100644 index 000000000..d5f51b806 --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-philox4x32x10.rst @@ -0,0 +1,192 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_philox4x32x10: + +philox4x32x10 +============= + +A Philox4x32-10 counter-based pseudorandom number generator [:ref:`Salmon11`]. + +.. rubric:: Description + +The Philox4x32x10 engine is a keyed family of counter-based BRNGs. The state consists of a 128-bit integer counter :math:`c` and two 32-bit keys :math:`k_0` and :math:`k_1`. + +.. container:: section + + .. rubric:: Generation algorithm + + The generator has 32-bit integer output obtained in the following way [:ref:`Salmon11 `]: + + 1. :math:`c_n=c_{n-1} + 1` + 2. :math:`\omega_n = f(c_n)`, where :math:`f` is a function that takes 128-bit argument and returns a 128-bit number. The returned number is obtained as follows: + 2.1. The argument :math:`c` is interpreted as four 32-bit numbers :math:`c = \overline{L_1 R_1 L_0 R_0}`, where :math:`\overline{A B C D} = A \cdot 2^{96} + B \cdot 2^{64} + C \cdot 2^{32} + D`, put :math:`k_0^0 =k_0, k_1^0=k_1`. + + 2.2.
The following recurrence is calculated: + + :math:`L_1^{i+1} =mullo(R_1^i, 0xD2511F53)` + + :math:`R_1^{i+1} =mulhi(R_0^i, 0xCD9E8D57) \oplus k_0^i \oplus L_0^i` + + :math:`L_0^{i+1} =mullo(R_0^i, 0xCD9E8D57)` + + :math:`R_0^{i+1} =mulhi(R_1^i, 0xD2511F53) \oplus k_1^i \oplus L_1^i` + + :math:`k_0^{i+1} =k_0^i + 0xBB67AE85` + + :math:`k_1^{i+1} =k_1^i + 0x9E3779B9`, where :math:`mulhi(a, b)` and :math:`mullo(a, b)` are high and low parts of the :math:`a \cdot b` product respectively. + + 2.3. Put :math:`f(c) = \overline{L_1^N R_1^N L_0^N R_0^N}`, where :math:`N = 10` + + 3. Integer output: :math:`r_{4n + k} = \omega_n(k)`, where :math:`\omega_n(k)` is the k-th 32-bit integer in quadruple :math:`\omega_n, k = 0, 1, 2, 3` + 4. Real output: :math:`u_n=(int)r_n / 2^{32} + 1/2` + + + +class philox4x32x10 +------------------- + +.. rubric:: Syntax + +.. code-block:: cpp + + namespace oneapi::mkl::rng::device { + template + class philox4x32x10 { + public: + static constexpr std::uint64_t default_seed = 1; + static constexpr std::int32_t vec_size = VecSize; + + philox4x32x10(); + philox4x32x10(std::uint64_t seed, std::uint64_t offset = 0); + philox4x32x10(std::initializer_list seed, std::uint64_t offset = 0); + philox4x32x10(std::uint64_t seed, std::initializer_list offset); + philox4x32x10(std::initializer_list seed, std::initializer_list offset); + }; + } + +.. container:: section + + .. rubric:: Class Template Parameters + + VecSize + Describes the size of vector which will be produced by generate function by this engine. VecSize values + may be 1, 2, 3, 4, 8, 16 as ``sycl::vec`` class size. By default VecSize = 1, for this case, a single + random number is returned by the ``generate`` function. + +.. container:: section + + .. rubric:: Class Members + + .. 
list-table:: + :header-rows: 1 + + * - Routine + - Description + * - `philox4x32x10()`_ + - Default constructor + * - `philox4x32x10(std::uint64_t seed, std::uint64_t offset = 0)`_ + - Constructor for common seed initialization of the engine and common number of skipped elements + * - `philox4x32x10(std::initializer_list seed, std::uint64_t offset = 0)`_ + - Constructor for extended seed initialization of the engine and common number of skipped elements + * - `philox4x32x10(std::uint64_t seed, std::initializer_list offset)`_ + - Constructor for common seed initialization of the engine and extended number of skipped elements + * - `philox4x32x10(std::initializer_list seed, std::initializer_list offset)`_ + - Constructor for extended seed initialization of the engine and extended number of skipped elements + +.. container:: section + + .. rubric:: Constructors + + .. _`philox4x32x10()`: + + .. code-block:: cpp + + philox4x32x10::philox4x32x10() + + .. _`philox4x32x10(std::uint64_t seed, std::uint64_t offset = 0)`: + + .. code-block:: cpp + + philox4x32x10::philox4x32x10(std::uint64_t seed, std::uint64_t offset = 0) + + .. container:: section + + .. rubric:: Input Parameters + + seed + The initial conditions of the generator state, assume :math:`k = seed, c = 0`, + where :math:`k` is a 64-bit key, :math:`c` is a 128-bit counter. + + offset + Number of skipped elements. + + .. _`philox4x32x10(std::initializer_list seed, std::uint64_t offset = 0)`: + + .. code-block:: cpp + + philox4x32x10::philox4x32x10(std::initializer_list seed, std::uint64_t offset = 0) + + .. container:: section + + .. rubric:: Input Parameters + + seed + The initial conditions of the generator state, assume + if :math:`n = 0: k = 0, c = 0` + + if :math:`n = 1: k = seed[0], c = 0` + + if :math:`n = 2: k = seed[0], c = seed[1]` + + if :math:`n = 3: k = seed[0], c = seed[1] + seed[2] \cdot 2^{64}` + + for :math:`n > 3` following arguments are ignored. + + offset + Number of skipped elements. + + ..
_`philox4x32x10(std::uint64_t seed, std::initializer_list offset)`: + + .. code-block:: cpp + + philox4x32x10::philox4x32x10(std::uint64_t seed, std::initializer_list offset) + + .. container:: section + + .. rubric:: Input Parameters + + seed + The initial conditions of the generator state, assume :math:`k = seed, c = 0`, + where :math:`k` is a 64-bit key, :math:`c` is a 128-bit counter. + + offset + Number of skipped elements. Offset is calculated as: :math:`offset[0] + offset[1] \cdot 2^{64} + offset[2] \cdot 2^{128} + \ldots + offset[n - 1] \cdot 2^{64 (n - 1)}`, where `n` is the number of elements in the `offset` list. + + .. _`philox4x32x10(std::initializer_list seed, std::initializer_list offset)`: + + .. code-block:: cpp + + philox4x32x10::philox4x32x10(std::initializer_list seed, std::initializer_list offset) + + .. container:: section + + .. rubric:: Input Parameters + + seed + The initial conditions of the generator state, assume + if :math:`n = 0: k = 0, c = 0` + + if :math:`n = 1: k = seed[0], c = 0` + + if :math:`n = 2: k = seed[0], c = seed[1]` + + if :math:`n = 3: k = seed[0], c = seed[1] + seed[2] \cdot 2^{64}` + + for :math:`n > 3` following arguments are ignored. + + offset + Number of skipped elements. Offset is calculated as: :math:`offset[0] + offset[1] \cdot 2^{64} + offset[2] \cdot 2^{128} + \ldots + offset[n - 1] \cdot 2^{64 (n - 1)}`, where `n` is the number of elements in the `offset` list. + +**Parent topic:** :ref:`onemkl_device_rng_engines` diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-rng-poisson.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-poisson.rst new file mode 100644 index 000000000..c2270cba2 --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-poisson.rst @@ -0,0 +1,182 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_poisson: + +poisson +======= + +Generates Poisson distributed random values. + +..
rubric:: Description + +The ``poisson`` class object is used in the ``generate`` function +to provide Poisson distributed random numbers with distribution parameter λ, where :math:`\lambda \in R; \lambda > 0`. + + +The probability distribution is given by: + +.. math:: + + P(X = k) = \frac{\lambda^k e^{-\lambda}}{k!} + +:math:`k \in \{0, 1, 2, \ldots \}`. + +The cumulative distribution function is as follows: + +.. math:: + + F_{\lambda}(x) = + \begin{cases} + \sum_{k=0}^{\lfloor x \rfloor} \frac{\lambda^k e^{-\lambda}}{k!}, & x \geq 0 \\ + 0, & x < 0 + \end{cases}, + x \in R + + +class poisson +------------- + +.. rubric:: Syntax + +.. code-block:: cpp + + namespace oneapi::mkl::rng::device { + template <typename IntType, typename Method> + class poisson { + public: + using method_type = Method; + using result_type = IntType; + + poisson(); + explicit poisson(double lambda); + + double lambda() const; + }; + } + + +.. container:: section + + .. rubric:: Template parameters + + .. container:: section + + typename IntType + Type of the produced values. Supported types: + * ``std::int32_t`` + * ``std::uint32_t`` + + .. container:: section + + typename Method = oneapi::mkl::rng::poisson_method::by_default + Transformation method, which will be used for generation. Supported types: + + * ``oneapi::mkl::rng::device::poisson_method::by_default`` + * ``oneapi::mkl::rng::device::poisson_method::devroye`` + + See description of the methods in :ref:`Distributions methods template parameter`. + +.. container:: section + + .. rubric:: Class Members + + .. list-table:: + :header-rows: 1 + + * - Routine + - Description + * - `poisson()`_ + - Default constructor + * - `explicit poisson(double lambda)`_ + - Constructor with parameters + * - `double lambda() const`_ + - Method to obtain distribution parameter + +.. container:: section + + .. rubric:: Member types + + .. container:: section + + .. code-block:: cpp + + poisson::method_type = Method + + .. container:: section + + ..
rubric:: Description + + The type which defines transformation method for generation. + + .. container:: section + + .. code-block:: cpp + + poisson::result_type = IntType + + .. container:: section + + .. rubric:: Description + + The type which defines type of generated random numbers. + +.. container:: section + + .. rubric:: Constructors + + .. container:: section + + .. _`poisson()`: + + .. code-block:: cpp + + poisson::poisson() + + .. container:: section + + .. rubric:: Description + + Default constructor for distribution, parameters set as `lambda` = 0.5. + + .. container:: section + + .. _`explicit poisson(double lambda)`: + + .. code-block:: cpp + + explicit poisson::poisson(double lambda) + + .. container:: section + + .. rubric:: Description + + Constructor with parameters. `lambda` is a distribution parameter. + + .. container:: section + + .. rubric:: Throws + + oneapi::mkl::invalid_argument + Exception is thrown when :math:`lambda \leq 0` + +.. container:: section + + .. rubric:: Characteristics + + .. container:: section + + .. _`double lambda() const`: + + .. code-block:: cpp + + double poisson::lambda() const + + .. container:: section + + .. rubric:: Return Value + + Returns the distribution parameter `lambda`. + +**Parent topic:** :ref:`onemkl_device_rng_distributions` diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-rng-skip-ahead.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-skip-ahead.rst new file mode 100644 index 000000000..173b3f782 --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-skip-ahead.rst @@ -0,0 +1,79 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_skip_ahead: + +skip_ahead +========== + +.. rubric:: Description + +Proceed state of engine by the skip-ahead method. 
+ +The ``skip_ahead`` function supports the following interfaces to apply the skip-ahead method: + +- Common interface +- Interface with a partitioned number of skipped elements + + +skip_ahead +---------- + +.. rubric:: Common Interface + + +.. code-block:: cpp + + namespace oneapi::mkl::rng::device { + template <typename Engine> + void skip_ahead (Engine& engine, std::uint64_t num_to_skip) + } + +.. container:: section + + .. rubric:: Template Parameters + + Engine + Object of engine class, which supports the block-splitting method. + +.. container:: section + + .. rubric:: Input Parameters + + engine + Engine whose state would be skipped. + + num_to_skip + Number of skipped elements. + + +.. rubric:: Interface with a partitioned number of skipped elements + +.. code-block:: cpp + + namespace oneapi::mkl::rng::device { + template <typename Engine> + void skip_ahead (Engine& engine, std::initializer_list<std::uint64_t> num_to_skip) + } + +.. container:: section + + .. rubric:: Template Parameters + + Engine + Object of engine class, which supports the block-splitting method. + +.. container:: section + + .. rubric:: Input Parameters + + engine + Engine whose state would be skipped. + + num_to_skip + Partitioned number of skipped elements. The total number of skipped elements + would be: :math:`num\_to\_skip[0] + num\_to\_skip[1] \cdot 2^{64} + ... + + num\_to\_skip[n - 1] \cdot 2^{64 (n - 1)}`, where `n` is the number of elements in the `num_to_skip` list. + +**Parent topic:** :ref:`onemkl_device_rng_service_routines` diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-rng-uniform-bits.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-uniform-bits.rst new file mode 100644 index 000000000..56bc8cf7d --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-uniform-bits.rst @@ -0,0 +1,64 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +..
_onemkl_device_rng_uniform_bits: + +uniform_bits +============ + +Generates uniformly distributed bits in 32/64-bit chunks. + +.. rubric:: Description + +The ``uniform_bits`` class object is used in the ``generate`` function to generate uniformly distributed bits +in 32/64-bit chunks. It is designed to ensure each bit in the 32/64-bit chunk is uniformly distributed. This distribution +is supported for philox4x32x10 and mcg59 engines. When generating 64-bit chunks, twice as much engine offset needs to +be provided. + +``UIntType`` denotes the chunk size and can be ``std::uint32_t``, ``std::uint64_t``. See :ref:`VS Notes` for details. + + +class uniform_bits +------------------ + +.. rubric:: Syntax + +.. code-block:: cpp + + namespace oneapi::mkl::rng::device { + template <typename UIntType> + class uniform_bits { + using result_type = UIntType; + }; + } + + +.. container:: section + + .. rubric:: Template parameters + + .. container:: section + + typename UIntType + Type of the produced values. Supported types: + * ``std::uint32_t`` + * ``std::uint64_t`` + +.. container:: section + + .. rubric:: Member types + + .. container:: section + + .. code-block:: cpp + + uniform_bits::result_type = UIntType + + .. container:: section + + .. rubric:: Description + + The type which defines type of generated random numbers. + +**Parent topic:** :ref:`onemkl_device_rng_distributions` diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-rng-uniform-continuous.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-uniform-continuous.rst new file mode 100644 index 000000000..ef218522c --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-uniform-continuous.rst @@ -0,0 +1,208 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_uniform_continuous: + +uniform (Continuous) +==================== + + +Generates random numbers with uniform distribution. + +..
rubric:: Description + +The ``uniform`` class object is used in the ``generate`` function to provide random numbers uniformly +distributed over the interval [``a``, ``b``), where ``a``, ``b`` are the left and right bounds of the interval, +respectively, and +:math:`a, b \in R ; a < b`. + + +The probability density function is given by: + +.. math:: + + f_{a, b}(x) = + \begin{cases} + \frac{1}{b-a}, & x \in [a, b)\\ + 0, & x \notin [a, b) \end{cases}, -\infty < x < +\infty + +The cumulative distribution function is as follows: + + +.. math:: + + F_{a, b}(x) = + \begin{cases} + 0, & x < a \\ + \frac{x-a}{b-a}, & a \leq x < b \\ + 1, & x \geq b + \end{cases}, + -\infty < x < +\infty + + +class uniform +------------- + +.. rubric:: Syntax + +.. code-block:: cpp + + namespace oneapi::mkl::rng::device { + template <typename RealType, typename Method> + class uniform { + public: + using method_type = Method; + using result_type = RealType; + + uniform(); + explicit uniform(RealType a, RealType b); + + RealType a() const; + RealType b() const; + }; + } + + +.. container:: section + + .. rubric:: Template parameters + + .. container:: section + + typename RealType + Type of the produced values. Supported types: + + * ``float`` + * ``double`` + + .. container:: section + + typename Method + Generation method. The specific values are as follows: + + * ``oneapi::mkl::rng::device::uniform_method::by_default`` + * ``oneapi::mkl::rng::device::uniform_method::standard`` + * ``oneapi::mkl::rng::device::uniform_method::accurate`` + + See description of the methods in :ref:`Distributions methods template parameter` + + +.. container:: section + + .. rubric:: Class Members + + .. list-table:: + :header-rows: 1 + + * - Routine + - Description + * - `uniform()`_ + - Default constructor + * - `explicit uniform(RealType a, RealType b)`_ + - Constructor with parameters + * - `RealType a() const`_ + - Method to obtain left bound `a` + * - `RealType b() const`_ + - Method to obtain right bound `b` + +.. container:: section + + ..
rubric:: Member types + + .. container:: section + + .. code-block:: cpp + + uniform::method_type = Method + + .. container:: section + + .. rubric:: Description + + The type which defines transformation method for generation. + + .. container:: section + + .. code-block:: cpp + + uniform::result_type = RealType + + .. container:: section + + .. rubric:: Description + + The type which defines type of generated random numbers. + +.. container:: section + + .. rubric:: Constructors + + .. container:: section + + .. _`uniform()`: + + .. code-block:: cpp + + uniform::uniform() + + .. container:: section + + .. rubric:: Description + + Default constructor for distribution, parameters set as `a` = 0.0, `b` = 1.0. + + .. container:: section + + .. _`explicit uniform(RealType a, RealType b)`: + + .. code-block:: cpp + + explicit uniform::uniform(RealType a, RealType b) + + .. container:: section + + .. rubric:: Description + + Constructor with parameters. `a` is a left bound, `b` is a right bound, assume :math:`a < b`. + + .. container:: section + + .. rubric:: Throws + + oneapi::mkl::invalid_argument + Exception is thrown when :math:`a \ge b` + +.. container:: section + + .. rubric:: Characteristics + + .. container:: section + + .. _`RealType a() const`: + + .. code-block:: cpp + + RealType uniform::a() const + + .. container:: section + + .. rubric:: Return Value + + Returns the distribution parameter `a` - left bound. + + .. container:: section + + .. _`RealType b() const`: + + .. code-block:: cpp + + RealType uniform::b() const + + .. container:: section + + .. rubric:: Return Value + + Returns the distribution parameter `b` - right bound. 
+ +**Parent topic:** :ref:`onemkl_device_rng_distributions` diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-rng-uniform-discrete.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-uniform-discrete.rst new file mode 100644 index 000000000..aa55315a8 --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-uniform-discrete.rst @@ -0,0 +1,177 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_uniform_discrete: + +uniform (Discrete) +================== + + +Generates random numbers uniformly distributed over the interval ``[a, b)``. + +.. rubric:: Description + +The ``uniform`` class object is used in the ``generate`` function +to provide random numbers uniformly distributed over the interval ``[a, b)``, where ``a, b`` are the left and right +bounds of the interval, respectively, and :math:`a, b \in Z ; a < b`. + + +The probability distribution is given by: + +.. math:: + + P(X = k) = \frac{1}{b-a}, + k \in \{a, a + 1, \ldots, b-1\} + +The cumulative distribution function is as follows: + +.. math:: + + F_{a, b}(x) = + \begin{cases} + 0, & x < a \\ + \frac{\lfloor x-a + 1 \rfloor}{b-a}, & a \leq x < b \\ + 1, & x \geq b + \end{cases}, + x \in R + + +class uniform +------------- + +.. rubric:: Syntax + +.. code-block:: cpp + + namespace oneapi::mkl::rng::device { + template + class uniform { + public: + using method_type = Method; + using result_type = Type; + + uniform(); + explicit uniform(Type a, Type b); + + Type a() const; + Type b() const; + }; + } + + +.. container:: section + + .. rubric:: Template parameters + + .. container:: section + + typename Type + Type of the produced values. Supported types: + + * ``std::int32_t`` + * ``std::uint32_t`` + + .. container:: section + + typename Method = oneapi::mkl::rng::device::uniform_method::by_default + Transformation method, which will be used for generation. 
Supported types: + + * ``oneapi::mkl::rng::device::uniform_method::by_default`` + * ``oneapi::mkl::rng::device::uniform_method::standard`` + * ``oneapi::mkl::rng::device::uniform_method::accurate`` + + See description of the methods in :ref:`Distributions methods template parameter`. + + +.. container:: section + + .. rubric:: Class Members + + .. list-table:: + :header-rows: 1 + + * - Routine + - Description + * - `uniform()`_ + - Default constructor + * - `explicit uniform(Type a, Type b)`_ + - Constructor with parameters + * - `Type a() const`_ + - Method to obtain left bound `a` + * - `Type b() const`_ + - Method to obtain right bound `b` + +.. container:: section + + .. rubric:: Constructors + + .. container:: section + + .. _`uniform()`: + + .. code-block:: cpp + + uniform::uniform() + + .. container:: section + + .. rubric:: Description + + Default constructor for the distribution; parameters are set as `a` = 0, `b` = (1 << 23) with ``uniform_method::standard`` + or ``std::numeric_limits<Type>::max()`` with ``uniform_method::accurate``. + + .. container:: section + + .. _`explicit uniform(Type a, Type b)`: + + .. code-block:: cpp + + explicit uniform::uniform(Type a, Type b) + + .. container:: section + + .. rubric:: Description + + Constructor with parameters. `a` is a left bound, `b` is a right bound, assume :math:`a < b`. + + .. container:: section + + .. rubric:: Throws + + oneapi::mkl::invalid_argument + Exception is thrown when :math:`a \ge b` + +.. container:: section + + .. rubric:: Characteristics + + .. container:: section + + .. _`Type a() const`: + + .. code-block:: cpp + + Type uniform::a() const + + .. container:: section + + .. rubric:: Return Value + + Returns the distribution parameter `a` - left bound. + + .. container:: section + + .. _`Type b() const`: + + .. code-block:: cpp + + Type uniform::b() const + + .. container:: section + + .. rubric:: Return Value + + Returns the distribution parameter `b` - right bound. 
+ +**Parent topic:** :ref:`onemkl_device_rng_distributions` diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-rng-usage-model.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-usage-model.rst new file mode 100644 index 000000000..15c65c663 --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-rng-usage-model.rst @@ -0,0 +1,77 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_usage_model: + +oneMKL RNG Device Usage Model +============================= + +.. contents:: + :local: + :depth: 1 + +A typical usage model for device routines is the same as described in +:ref:`onemkl_rng_usage_model`: + + +#. Create and initialize the object for basic random number generator. + +#. Create and initialize the object for distribution generator. + +#. Call the generate routine to get random numbers with appropriate statistical distribution. + + +Example of Scalar Random Numbers Generation +------------------------------------------- + +.. code-block:: cpp + + #include "oneapi/mkl/rng/device.hpp" + + int main() { + sycl::queue q; + // Prepare a memory for random numbers + // Submit a kernel to generate on device + q.submit([&](sycl::handler& cgh) { + // ... + cgh.parallel_for(n, [=](size_t idx) { + // Create an engine object + oneapi::mkl::rng::device::philox4x32x10<> engine(seed, idx); + // Create a distribution object + oneapi::mkl::rng::device::uniform distr; + // Call generate function to obtain scalar random number + float res = oneapi::mkl::rng::device::generate(distr, engine); + // ... + }); + }); + // ... + } + +Example of Vector Random Numbers Generation +------------------------------------------- + +.. code-block:: cpp + + #include "oneapi/mkl/rng/device.hpp" + + int main() { + sycl::queue q; + // Prepare an array for random numbers + // Submit a kernel to generate on device + q.submit([&](sycl::handler& cgh) { + // ... 
+ cgh.parallel_for((n / vec_size), [=](size_t idx) { + // Create an engine object + oneapi::mkl::rng::device::philox4x32x10<vec_size> engine(seed, idx * vec_size); + // Create a distribution object + oneapi::mkl::rng::device::uniform distr; + // Call generate function to obtain random numbers + sycl::vec<float, vec_size> res = oneapi::mkl::rng::device::generate(distr, engine); + // ... + }); + }); + // ... + } + +**Parent topic:** :ref:`onemkl_device_rng_routines` diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-routines.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-routines.rst new file mode 100644 index 000000000..16e7d7664 --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-routines.rst @@ -0,0 +1,62 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_routines: + +Random Number Generators Device Routines +======================================== + +Device routines are intended to be called from your SYCL kernels; however, they can also be called from the host. For example: + +.. code-block:: + + sycl::queue queue; + + queue.submit([&](sycl::handler& cgh) { + cgh.parallel_for(range,[=](...) { + oneapi::mkl::rng::device::routine(...); // calling routine from user's kernel code + }); + }); + + oneapi::mkl::rng::device::routine(...); // calling routine from host + +.. rubric:: Structure + +The RNG domain contains two types of classes: + + - Engine (basic random number generator) classes, which hold + the state of a generator and are a source of independent and identically distributed random variables. + Refer to :ref:`onemkl_rng_engines_basic_random_number_generators` + for a detailed description. + - Distribution class templates (transformation classes) for different types of statistical + distributions, for example, uniform, normal (Gaussian), binomial, + etc. 
These classes contain all of the distribution’s parameters + (including the generation method). Refer to :ref:`onemkl_device_rng_distributions` for + a detailed description of the distributions. + +The RNG domain also contains two types of free functions: + + - Generation routines. These routines are used to obtain random + numbers from a given engine with proper statistics defined by a + given distribution. Refer to the :ref:`onemkl_device_rng_generate_routines` + section for a detailed description. + - Service routines. The routines are used to modify the engine state. Refer to :ref:`onemkl_device_rng_service_routines` for a + description of these routines. + + +Engine classes work with both generation and service routines. Distribution classes are used in +generation routines only. Refer to the :ref:`onemkl_device_rng_usage_model` +section for a description of a typical RNG scenario. + +.. toctree:: + :hidden: + + device-rng-usage-model.rst + device-rng-generate-routines.rst + device-engines.rst + device-distributions.rst + device-service-routines.rst + ../bibliography.rst + +**Parent topic:** :ref:`onemkl_rng` diff --git a/source/elements/oneMKL/source/domains/rng/device_api/device-service-routines.rst b/source/elements/oneMKL/source/domains/rng/device_api/device-service-routines.rst new file mode 100644 index 000000000..cf0418890 --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/device_api/device-service-routines.rst @@ -0,0 +1,27 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_device_rng_service_routines: + +Device Service Routines +======================= + +.. tabularcolumns:: |\Y{0.4}|\Y{0.6}| + +.. list-table:: + :header-rows: 1 + + * - Routine + - Description + + * - :ref:`onemkl_device_rng_skip_ahead` + - Advances the engine state by the skip-ahead method, skipping a given number of elements of the original sequence. + +.. 
toctree:: + :maxdepth: 1 + :hidden: + + device-rng-skip-ahead.rst + +**Parent topic:** :ref:`onemkl_device_rng_routines` \ No newline at end of file diff --git a/source/elements/oneMKL/source/domains/rng/distributions-template-parameter-mkl-rng-method-values.rst b/source/elements/oneMKL/source/domains/rng/host_api/distributions-template-parameter-mkl-rng-method-values.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/distributions-template-parameter-mkl-rng-method-values.rst rename to source/elements/oneMKL/source/domains/rng/host_api/distributions-template-parameter-mkl-rng-method-values.rst diff --git a/source/elements/oneMKL/source/domains/rng/distributions.rst b/source/elements/oneMKL/source/domains/rng/host_api/distributions.rst similarity index 97% rename from source/elements/oneMKL/source/domains/rng/distributions.rst rename to source/elements/oneMKL/source/domains/rng/host_api/distributions.rst index d8f000357..8308f0783 100644 --- a/source/elements/oneMKL/source/domains/rng/distributions.rst +++ b/source/elements/oneMKL/source/domains/rng/host_api/distributions.rst @@ -4,8 +4,8 @@ .. _onemkl_rng_distributions: -Distributions -============= +Host Distributions +================== .. container:: @@ -23,8 +23,7 @@ Distributions generator routines and the basic random number generators. - _`Table Continuous Distribution Generators` - + _`Table Continuous Distribution Generators` .. container:: tablenoborder @@ -62,7 +61,8 @@ Distributions - Normal Multivariate (Gaussian Multivariate) distribution - _`Table Discrete Distribution Generators` + _`Table Discrete Distribution Generators` + .. 
container:: tablenoborder @@ -142,7 +142,7 @@ Distributions * - \ :ref:`onemkl_rng_beta`\ - `oneapi::mkl::rng::beta_method::cja_accurate`   - **Parent topic:** :ref:`onemkl_rng` + **Parent topic:** :ref:`onemkl_rng_manual_offload_routines` diff --git a/source/elements/oneMKL/source/domains/rng/engines-basic-random-number-generators.rst b/source/elements/oneMKL/source/domains/rng/host_api/engines-basic-random-number-generators.rst similarity index 97% rename from source/elements/oneMKL/source/domains/rng/engines-basic-random-number-generators.rst rename to source/elements/oneMKL/source/domains/rng/host_api/engines-basic-random-number-generators.rst index 30c74467f..5b4b6a2ce 100644 --- a/source/elements/oneMKL/source/domains/rng/engines-basic-random-number-generators.rst +++ b/source/elements/oneMKL/source/domains/rng/host_api/engines-basic-random-number-generators.rst @@ -4,8 +4,8 @@ .. _onemkl_rng_engines_basic_random_number_generators: -Engines (Basic Random Number Generators) -======================================== +Host Engines (Basic Random Number Generators) +============================================= .. container:: @@ -72,7 +72,7 @@ Engines (Basic Random Number Generators) [:ref:`Coddington94 `]. - **Parent topic:** :ref:`onemkl_rng` + **Parent topic:** :ref:`onemkl_rng_manual_offload_routines` .. container:: diff --git a/source/elements/oneMKL/source/domains/rng/generate-routine.rst b/source/elements/oneMKL/source/domains/rng/host_api/generate-routine.rst similarity index 76% rename from source/elements/oneMKL/source/domains/rng/generate-routine.rst rename to source/elements/oneMKL/source/domains/rng/host_api/generate-routine.rst index cf6165219..8c43e955c 100644 --- a/source/elements/oneMKL/source/domains/rng/generate-routine.rst +++ b/source/elements/oneMKL/source/domains/rng/host_api/generate-routine.rst @@ -4,8 +4,8 @@ .. _onemkl_rng_generate_routine: -Generate Routine -================ +Host Generate Routine +===================== .. 
container:: @@ -14,7 +14,7 @@ Generate Routine Entry point to obtain random numbers from a given engine with proper statistics of a given distribution. - **Parent topic:** :ref:`onemkl_rng` + **Parent topic:** :ref:`onemkl_rng_manual_offload_routines` .. toctree:: :hidden: diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-ars5.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-ars5.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-ars5.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-ars5.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-bernoulli.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-bernoulli.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-bernoulli.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-bernoulli.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-beta.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-beta.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-beta.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-beta.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-binomial.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-binomial.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-binomial.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-binomial.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-bits.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-bits.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-bits.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-bits.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-cauchy.rst 
b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-cauchy.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-cauchy.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-cauchy.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-chi_square.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-chi_square.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-chi_square.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-chi_square.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-default_engine.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-default_engine.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-default_engine.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-default_engine.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-exponential.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-exponential.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-exponential.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-exponential.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-gamma.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-gamma.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-gamma.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-gamma.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-gaussian.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-gaussian.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-gaussian.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-gaussian.rst diff --git 
a/source/elements/oneMKL/source/domains/rng/mkl-rng-gaussian_mv.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-gaussian_mv.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-gaussian_mv.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-gaussian_mv.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-generate.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-generate.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-generate.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-generate.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-geometric.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-geometric.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-geometric.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-geometric.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-gumbel.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-gumbel.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-gumbel.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-gumbel.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-hypergeometric.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-hypergeometric.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-hypergeometric.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-hypergeometric.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-laplace.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-laplace.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-laplace.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-laplace.rst diff 
--git a/source/elements/oneMKL/source/domains/rng/mkl-rng-leapfrog.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-leapfrog.rst similarity index 97% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-leapfrog.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-leapfrog.rst index 5f824399c..b767ec0b6 100644 --- a/source/elements/oneMKL/source/domains/rng/mkl-rng-leapfrog.rst +++ b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-leapfrog.rst @@ -76,4 +76,4 @@ leapfrog **Parent topic:** :ref:`onemkl_rng_service_routines` -.. |image0| image:: ../equations/rng-leapfrog.png +.. |image0| image:: ../../equations/rng-leapfrog.png diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-lognormal.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-lognormal.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-lognormal.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-lognormal.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-mcg31m1.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-mcg31m1.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-mcg31m1.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-mcg31m1.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-mcg59.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-mcg59.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-mcg59.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-mcg59.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-mrg32k3a.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-mrg32k3a.rst similarity index 96% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-mrg32k3a.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-mrg32k3a.rst index 
a0cbec284..e72ff80b0 100644 --- a/source/elements/oneMKL/source/domains/rng/mkl-rng-mrg32k3a.rst +++ b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-mrg32k3a.rst @@ -72,7 +72,7 @@ class mrg32k3a * - Routine - Description - * - `sycl::queue queue, std::uint32_t seed = default_seed`_ + * - `mrg32k3a(sycl::queue queue, std::uint32_t seed = default_seed)`_ - Constructor for common seed initialization of the engine * - `mrg32k3a(sycl::queue queue, std::initializer_list seed)`_ - Constructor for extended seed initialization of the engine @@ -89,11 +89,11 @@ class mrg32k3a .. rubric:: Constructors - .. _`sycl::queue queue, std::uint32_t seed = default_seed`: + .. _`mrg32k3a(sycl::queue queue, std::uint32_t seed = default_seed)`: .. code-block:: cpp - mrg32k3a::sycl::queue queue, std::uint32_t seed = default_seed + mrg32k3a::mrg32k3a(sycl::queue queue, std::uint32_t seed = default_seed) .. container:: section diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-mt19937.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-mt19937.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-mt19937.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-mt19937.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-mt2203.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-mt2203.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-mt2203.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-mt2203.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-multinomial.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-multinomial.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-multinomial.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-multinomial.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-negbinomial.rst 
b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-negbinomial.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-negbinomial.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-negbinomial.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-niederreiter.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-niederreiter.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-niederreiter.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-niederreiter.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-nondeterministic.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-nondeterministic.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-nondeterministic.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-nondeterministic.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-philox4x32x10.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-philox4x32x10.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-philox4x32x10.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-philox4x32x10.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-poisson.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-poisson.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-poisson.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-poisson.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-poisson_v.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-poisson_v.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-poisson_v.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-poisson_v.rst diff --git 
a/source/elements/oneMKL/source/domains/rng/mkl-rng-r250.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-r250.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-r250.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-r250.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-rayleigh.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-rayleigh.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-rayleigh.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-rayleigh.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-sfmt19937.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-sfmt19937.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-sfmt19937.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-sfmt19937.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-skip_ahead.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-skip_ahead.rst similarity index 96% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-skip_ahead.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-skip_ahead.rst index 553ff483d..f97542186 100644 --- a/source/elements/oneMKL/source/domains/rng/mkl-rng-skip_ahead.rst +++ b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-skip_ahead.rst @@ -108,7 +108,7 @@ skip_ahead (Interface with a partitioned number of skipped elements) oneapi::mkl::rng::mrg32k3a engine_1(queue, seed); // To skip 2^64 elements in the random stream number of skipped elements should be - /represented as num_to_skip = 2^64 = 0 + 1 * 2^64 + // represented as num_to_skip = 2^64 = 0 + 1 * 2^64 std::initializer_list num_to_skip = {0, 1}; // Creating the 2nd engine based on 1st. 
Skipping by 2^64 @@ -118,4 +118,4 @@ skip_ahead (Interface with a partitioned number of skipped elements) **Parent topic:** :ref:`onemkl_rng_service_routines` -.. |image0| image:: ../equations/rng-skip-ahead.png +.. |image0| image:: ../../equations/rng-skip-ahead.png diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-sobol.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-sobol.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-sobol.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-sobol.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-uniform-continuous.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-uniform-continuous.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-uniform-continuous.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-uniform-continuous.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-uniform-discrete.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-uniform-discrete.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-uniform-discrete.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-uniform-discrete.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-uniform_bits.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-uniform_bits.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-uniform_bits.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-uniform_bits.rst diff --git a/source/elements/oneMKL/source/domains/rng/mkl-rng-weibull.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-weibull.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-weibull.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-weibull.rst diff --git 
a/source/elements/oneMKL/source/domains/rng/mkl-rng-wichmann_hill.rst b/source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-wichmann_hill.rst similarity index 100% rename from source/elements/oneMKL/source/domains/rng/mkl-rng-wichmann_hill.rst rename to source/elements/oneMKL/source/domains/rng/host_api/mkl-rng-wichmann_hill.rst diff --git a/source/elements/oneMKL/source/domains/rng/host_api/onemkl-rng-usage-model.rst b/source/elements/oneMKL/source/domains/rng/host_api/onemkl-rng-usage-model.rst new file mode 100644 index 000000000..ad62a589e --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/host_api/onemkl-rng-usage-model.rst @@ -0,0 +1,68 @@ +.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_rng_usage_model: + +oneMKL RNG Host Usage Model +=========================== + + +.. rubric:: Description + +A typical algorithm for random number generators is as follows: + +1. Create and initialize the object for basic random number generator. + + - Use the `skip_ahead` or `leapfrog` function if it is required (used in parallel with random number generation for Host and CPU devices). + +2. Create and initialize the object for distribution generator. + +3. Call the generate routine to get random numbers with appropriate statistical distribution. + +The following example demonstrates random numbers generation with PHILOX4X32X10 basic generator (engine). + +Buffer-based example +-------------------- + +.. code-block:: cpp + + #include "oneapi/mkl/rng.hpp" + + int main() { + sycl::queue q; + + // Create the random number generator object + oneapi::mkl::rng::philox4x32x10 engine(q, seed); + // Create the distribution object + oneapi::mkl::rng::gaussian distr(5.0, 2.0); + // Fill the SYCL buffer with random numbers + oneapi::mkl::rng::generate(distr, engine, n, sycl_buffer); + + // ... + } + + +USM-based example +----------------- + +.. 
code-block:: cpp + + #include "oneapi/mkl/rng.hpp" + + int main() { + sycl::queue q; + + // Create the random number generator object + oneapi::mkl::rng::philox4x32x10 engine(q, seed); + // Create the distribution object + oneapi::mkl::rng::gaussian distr(5.0, 2.0); + // Fill the USM memory under the pointer with random numbers + auto event = oneapi::mkl::rng::generate(distr, engine, n, usm_ptr); + // ... + // wait until generation is finalized + event.wait(); + // ... + } + +**Parent topic:** :ref:`onemkl_rng_manual_offload_routines` diff --git a/source/elements/oneMKL/source/domains/rng/host_api/rng-host-routines.rst b/source/elements/oneMKL/source/domains/rng/host_api/rng-host-routines.rst new file mode 100644 index 000000000..053160201 --- /dev/null +++ b/source/elements/oneMKL/source/domains/rng/host_api/rng-host-routines.rst @@ -0,0 +1,48 @@ +.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_rng_manual_offload_routines: + +Random Number Generators Host Routines +====================================== + +.. rubric:: Structure + +The RNG domain contains two types of classes: + + - Engine (basic random number generator) classes, which hold + the state of the generator and are a source of independent and identically distributed random variables. + Refer to :ref:`onemkl_rng_engines_basic_random_number_generators` + for a detailed description. + - Distribution class templates (transformation classes) for different types of statistical + distributions, for example, uniform, normal (Gaussian), binomial, + etc. These classes contain all of the distribution’s parameters + (including generation method). Refer to :ref:`onemkl_rng_distributions` for + a detailed description of the distributions. + +The RNG domain also contains two types of free functions: + + - Generation routines. The current routines are used to obtain random + numbers from a given engine with proper statistics defined by a + given distribution. 
Refer to the :ref:`onemkl_rng_generate_routine` + section for a detailed description. + - Service routines. The routines are used to modify the engine state. Refer to :ref:`onemkl_rng_service_routines` for a + description of these routines. + + +Engine classes work with both generation and service routines. Distribution classes are used in +generation routines only. Refer to the :ref:`onemkl_rng_usage_model` +section for the description of typical RNG scenario. + +.. toctree:: + :hidden: + + onemkl-rng-usage-model.rst + generate-routine.rst + engines-basic-random-number-generators.rst + service-routines.rst + distributions.rst + ../bibliography.rst + +**Parent topic:** :ref:`onemkl_rng` \ No newline at end of file diff --git a/source/elements/oneMKL/source/domains/rng/service-routines.rst b/source/elements/oneMKL/source/domains/rng/host_api/service-routines.rst similarity index 86% rename from source/elements/oneMKL/source/domains/rng/service-routines.rst rename to source/elements/oneMKL/source/domains/rng/host_api/service-routines.rst index 78d412f3e..08c0c072d 100644 --- a/source/elements/oneMKL/source/domains/rng/service-routines.rst +++ b/source/elements/oneMKL/source/domains/rng/host_api/service-routines.rst @@ -4,8 +4,8 @@ .. _onemkl_rng_service_routines: -Service Routines -================ +Host Service Routines +===================== .. container:: @@ -26,7 +26,7 @@ Service Routines - **Parent topic:** :ref:`onemkl_rng` + **Parent topic:** :ref:`onemkl_rng_manual_offload_routines` .. 
toctree:: :hidden: diff --git a/source/elements/oneMKL/source/domains/rng/onemkl-rng-overview.rst b/source/elements/oneMKL/source/domains/rng/onemkl-rng-overview.rst index 7fbcd4bb8..ddf440a33 100755 --- a/source/elements/oneMKL/source/domains/rng/onemkl-rng-overview.rst +++ b/source/elements/oneMKL/source/domains/rng/onemkl-rng-overview.rst @@ -35,42 +35,21 @@ In computational statistics, random variate generation is usually made in two st in order to generate (or imitate) random variates and random vectors from arbitrary distributions. -.. rubric:: Structure -RNG domain contains two classes types: +.. rubric:: Execution Models - - Engines (basic random number generators) classes, which holds - the state of generator and is a source of i.i.d. random. Refer to - :ref:`onemkl_rng_engines_basic_random_number_generators` - for a detailed description. - - Distribution classes templates (transformation classes) for different types of statistical - distributions, for example, uniform, normal (Gaussian), binomial, - etc. These classes contain all of the distribution’s parameters - (including generation method). Refer to :ref:`onemkl_rng_distributions` for - a detailed description of the distributions. +RNG domain supports two execution models: -The RNG domain also contains two types of free functions: + #. :ref:`Host API`, which is aligned with the rest of oneMKL domains + :ref:`oneMKL domains`. + #. :ref:`Device API`, which is specific for RNG domain. These APIs + are designed to be callable from the User's kernels as well as Host code. - - Generation routines. The current routines are used to obtain random - numbers from a given engine with proper statistics defined by a - given distribution. Refer to the :ref:`onemkl_rng_generate_routine` - section for a detailed description. - - Service routines. The routines are used to modify the engine state. Refer to :ref:`onemkl_rng_service_routines` for a - description of these routines. 
- - -Engine classes work with both generation and service routines. Distribution classes are used in -generation routines only. Refer to the :ref:`onemkl_rng_usage_model` -section for the description of typical RNG scenario. .. toctree:: :hidden: - onemkl-rng-usage-model.rst - generate-routine.rst - engines-basic-random-number-generators.rst - service-routines.rst - distributions.rst - bibliography.rst + host_api/rng-host-routines.rst + device_api/device-routines.rst -**Parent topic:** :ref:`onemkl_rng` \ No newline at end of file +**Parent topic:** :ref:`onemkl_rng` diff --git a/source/elements/oneMKL/source/domains/rng/onemkl-rng-usage-model.rst b/source/elements/oneMKL/source/domains/rng/onemkl-rng-usage-model.rst deleted file mode 100644 index 72a38a00b..000000000 --- a/source/elements/oneMKL/source/domains/rng/onemkl-rng-usage-model.rst +++ /dev/null @@ -1,111 +0,0 @@ -.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation -.. -.. SPDX-License-Identifier: CC-BY-4.0 - -.. _onemkl_rng_usage_model: - -oneMKL RNG Usage Model -====================== - - -.. rubric:: Description - -A typical algorithm for random number generators is as follows: - -1. Create and initialize the object for basic random number generator. - - - Use the `skip_ahead` or `leapfrog` function if it is required (used in parallel with random number generation for Host and CPU devices). - -2. Create and initialize the object for distribution generator. - -3. Call the generate routine to get random numbers with appropriate statistical distribution. - -The following example demonstrates generation of random numbers that -is output of basic generator (engine) PHILOX4X32X10. The seed is -equal to 777. The generator is used to generate 10,000 normally -distributed random numbers with parameters ``a`` = 5 and ``sigma``\ = -2. The purpose of the example is to calculate the sample mean for -normal distribution with the given parameters. - -Buffer-based example --------------------- - -.. 
code-block:: cpp - - #include - #include - - #include "CL/sycl.hpp" - #include "oneapi/mkl/rng.hpp" - - int main() { - sycl::queue queue; - const size_t n = 10000; - const std::uint64_t seed = 777; - std::vector r(n); - - oneapi::mkl::rng::philox4x32x10 engine(queue, seed); // basic random number generator object - oneapi::mkl::rng::gaussian distr(5.0, 2.0); // distribution object - - { - //create buffer for random numbers - sycl::buffer r_buf(r.data(), r.size()); - oneapi::mkl::rng::generate(distr, engine, n, r_buf); // perform generation - } - - double s = 0.0; - for(int i = 0; i < n; i++) { - s += r[i]; - } - s /= n; - - std::cout << "Average = " << s << std::endl; - return 0; - } - - -USM-based example ------------------ - -.. code-block:: cpp - - #include - #include - - #include "CL/sycl.hpp" - #include "oneapi/mkl/rng.hpp" - - int main() { - sycl::queue queue; - const size_t n = 10000; - const std::uint64_t seed = 777; - - // create USM allocator - sycl::usm_allocator allocator(queue.get_context(), queue.get_device()); - - // create vector with USM allocator - std::vector r(n, allocator); - - oneapi::mkl::rng::philox4x32x10 engine(queue, seed); // basic random number generator object - oneapi::mkl::rng::gaussian distr(5.0, 2.0); // distribution object - - auto event = oneapi::mkl::rng::generate(distr, engine, n, r.data()); // perform generation - // sycl::event object is returned by generate function for synchronization - event.wait(); // synchronization can be also done by queue.wait() - - double s = 0.0; - for(int i = 0; i < n; i++) { - s += r[i]; - } - s /= n; - - std::cout << "Average = " << s << std::endl; - return 0; - } - - -.. rubric:: USM usage - -You can also use USM with raw pointers by using the sycl::malloc_shared/malloc_device functions. 
- -**Parent topic:** :ref:`onemkl_rng` diff --git a/source/elements/oneMKL/source/domains/spblas/gemm.rst b/source/elements/oneMKL/source/domains/spblas/gemm.rst index 908600e46..43dd0b0c4 100644 --- a/source/elements/oneMKL/source/domains/spblas/gemm.rst +++ b/source/elements/oneMKL/source/domains/spblas/gemm.rst @@ -191,7 +191,7 @@ gemm (USM version) const std::int64_t columns, const std::int64_t ldb, const fp beta, - const fp *C, + fp *C, const std::int64_t ldc, const std::vector &dependencies = {}); diff --git a/source/elements/oneMKL/source/domains/spblas/gemmoptimize.rst b/source/elements/oneMKL/source/domains/spblas/gemmoptimize.rst new file mode 100644 index 000000000..83a4f3ef6 --- /dev/null +++ b/source/elements/oneMKL/source/domains/spblas/gemmoptimize.rst @@ -0,0 +1,208 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_sparse_optimize_gemm: + +optimize_gemm +============= + +Performs internal optimizations for oneapi::mkl::sparse::gemm by analyzing +the matrix structure. + +.. rubric:: Description and Assumptions + +The oneapi::mkl::sparse::optimize_gemm routine analyzes matrix structure +and performs optimizations. Optimized data is then stored in +the handle. + +In contrast to other optimization routines in Sparse BLAS domain +which are done solely based on the sparse matrix pattern, +two versions of the ``sparse::optimize_gemm`` routine are provided for preparing different +optimizations for ``sparse::gemm`` routine. In particular, if the shape +of the dense matrix right hand side, :math:`B`, is unknown or widely varying in +subsequent calls to ``sparse::gemm`` then a user might reasonably +only wish to perform optimizations for ``sparse::gemm`` with respect +to the sparse matrix structure. 
However, if one or more particular shapes of :math:`B` +are available, then each :math:`B` shape can be provided as an additional hint +along with the sparse matrix pattern in the call to ``sparse::optimize_gemm``. This +second version of the API with :math:`B` shape should be callable one or +more times and may allow libraries to provide more targeted performance +optimizations. + +.. _onemkl_sparse_optimize_gemm_A: + +optimize_gemm (based on Sparse Matrix) +-------------------------------------- + +.. rubric:: Syntax + +.. code-block:: cpp + + namespace oneapi::mkl::sparse { + + sycl::event optimize_gemm (sycl::queue &queue, + oneapi::mkl::transpose transpose_A, + oneapi::mkl::sparse::matrix_handle_t A_handle, + const std::vector &dependencies = {}); + + } + +.. container:: section + + .. rubric:: Input Parameters + + queue + Specifies the SYCL command queue which will be used for SYCL + kernels execution. + + + transpose_A + Specifies operation ``op()`` on input matrix :math:`A`. The possible options + are described in :ref:`onemkl_enum_transpose` enum class. + + + A_handle + Handle to object containing sparse matrix and other internal + data. Created using the + oneapi::mkl::sparse::set_csr_data routine. + + + dependencies + List of events that oneapi::mkl::sparse::optimize_gemm routine depends on. + + +.. container:: section + + .. rubric:: Output Parameters + :class: sectiontitle + + A_handle + Handle might be updated with some internal optimized data by this routine. + +.. container:: section + + .. rubric:: Throws + :class: sectiontitle + + This routine shall throw the following exceptions if the associated condition is detected. + An implementation may throw additional implementation-specific exception(s) + in case of error conditions not covered here. 
+ + | :ref:`oneapi::mkl::computation_error` + | :ref:`oneapi::mkl::device_bad_alloc` + | :ref:`oneapi::mkl::host_bad_alloc` + | :ref:`oneapi::mkl::invalid_argument` + | :ref:`oneapi::mkl::unimplemented` + | :ref:`oneapi::mkl::uninitialized` + | :ref:`oneapi::mkl::unsupported_device` + +.. container:: section + + .. rubric:: Return Values + :class: sectiontitle + + Output event that can be waited upon or added as a + dependency for the completion of optimize_gemm routine. + + +.. _onemkl_sparse_optimize_gemm_AB: + +optimize_gemm (based on Both Input Matrices) +-------------------------------------------- + +.. rubric:: Syntax + +.. code-block:: cpp + + namespace oneapi::mkl::sparse { + + sycl::event optimize_gemm (sycl::queue &queue, + oneapi::mkl::transpose transpose_A, + oneapi::mkl::transpose transpose_B, + oneapi::mkl::layout dense_matrix_layout, + const std::int64_t columns, + oneapi::mkl::sparse::matrix_handle_t A_handle, + const std::vector &dependencies = {}); + + } + +.. container:: section + + .. rubric:: Input Parameters + + queue + Specifies the SYCL command queue which will be used for SYCL + kernels execution. + + + transpose_A + Specifies operation ``op()`` on input matrix :math:`A`. The possible options + are described in :ref:`onemkl_enum_transpose` enum class. + + + transpose_B + Specifies operation ``op()`` on input matrix :math:`B`. The possible options + are described in :ref:`onemkl_enum_transpose` enum class. + + + dense_matrix_layout + Specifies the storage scheme in memory for the dense matrices. Note that this layout applies to both :math:`B` and :math:`C` dense matrices. + The possible options are described in :ref:`onemkl_enum_layout` enum class. + + + columns + Number of columns of matrix :math:`C`. + + + A_handle + Handle to object containing sparse matrix and other internal + data. Created using the + oneapi::mkl::sparse::set_csr_data routine. + + + dependencies + List of events that oneapi::mkl::sparse::optimize_gemm routine depends on. 
+ + +.. container:: section + + .. rubric:: Output Parameters + :class: sectiontitle + + A_handle + Handle might be updated with some internal optimized data by this routine. + +.. container:: section + + .. rubric:: Throws + :class: sectiontitle + + This routine shall throw the following exceptions if the associated condition is detected. + An implementation may throw additional implementation-specific exception(s) + in case of error conditions not covered here. + + | :ref:`oneapi::mkl::computation_error` + | :ref:`oneapi::mkl::device_bad_alloc` + | :ref:`oneapi::mkl::host_bad_alloc` + | :ref:`oneapi::mkl::invalid_argument` + | :ref:`oneapi::mkl::unimplemented` + | :ref:`oneapi::mkl::uninitialized` + | :ref:`oneapi::mkl::unsupported_device` + +.. container:: section + + .. rubric:: Return Values + :class: sectiontitle + + Output event that can be waited upon or added as a + dependency for the completion of optimize_gemm routine. + + +.. container:: familylinks + + + .. container:: parentlink + + + **Parent topic:** :ref:`onemkl_spblas` diff --git a/source/elements/oneMKL/source/domains/spblas/gemv.rst b/source/elements/oneMKL/source/domains/spblas/gemv.rst index 7284ff19b..3bf93089c 100644 --- a/source/elements/oneMKL/source/domains/spblas/gemv.rst +++ b/source/elements/oneMKL/source/domains/spblas/gemv.rst @@ -126,7 +126,7 @@ gemv (USM version) oneapi::mkl::sparse::matrix_handle_t A_handle, const fp *x, const fp beta, - const fp *y, + fp *y, const std::vector &dependencies = {}); } diff --git a/source/elements/oneMKL/source/domains/spblas/gemvdot.rst b/source/elements/oneMKL/source/domains/spblas/gemvdot.rst index 349b69f4c..71d010829 100644 --- a/source/elements/oneMKL/source/domains/spblas/gemvdot.rst +++ b/source/elements/oneMKL/source/domains/spblas/gemvdot.rst @@ -45,10 +45,10 @@ gemvdot (Buffer version) void gemvdot (sycl::queue &queue, oneapi::mkl::transpose transpose_val, - fp alpha, + const fp alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, 
sycl::buffer &x, - fp beta, + const fp beta, sycl::buffer &y, sycl::buffer &d); @@ -135,10 +135,10 @@ gemvdot (USM version) sycl::event gemvdot (sycl::queue &queue, oneapi::mkl::transpose transpose_val, - fp alpha, + const fp alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, - fp *x, - fp beta, + const fp *x, + const fp beta, fp *y, fp *d, const std::vector &dependencies = {}); diff --git a/source/elements/oneMKL/source/domains/spblas/gemvoptimize.rst b/source/elements/oneMKL/source/domains/spblas/gemvoptimize.rst index 9c3a9570d..a9b2b64a7 100644 --- a/source/elements/oneMKL/source/domains/spblas/gemvoptimize.rst +++ b/source/elements/oneMKL/source/domains/spblas/gemvoptimize.rst @@ -17,65 +17,6 @@ and performs optimizations. Optimized data is then stored in the handle. -.. _onemkl_sparse_optimize_gemv_buffer: - -optimize_gemv (Buffer version) ------------------------------- - -.. rubric:: Syntax - -.. code-block:: cpp - - namespace oneapi::mkl::sparse { - - void optimize_gemv (sycl::queue &queue, - oneapi::mkl::transpose transpose_val, - oneapi::mkl::sparse::matrix_handle_t handle); - - } - -.. container:: section - - .. rubric:: Input Parameters - - queue - Specifies the SYCL command queue which will be used for SYCL - kernels execution. - - - transpose_val - Specifies operation ``op()`` on input matrix. The possible options - are described in :ref:`onemkl_enum_transpose` enum class. - - - handle - Handle to object containing sparse matrix and other internal - data. Created using the - oneapi::mkl::sparse::set_csr_data routine. - - -.. container:: section - - .. rubric:: Throws - :class: sectiontitle - - This routine shall throw the following exceptions if the associated condition is detected. - An implementation may throw additional implementation-specific exception(s) - in case of error conditions not covered here. 
- - | :ref:`oneapi::mkl::computation_error` - | :ref:`oneapi::mkl::device_bad_alloc` - | :ref:`oneapi::mkl::host_bad_alloc` - | :ref:`oneapi::mkl::invalid_argument` - | :ref:`oneapi::mkl::unimplemented` - | :ref:`oneapi::mkl::uninitialized` - | :ref:`oneapi::mkl::unsupported_device` - -.. _onemkl_sparse_optimize_gemv_usm: - -optimize_gemv (USM version) ---------------------------- - .. rubric:: Syntax .. code-block:: cpp @@ -85,7 +26,7 @@ optimize_gemv (USM version) sycl::event optimize_gemv (sycl::queue &queue, oneapi::mkl::transpose transpose_val, oneapi::mkl::sparse::matrix_handle_t handle, - std::vector &dependencies); + const std::vector &dependencies = {}); } @@ -113,6 +54,14 @@ optimize_gemv (USM version) List of events that oneapi::mkl::sparse::optimize_gemv routine depends on. +.. container:: section + + .. rubric:: Output Parameters + :class: sectiontitle + + handle + Handle might be updated with some internal optimized data by this routine. + .. container:: section .. rubric:: Throws diff --git a/source/elements/oneMKL/source/domains/spblas/matrixinit.rst b/source/elements/oneMKL/source/domains/spblas/matrixinit.rst index ca6493d21..66db35082 100644 --- a/source/elements/oneMKL/source/domains/spblas/matrixinit.rst +++ b/source/elements/oneMKL/source/domains/spblas/matrixinit.rst @@ -24,10 +24,32 @@ The oneapi::mkl::sparse::init_matrix_handle function initializes the namespace oneapi::mkl::sparse { - void init_matrix_handle (oneapi::mkl::sparse::matrix_handle_t *handle); + void init_matrix_handle (sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t *p_handle); } +.. container:: section + + + .. rubric:: Input parameters + + queue + The SYCL command queue which will be used for SYCL kernels execution. + + p_handle + The address of the sparse::matrix_handle_t ``p_handle`` object to be initialized. + This initialization routine must only be called on an uninitialized matrix_handle_t object. + +.. container:: section + + .. 
rubric:: Output parameters + + p_handle + On return, the address is updated to point to a newly allocated and initialized matrix_handle_t object + that can be filled and used to perform sparse BLAS operations. + + .. container:: section .. rubric:: Throws @@ -50,4 +72,4 @@ The oneapi::mkl::sparse::init_matrix_handle function initializes the .. container:: parentlink - **Parent topic:** :ref:`onemkl_spblas` \ No newline at end of file + **Parent topic:** :ref:`onemkl_spblas` diff --git a/source/elements/oneMKL/source/domains/spblas/releasematrixhandle.rst b/source/elements/oneMKL/source/domains/spblas/releasematrixhandle.rst index 9026ae156..0cfd90e77 100644 --- a/source/elements/oneMKL/source/domains/spblas/releasematrixhandle.rst +++ b/source/elements/oneMKL/source/domains/spblas/releasematrixhandle.rst @@ -26,26 +26,43 @@ before releasing any data in case of USM. namespace oneapi::mkl::sparse { - void release_matrix_handle (oneapi::mkl::sparse::matrix_handle_t handle, - const std::vector &dependencies = {}); + sycl::event release_matrix_handle (sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t *p_handle, + const std::vector &dependencies = {}); } .. container:: section - .. rubric:: Input parameter + .. rubric:: Input parameters + queue + The SYCL command queue which will be used for SYCL kernels execution. - handle - Handle to object containing sparse matrix and other internal - data. Created using one of the + p_handle + The address of the sparse::matrix_handle_t ``p_handle`` object to be released, containing sparse matrix and other internal + data. Initialized with oneapi::mkl::sparse::init_matrix_handle routine, and filled with user data using one of the oneapi::mkl::sparse::set__structure routines. dependencies - List of events that ``handle`` depends on. - The call waits on the events(if any) before resetting the ``handle`` to default values. + List of events that ``p_handle`` depends on. 
+ The call waits on the events (if any) before resetting the ``p_handle`` to default values. +.. container:: section + + .. rubric:: Output parameters + + p_handle + The address of the sparse::matrix_handle_t ``p_handle`` that will be scheduled to be updated to point to a null object + and the passed in handle will be scheduled for deallocation and cleanup. + +.. container:: section + + .. rubric:: Return Values + + sycl::event + SYCL event which can be waited upon or added as a dependency for the completion of the deallocation and cleanup routines. .. container:: section diff --git a/source/elements/oneMKL/source/domains/spblas/setcsrstructure.rst b/source/elements/oneMKL/source/domains/spblas/setcsrstructure.rst index c845dccca..d9f482718 100644 --- a/source/elements/oneMKL/source/domains/spblas/setcsrstructure.rst +++ b/source/elements/oneMKL/source/domains/spblas/setcsrstructure.rst @@ -31,7 +31,8 @@ set_csr_data (Buffer version) namespace oneapi::mkl::sparse { - void set_csr_data (oneapi::mkl::sparse::matrix_handle_t handle, + void set_csr_data (sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, intType num_rows, intType num_cols, oneapi::mkl::index_base index, @@ -45,6 +46,9 @@ set_csr_data (Buffer version) .. rubric:: Input Parameters + queue + The SYCL command queue which will be used for SYCL kernel execution. + handle Handle to object containing sparse matrix and other internal data for subsequent DPC++ Sparse BLAS operations. 
@@ -122,13 +126,15 @@ set_csr_data (USM version) namespace oneapi::mkl::sparse { - void set_csr_data (oneapi::mkl::sparse::matrix_handle_t handle, - intType num_rows, - intType num_cols, - oneapi::mkl::index_base index, - intType *row_ptr, - intType *col_ind, - fp *val); + sycl::event set_csr_data (sycl::queue &queue, + oneapi::mkl::sparse::matrix_handle_t handle, + intType num_rows, + intType num_cols, + oneapi::mkl::index_base index, + intType *row_ptr, + intType *col_ind, + fp *val, + const std::vector<sycl::event> &dependencies = {}); } @@ -136,6 +142,9 @@ set_csr_data (USM version) .. rubric:: Input Parameters + queue + The SYCL command queue which will be used for SYCL kernel execution. + handle Handle to object containing sparse matrix and other internal data for subsequent DPC++ Sparse BLAS operations. @@ -173,6 +182,9 @@ set_csr_data (USM version) non-zero elements of the input matrix. Refer to :ref:`onemkl_sparse_csr` format for detailed description of ``val`` + dependencies + A vector of type const std::vector<sycl::event> & containing the list of events + that the oneapi::mkl::sparse::set_csr_data routine depends on. .. container:: section @@ -180,10 +192,18 @@ set_csr_data (USM version) :class: sectiontitle -handle - Handle to object containing sparse matrix and other internal - data for subsequent SYCL Sparse BLAS operations. + handle + Handle to object containing sparse matrix and other internal + data for subsequent SYCL Sparse BLAS operations. + +.. container:: section + + .. rubric:: Return Values + :class: sectiontitle + + sycl::event + A sycl::event that can be used to track the completion of asynchronous events + that were enqueued during the API call that continue the chain of events from the input dependencies. .. container:: section @@ -208,4 +228,4 @@ handle .. 
container:: parentlink - **Parent topic:** :ref:`onemkl_spblas` \ No newline at end of file + **Parent topic:** :ref:`onemkl_spblas` diff --git a/source/elements/oneMKL/source/domains/spblas/spblas.rst b/source/elements/oneMKL/source/domains/spblas/spblas.rst index a1e57d7be..2599cc2bb 100644 --- a/source/elements/oneMKL/source/domains/spblas/spblas.rst +++ b/source/elements/oneMKL/source/domains/spblas/spblas.rst @@ -27,10 +27,14 @@ Sparse BLAS - Fills the internal CSR data structure * - :ref:`onemkl_sparse_optimize_gemv` - Optimize routine for gemv + * - :ref:`onemkl_sparse_optimize_symv` + - Optimize routine for symv * - :ref:`onemkl_sparse_optimize_trmv` - Optimize routine for trmv * - :ref:`onemkl_sparse_optimize_trsv` - Optimize routine for trsv + * - :ref:`onemkl_sparse_optimize_gemm` + - Optimize routine for gemm * - :ref:`onemkl_sparse_gemv` - Sparse matrix-dense vector product using a general sparse matrix * - :ref:`onemkl_sparse_gemvdot` @@ -56,10 +60,12 @@ Sparse BLAS releasematrixhandle setcsrstructure gemm + gemmoptimize gemv gemvdot gemvoptimize symv + symvoptimize trmv trmvoptimize trsv diff --git a/source/elements/oneMKL/source/domains/spblas/symv.rst b/source/elements/oneMKL/source/domains/spblas/symv.rst index bfb1bfc25..f51890e2d 100644 --- a/source/elements/oneMKL/source/domains/spblas/symv.rst +++ b/source/elements/oneMKL/source/domains/spblas/symv.rst @@ -39,10 +39,10 @@ symv (Buffer version) void symv (sycl::queue &queue, oneapi::mkl::uplo uplo_val, - fp alpha, + const fp alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, sycl::buffer &x, - fp beta, + const fp beta, sycl::buffer &y); } @@ -125,10 +125,10 @@ symv (USM version) sycl::event symv (sycl::queue &queue, oneapi::mkl::uplo uplo_val, - fp alpha, + const fp alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, - fp *x, - fp beta, + const fp *x, + const fp beta, fp *y, const std::vector &dependencies = {}); diff --git a/source/elements/oneMKL/source/domains/spblas/symvoptimize.rst 
b/source/elements/oneMKL/source/domains/spblas/symvoptimize.rst new file mode 100644 index 000000000..13a8f555e --- /dev/null +++ b/source/elements/oneMKL/source/domains/spblas/symvoptimize.rst @@ -0,0 +1,98 @@ +.. SPDX-FileCopyrightText: 2023 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. _onemkl_sparse_optimize_symv: + +optimize_symv +============= + +Performs internal optimizations for oneapi::mkl::sparse::symv by analyzing +the matrix structure. + +.. rubric:: Description and Assumptions + +The oneapi::mkl::sparse::optimize_symv routine analyzes matrix structure +and performs optimizations. Optimized data is then stored in +the handle. + + +.. rubric:: Syntax + +.. code-block:: cpp + + namespace oneapi::mkl::sparse { + + sycl::event optimize_symv (sycl::queue &queue, + oneapi::mkl::uplo uplo_val, + oneapi::mkl::sparse::matrix_handle_t handle, + const std::vector &dependencies = {}); + } + +.. container:: section + + + .. rubric:: Input Parameters + + + queue + Specifies the SYCL command queue which will be used for SYCL + kernels execution. + + + uplo_val + Specifies which part is to be processed. The possible options are + described in :ref:`onemkl_enum_uplo` enum class. + + + handle + Handle to object containing sparse matrix and other internal + data. Created using the + oneapi::mkl::sparse::set_csr_data routine. + + + dependencies + List of events that oneapi::mkl::sparse::optimize_symv routine depends on. + + +.. container:: section + + .. rubric:: Output Parameters + :class: sectiontitle + + handle + Handle might be updated with some internal optimized data by this routine. + +.. container:: section + + .. rubric:: Throws + :class: sectiontitle + + This routine shall throw the following exceptions if the associated condition is detected. + An implementation may throw additional implementation-specific exception(s) + in case of error conditions not covered here. 
+ + | :ref:`oneapi::mkl::computation_error` + | :ref:`oneapi::mkl::device_bad_alloc` + | :ref:`oneapi::mkl::host_bad_alloc` + | :ref:`oneapi::mkl::invalid_argument` + | :ref:`oneapi::mkl::unimplemented` + | :ref:`oneapi::mkl::uninitialized` + | :ref:`oneapi::mkl::unsupported_device` + +.. container:: section + + .. rubric:: Return Values + :class: sectiontitle + + Output event that can be waited upon or added as a + dependency for the completion of optimize_symv routine. + + +.. container:: familylinks + + + .. container:: parentlink + + + **Parent topic:** :ref:`onemkl_spblas` diff --git a/source/elements/oneMKL/source/domains/spblas/trmv.rst b/source/elements/oneMKL/source/domains/spblas/trmv.rst index 19ac8907c..b80e2e799 100644 --- a/source/elements/oneMKL/source/domains/spblas/trmv.rst +++ b/source/elements/oneMKL/source/domains/spblas/trmv.rst @@ -39,10 +39,10 @@ trmv (Buffer version) oneapi::mkl::uplo uplo_val oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val - fp alpha, + const fp alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, sycl::buffer &x, - fp beta, + const fp beta, sycl::buffer &y); } @@ -138,10 +138,10 @@ trmv (USM version) oneapi::mkl::uplo uplo_val oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val - fp alpha, + const fp alpha, oneapi::mkl::sparse::matrix_handle_t A_handle, - fp *x, - fp beta, + const fp *x, + const fp beta, fp *y const std::vector &dependencies = {}); diff --git a/source/elements/oneMKL/source/domains/spblas/trmvoptimize.rst b/source/elements/oneMKL/source/domains/spblas/trmvoptimize.rst index b5b645248..ba152a495 100644 --- a/source/elements/oneMKL/source/domains/spblas/trmvoptimize.rst +++ b/source/elements/oneMKL/source/domains/spblas/trmvoptimize.rst @@ -17,79 +17,6 @@ and performs optimizations. Optimized data is then stored in the handle. -.. _onemkl_sparse_optimize_trmv_buffer: - -optimize_trmv (Buffer version) ------------------------------- - -.. rubric:: Syntax - -.. 
code-block:: cpp - - namespace oneapi::mkl::sparse { - - void optimize_trmv (sycl::queue &queue, - oneapi::mkl::uplo uplo_val, - oneapi::mkl::transpose transpose_val, - oneapi::mkl::diag diag_val, - oneapi::mkl::sparse::matrix_handle_t handle); - } - -.. container:: section - - - .. rubric:: Input Parameters - - - queue - Specifies the SYCL command queue which will be used for SYCL - kernels execution. - - - uplo_val - Specifies which part is to be processed. The possible options are - described in :ref:`onemkl_enum_uplo` enum class. - - - transpose_val - Specifies operation ``op()`` on input matrix. The possible options - are described in :ref:`onemkl_enum_transpose` enum class. - - - diag_val - Specifies if the diagonal is unit or not. The possible options - are described in :ref:`onemkl_enum_diag` enum class. - - - handle - Handle to object containing sparse matrix and other internal - data. Created using the - oneapi::mkl::sparse::set_csr_data routine. - - -.. container:: section - - .. rubric:: Throws - :class: sectiontitle - - This routine shall throw the following exceptions if the associated condition is detected. - An implementation may throw additional implementation-specific exception(s) - in case of error conditions not covered here. - - | :ref:`oneapi::mkl::computation_error` - | :ref:`oneapi::mkl::device_bad_alloc` - | :ref:`oneapi::mkl::host_bad_alloc` - | :ref:`oneapi::mkl::invalid_argument` - | :ref:`oneapi::mkl::unimplemented` - | :ref:`oneapi::mkl::uninitialized` - | :ref:`oneapi::mkl::unsupported_device` - - -.. _onemkl_sparse_optimize_trmv_usm: - -optimize_trmv (USM version) ---------------------------- - .. rubric:: Syntax .. code-block:: cpp @@ -101,7 +28,7 @@ optimize_trmv (USM version) oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, oneapi::mkl::sparse::matrix_handle_t handle, - std::vector &dependencies); + const std::vector &dependencies = {}); } .. 
container:: section @@ -140,6 +67,14 @@ optimize_trmv (USM version) List of events that oneapi::mkl::sparse::optimize_trmv routine depends on. +.. container:: section + + .. rubric:: Output Parameters + :class: sectiontitle + + handle + Handle might be updated with some internal optimized data by this routine. + .. container:: section .. rubric:: Throws diff --git a/source/elements/oneMKL/source/domains/spblas/trsv.rst b/source/elements/oneMKL/source/domains/spblas/trsv.rst index 4634c953b..4e909e3dd 100644 --- a/source/elements/oneMKL/source/domains/spblas/trsv.rst +++ b/source/elements/oneMKL/source/domains/spblas/trsv.rst @@ -82,11 +82,6 @@ trsv (Buffer version) equal to the number of columns of matrix :math:`\text{op}(A)`. - y - SYCL memory object containing an array of size at least - equal to the number of rows of matrix :math:`\text{op}(A)`. - - .. container:: section @@ -129,7 +124,7 @@ trsv (USM version) oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val oneapi::mkl::sparse::matrix_handle_t A_handle, - fp *x, + const fp *x, fp *y const std::vector &dependencies = {}); @@ -171,11 +166,6 @@ trsv (USM version) equal to the number of columns of matrix :math:`\text{op}(A)`. - y - Device-accessible USM object containing an array of size at least - equal to the number of rows of matrix :math:`\text{op}(A)`. - - dependencies List of events that oneapi::mkl::sparse::trmv routine depends on. If omitted, defaults to no dependencies. diff --git a/source/elements/oneMKL/source/domains/spblas/trsvoptimize.rst b/source/elements/oneMKL/source/domains/spblas/trsvoptimize.rst index 66990cbbb..2caa41bc7 100644 --- a/source/elements/oneMKL/source/domains/spblas/trsvoptimize.rst +++ b/source/elements/oneMKL/source/domains/spblas/trsvoptimize.rst @@ -17,78 +17,6 @@ and performs optimizations. Optimized data is then stored in the handle. -.. _onemkl_sparse_optimize_trsv_buffer: - -optimize_trsv (Buffer version) ------------------------------- - -.. 
rubric:: Syntax - -.. code-block:: cpp - - namespace oneapi::mkl::sparse { - - void optimize_trsv (sycl::queue &queue, - oneapi::mkl::uplo uplo_val, - oneapi::mkl::transpose transpose_val, - oneapi::mkl::diag diag_val, - oneapi::mkl::sparse::matrix_handle_t handle); - } - -.. container:: section - - - .. rubric:: Input Parameters - - - queue - Specifies the SYCL command queue which will be used for SYCL - kernels execution. - - - uplo_val - Specifies which part is to be processed. The possible options are - described in :ref:`onemkl_enum_uplo` enum class. - - - transpose_val - Specifies operation ``op()`` on input matrix. The possible options - are described in :ref:`onemkl_enum_transpose` enum class. - - - diag_val - Specifies if the diagonal is unit or not. The possible options - are described in :ref:`onemkl_enum_diag` enum class. - - - handle - Handle to object containing sparse matrix and other internal - data. Created using the - oneapi::mkl::sparse::set_csr_data routine. - - -.. container:: section - - .. rubric:: Throws - :class: sectiontitle - - This routine shall throw the following exceptions if the associated condition is detected. - An implementation may throw additional implementation-specific exception(s) - in case of error conditions not covered here. - - | :ref:`oneapi::mkl::computation_error` - | :ref:`oneapi::mkl::device_bad_alloc` - | :ref:`oneapi::mkl::host_bad_alloc` - | :ref:`oneapi::mkl::invalid_argument` - | :ref:`oneapi::mkl::unimplemented` - | :ref:`oneapi::mkl::uninitialized` - | :ref:`oneapi::mkl::unsupported_device` - -.. _onemkl_sparse_optimize_trsv_usm: - -optimize_trmv (USM version) ------------------------------- - .. rubric:: Syntax @@ -101,7 +29,7 @@ optimize_trmv (USM version) oneapi::mkl::transpose transpose_val, oneapi::mkl::diag diag_val, oneapi::mkl::sparse::matrix_handle_t handle, - std::vector &dependencies); + const std::vector &dependencies = {}); } .. 
container:: section @@ -140,6 +68,14 @@ optimize_trmv (USM version) List of events that oneapi::mkl::sparse::optimize_trsv routine depends on. +.. container:: section + + .. rubric:: Output Parameters + :class: sectiontitle + + handle + Handle might be updated with some internal optimized data by this routine. + .. container:: section .. rubric:: Throws diff --git a/source/elements/oneMKL/source/domains/stats/onemkl_stats_dataset.rst b/source/elements/oneMKL/source/domains/stats/onemkl_stats_dataset.rst index e595031c8..56b235393 100755 --- a/source/elements/oneMKL/source/domains/stats/onemkl_stats_dataset.rst +++ b/source/elements/oneMKL/source/domains/stats/onemkl_stats_dataset.rst @@ -30,10 +30,7 @@ structure dataset (Buffer version) explicit dataset(std::int64_t n_dims_, std::int64_t n_observations_, sycl::buffer observations_, sycl::buffer weights_ = {0}, - sycl::buffer indices_ = {0}) : - n_dims(n_dims_), n_observations(n_observations_), - observations(observations_), - weights(weights_), indices(indices_) {}; + sycl::buffer indices_ = {0}); std::int64_t n_dims; std::int64_t n_observations; @@ -91,7 +88,7 @@ structure dataset (Buffer version) explicit dataset::dataset(std::int64_t n_dims_, std::int64_t n_observations_, sycl::buffer observations_, sycl::buffer weights_ = {0}, - sycl::buffer indices_ = {0}) + sycl::buffer indices_ = {0}); .. container:: section @@ -102,8 +99,8 @@ structure dataset (Buffer version) * `n_dims_` is the number of dimensions * `n_observations_` is the number of observations * `observations_` is the matrix of observations - * `weights_` is an optional parameter, represents array of weights for observations (of size `n_observations`). If the parameter is not specified, each observation is assigned a weight equal 1. - * `indices_` is an optional parameter, represents array of dimensions that are processed (of size `n_dims`). If the parameter is not specified, all dimensions are processed. 
+ * `weights_` is an optional parameter, represents an array of weights for observations (of size `n_observations`). If the parameter is not specified, each observation is assigned a weight equal to 1. + * `indices_` is an optional parameter, represents an array of dimensions that are processed (of size `n_dims`). If the parameter is not specified, all dimensions are processed. .. container:: section @@ -125,10 +122,7 @@ structure dataset (USM version) template struct dataset { explicit dataset(std::int64_t n_dims_, std::int64_t n_observations_, Type* observations_, - Type* weights_ = nullptr, std::int64_t* indices_ = nullptr) : - n_dims(n_dims_), n_observations(n_observations_), - observations(observations_), - weights(weights_), indices(indices_) {}; + Type* weights_ = nullptr, std::int64_t* indices_ = nullptr); std::int64_t n_dims; std::int64_t n_observations; @@ -186,7 +180,7 @@ structure dataset (USM version) explicit dataset::dataset(std::int64_t n_dims_, std::int64_t n_observations_, Type* observations_, Type* weights_ = nullptr, - std::int64_t* indices_ = nullptr) + std::int64_t* indices_ = nullptr); .. container:: section @@ -197,8 +191,8 @@ structure dataset (USM version) * `n_dims_` is the number of dimensions * `n_observations_` is the number of observations * `observations_` is the matrix of observations - * `weights_` is an optional parameter, represents array of weights for observations (of size `n_observations`). If the parameter is not specified, each observation is assigned a weight equal 1. - * `indices_` is an optional parameter, represents array of dimensions that are processed (of size `n_dims`). If the parameter is not specified, all dimensions are processed. + * `weights_` is an optional parameter, represents an array of weights for observations (of size `n_observations`). If the parameter is not specified, each observation is assigned a weight equal to 1. 
+ * `indices_` is an optional parameter, represents an array of dimensions that are processed (of size `n_dims`). If the parameter is not specified, all dimensions are processed. .. container:: section diff --git a/source/elements/oneMKL/source/domains/stats/onemkl_stats_usage_model.rst b/source/elements/oneMKL/source/domains/stats/onemkl_stats_usage_model.rst index eee2dc16d..bb6d634ab 100755 --- a/source/elements/oneMKL/source/domains/stats/onemkl_stats_usage_model.rst +++ b/source/elements/oneMKL/source/domains/stats/onemkl_stats_usage_model.rst @@ -20,92 +20,32 @@ A typical algorithm for summary statistics is as follows: The following example demonstrates how to calculate mean values for a 3-dimensional dataset filled with random numbers. For dataset creation, the :ref:`onemkl_stats_make_dataset` helper function is used. -Buffer-based example --------------------- - -.. code-block:: cpp - - #include - #include - - #include "CL/sycl.hpp" - #include "oneapi/mkl/stats.hpp" - - int main() { - sycl::queue queue; - - const size_t n_observations = 1000; - const size_t n_dims = 3; - std::vector x(n_observations * n_dims); - // fill x storage with random numbers - for(int i = 0; i < n_dims, i++) { - for(int j = 0; j < n_observations; j++) { - x[j + i * n_observations] = float(std::rand()) / float(RAND_MAX); - } - } - //create buffer for dataset - sycl::buffer x_buf(x.data(), x.size()); - // create buffer for mean values - sycl::buffer mean_buf(n_dims); - // create oneapi::mkl::stats::dataset - auto dataset = oneapi::mkl::stats::make_dataset(n_dims, n_observations, x_buf); - - - oneapi::mkl::stats::mean(queue, dataset, mean_buf); - - - // create host accessor for mean_buf to print results - auto acc = mean_buf.template get_access(); - - - for(int i = 0; i < n_dims; i++) { - std::cout << "Mean value for dimension " << i << ": " << acc[i] << std::endl; - } - return 0; - } - - USM-based example ----------------- .. 
code-block:: cpp - #include - #include - - #include "CL/sycl.hpp" #include "oneapi/mkl/stats.hpp" int main() { sycl::queue queue; - const size_t n_observations = 1000; - const size_t n_dims = 3; + constexpr std::size_t n_observations = 1000; + constexpr std::size_t n_dims = 3; - sycl::usm_allocator allocator(queue); + // allocate Unified Shared Memory for the dataset of the size n_observations * n_dims and fill it with any data + // allocate Unified Shared Memory for the mean output of the size n_dims - std::vector x(n_observations * n_dims, allocator); - // fill x storage with random numbers - for(int i = 0; i < n_dims, i++) { - for(int j = 0; j < n_observations; j++) { - x[j + i * n_observations] = float(std::rand()) / float(RAND_MAX); - } - } - std::vector mean_buf(n_dims, allocator); // create oneapi::mkl::stats::dataset - auto dataset = oneapi::mkl::stats::make_dataset(n_dims, n_observations, x); - - sycl::event event = oneapi::mkl::stats::mean(queue, dataset, mean); - event.wait(); - for(int i = 0; i < n_dims; i++) { - std::cout << "Mean value for dimension " << i << ": " << mean[i] << std::endl; - } - return 0; - } + auto dataset = oneapi::mkl::stats::make_dataset(n_dims, n_observations, dataset_ptr); + // call statistics computation routine + auto event = oneapi::mkl::stats::mean(queue, dataset, mean_ptr); -.. rubric:: USM usage + // wait until computations are completed + event.wait(); -You can also use USM with raw pointers by using the sycl::malloc_shared/malloc_device functions. + // ... + } **Parent topic:** :ref:`onemkl_stats` diff --git a/source/elements/oneMKL/source/domains/value_or_pointer.rst b/source/elements/oneMKL/source/domains/value_or_pointer.rst new file mode 100644 index 000000000..aef22f5a3 --- /dev/null +++ b/source/elements/oneMKL/source/domains/value_or_pointer.rst @@ -0,0 +1,104 @@ +.. SPDX-FileCopyrightText: 2019-2020 Intel Corporation +.. +.. SPDX-License-Identifier: CC-BY-4.0 + +.. 
_value_or_pointer: + +Scalar Arguments in BLAS +======================== + +.. container:: + + The USM version of oneMKL BLAS routines for DPC++ will accept either + a scalar (for example ``float``) or a pointer (``float*``) for parameters + that represent a single fixed value (not a vector or matrix). These + parameters are often named ``alpha`` or ``beta`` in BLAS. + + .. container:: section + + .. rubric:: Basic Use + :name: basic-use + :class: sectiontitle + + Users can call ``gemv`` with pointers: + + .. code-block:: + + float *alpha_ptr = sycl::malloc_shared<float>(1, queue); + float *beta_ptr = sycl::malloc_shared<float>(1, queue); + // fill alpha_ptr and beta_ptr with desired values + oneapi::mkl::blas::column_major::gemv(queue, trans, m, n, alpha_ptr, a, lda, x, incx, beta_ptr, + y, incy).wait(); + + or with literal values: + + .. code-block:: + + oneapi::mkl::blas::column_major::gemv(queue, trans, m, n, 2, a, lda, x, incx, 2.7, + y, incy).wait(); + + Users can even mix scalar and pointer parameters in a single call: + + .. code-block:: + + float *alpha_ptr = sycl::malloc_shared<float>(1, queue); + oneapi::mkl::blas::column_major::gemv(queue, trans, m, n, alpha_ptr, a, lda, x, incx, 2.7, + y, incy).wait(); + + Pointers provided for scalar parameters may be SYCL-managed pointers + to either device or host memory (for example pointers created with + ``sycl::malloc_device``, ``sycl::malloc_shared``, or + ``sycl::malloc_host``), or they may be raw pointers created with + ``malloc`` or ``new``. + + For most users, this is all they need to know. A few details about how + this is implemented are provided below. + + .. container:: section + + .. rubric:: Wrapper type + :name: wrapper-type + :class: sectiontitle + + The USM versions of oneMKL BLAS routines use a templated + ``value_or_pointer<T>`` wrapper to enable either pointers or values + to be passed to routines that take a scalar parameter. 
+ + In general, users should not explicitly use this type in their + code. 
There is no need to construct an object of type + ``value_or_pointer<T>`` in order to use the oneMKL functions that + include it in their function signatures. Instead, values and pointers + in user code will be implicitly converted to this type when a user + calls a oneMKL function. + + The ``value_or_pointer<T>`` wrapper has two constructors, one that + converts a value of type ``T`` (or anything convertible to ``T``) to + ``value_or_pointer<T>``, and another that converts a pointer to ``T`` + to ``value_or_pointer<T>``. Internally, the oneMKL functions can + behave slightly differently depending on whether the underlying data + is a value or a pointer, and whether a pointer refers to host-side memory or + device-side memory, but these differences should be transparent to users. + + .. container:: section + + .. rubric:: Dependencies + :name: dependencies + :class: sectiontitle + + For scalar parameters passed to oneMKL BLAS routines as pointers, the + timing of pointer dereferencing depends on whether it is a USM-managed + pointer or a raw pointer. + + A USM-managed pointer is dereferenced at kernel launch, after + the dependencies passed to the function have been resolved, so the + value may be assigned asynchronously in another event passed as a + dependency to the routine. + + A raw pointer (such as one allocated with ``malloc`` or ``new``) is + dereferenced at the function call, so it must be valid when the + function is called. In this case the data it points to must already be + set before the call and may not be assigned asynchronously. 
+ + + **Parent topic:** :ref:`onemkl_dense_linear_algebra` + diff --git a/source/elements/oneTBB/source/algorithms/functions/feeder.rst b/source/elements/oneTBB/source/algorithms/functions/feeder.rst index 93a093ff2..dc2a73f72 100644 --- a/source/elements/oneTBB/source/algorithms/functions/feeder.rst +++ b/source/elements/oneTBB/source/algorithms/functions/feeder.rst @@ -33,10 +33,13 @@ Member functions Adds item to a collection of work items to be processed. + **Requirements**: The ``Item`` type must meet the `CopyConstructible` requirements from the [copyconstructible] section of the ISO C++ Standard. + .. cpp:function:: void add( Item&& item ) Same as the above but uses the move constructor of ``Item``, if available. - + + **Requirements**: The ``Item`` type must meet the `MoveConstructible` requirements from the [moveconstructible] section of the ISO C++ Standard. .. caution:: diff --git a/source/elements/oneTBB/source/algorithms/functions/parallel_for_each_func.rst b/source/elements/oneTBB/source/algorithms/functions/parallel_for_each_func.rst index cd70f061d..1133c8faa 100644 --- a/source/elements/oneTBB/source/algorithms/functions/parallel_for_each_func.rst +++ b/source/elements/oneTBB/source/algorithms/functions/parallel_for_each_func.rst @@ -38,8 +38,11 @@ Requirements: * The ``Body`` type must meet the :doc:`ParallelForEachBody requirements <../../named_requirements/algorithms/par_for_each_body>`. Alternatively, since C++17 a pointer to a member function in ``Index`` might be used as a ``Body``. -* The ``InputIterator`` type must meet the `Input Iterator` requirements from the [input.iterators] ISO C++ Standard section. +* The ``InputIterator`` type must meet the `Input Iterator` requirements from the [input.iterators] section of the ISO C++ Standard. 
+* If the ``InputIterator`` type does not meet the `Forward Iterator` requirements from the [forward.iterators] section of the ISO C++ Standard, + the ``std::iterator_traits<InputIterator>::value_type`` type must be constructible from ``std::iterator_traits<InputIterator>::reference``. * The ``Container`` type must meet the :doc:`ContainerBasedSequence requirements <../../named_requirements/algorithms/container_based_sequence>`. +* The type returned by ``Container::begin()`` must meet the same requirements as the ``InputIterator`` type above. The ``parallel_for_each`` template has two forms. diff --git a/source/elements/oneTBB/source/algorithms/functions/parallel_reduce_func.rst b/source/elements/oneTBB/source/algorithms/functions/parallel_reduce_func.rst index 69ec962bf..1bce09f5f 100644 --- a/source/elements/oneTBB/source/algorithms/functions/parallel_reduce_func.rst +++ b/source/elements/oneTBB/source/algorithms/functions/parallel_reduce_func.rst @@ -50,6 +50,8 @@ Requirements: * The ``Range`` type must meet the :doc:`Range requirements <../../named_requirements/algorithms/range>`. * The ``Body`` type must meet the :doc:`ParallelReduceBody requirements <../../named_requirements/algorithms/par_reduce_body>`. +* The ``Value`` type must meet the `CopyConstructible` requirements from the [copyconstructible] section and + `CopyAssignable` requirements from the [copyassignable] section of the ISO C++ Standard. * The ``Func`` type must meet the :doc:`ParallelReduceFunc requirements <../../named_requirements/algorithms/par_reduce_func>`. Alternatively, since C++17 a pointer to a const member function in ``Range`` taking ``const Value&`` argument and returning ``Value`` might be used as a ``Func``. 
diff --git a/source/elements/oneTBB/source/algorithms/functions/parallel_scan_func.rst b/source/elements/oneTBB/source/algorithms/functions/parallel_scan_func.rst index 7430703d2..c56e41794 100644 --- a/source/elements/oneTBB/source/algorithms/functions/parallel_scan_func.rst +++ b/source/elements/oneTBB/source/algorithms/functions/parallel_scan_func.rst @@ -32,6 +32,8 @@ Requirements: * The ``Range`` type must meet the :doc:`Range requirement <../../named_requirements/algorithms/range>`. * The ``Body`` type must meet the :doc:`ParallelScanBody requirements <../../named_requirements/algorithms/par_scan_body>`. +* The ``Value`` type must meet the `CopyConstructible` requirements from the [copyconstructible] section and + `CopyAssignable` requirements from the [copyassignable] section of the ISO C++ Standard. * The ``Scan`` type must meet the :doc:`ParallelScanFunc requirements <../../named_requirements/algorithms/par_scan_func>`. Alternatively, since C++17 a pointer to a const member function in ``Range`` taking ``const Value&`` and ``bool`` arguments and returning ``Value`` might be used as a ``Scan``. diff --git a/source/elements/oneTBB/source/named_requirements/algorithms/filter_body.rst b/source/elements/oneTBB/source/named_requirements/algorithms/filter_body.rst index e3326ae84..9813fa76e 100644 --- a/source/elements/oneTBB/source/named_requirements/algorithms/filter_body.rst +++ b/source/elements/oneTBB/source/named_requirements/algorithms/filter_body.rst @@ -25,7 +25,7 @@ A type `Body` should meet one of the following requirements depending on the fil .. namespace:: FirstFilterBody -.. cpp:function:: OutputType Body::operator()( oneapi::tbb::flow_control fc ) const +.. cpp:function:: OutputType Body::operator()( oneapi::tbb::flow_control& fc ) const Returns the next item from an input stream. Calls ``fc.stop()`` at the end of an input stream. @@ -45,7 +45,7 @@ A type `Body` should meet one of the following requirements depending on the fil .. 
namespace:: SingleFilterBody -.. cpp:function:: void Body::operator()( oneapi::tbb::flow_control fc ) const +.. cpp:function:: void Body::operator()( oneapi::tbb::flow_control& fc ) const Processes an element from an input stream. Calls ``fc.stop()`` at the end of an input stream. diff --git a/source/elements/oneTBB/source/named_requirements/algorithms/par_for_index.rst b/source/elements/oneTBB/source/named_requirements/algorithms/par_for_index.rst index e166b526f..6b9f1c3e1 100644 --- a/source/elements/oneTBB/source/named_requirements/algorithms/par_for_index.rst +++ b/source/elements/oneTBB/source/named_requirements/algorithms/par_for_index.rst @@ -25,31 +25,48 @@ A type `Index` satisfies `ParallelForIndex` if it meets the following requiremen Destructor. -.. cpp:function:: void operator=( const Index& ) +.. cpp:function:: Index& operator=( const Index& ) Assignment. - .. note:: +.. cpp:function:: Index& operator++() - The return type ``void`` in the pseudo-signature denotes that - ``operator=`` is not required to return a value. The actual ``operator=`` - can return a value, which will be ignored. + Adjust ``*this`` to the next value. .. cpp:function:: bool operator<( const Index& i, const Index& j ) Value of *i* precedes value of *j*. +.. cpp:function:: bool operator<=( const Index& i, const Index& j ) + + Value of *i* precedes or is equal to the value of *j*. + .. cpp:function:: D operator-( const Index& i, const Index& j ) Number of values in range ``[i,j)``. +.. cpp:function:: Index operator+( const Index& i, const Index& j ) + + Sum of *i* and *j* values. + .. cpp:function:: Index operator+( const Index& i, D k ) *k*-th value after *i*. +.. cpp:function:: Index operator*( const Index& i, const Index& j ) + + Product of *i* and *j* values. + +.. cpp:function:: Index operator/( const Index& i, const Index& j ) + + Quotient of *i* and *j* values. + ``D`` is the type of the expression ``j-i``. It can be any integral type that is convertible to ``size_t``. 
Examples that model the Index requirements are integral types and pointers. +**Note:** It is recommended to use integral types as ``ParallelForIndex``. See the ``[basic.fundamental]`` +section of the ISO C++ Standard for information about integral types. + See also: * :doc:`parallel_for algorithm <../../algorithms/functions/parallel_for_func>` diff --git a/source/elements/oneVPL/Doxyfile b/source/elements/oneVPL/Doxyfile deleted file mode 100644 index 473849c2d..000000000 --- a/source/elements/oneVPL/Doxyfile +++ /dev/null @@ -1,2558 +0,0 @@ -# SPDX-FileCopyrightText: 2019-2020 Intel Corporation -# -# SPDX-License-Identifier: MIT - -# Doxyfile 1.8.16 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project. -# -# All text after a double hash (##) is considered a comment and is placed in -# front of the TAG it is preceding. -# -# All text after a single hash (#) is considered a comment and will be ignored. -# The format is: -# TAG = value [value, ...] -# For lists, items can also be appended using: -# TAG += value [value, ...] -# Values that contain spaces should be placed between quotes (\" \"). - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- - -# This tag specifies the encoding used for all characters in the configuration -# file that follow. The default is UTF-8 which is also the encoding used for all -# text before the first occurrence of this tag. Doxygen uses libiconv (or the -# iconv built into libc) for the transcoding. See -# https://www.gnu.org/software/libiconv/ for the list of possible encodings. -# The default value is: UTF-8. 
- -DOXYFILE_ENCODING = UTF-8 - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by -# double-quotes, unless you are using Doxywizard) that should identify the -# project for which the documentation is generated. This name is used in the -# title of most generated pages and in a few other places. -# The default value is: My Project. - -PROJECT_NAME = "oneVPL" - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. This -# could be handy for archiving the generated documentation or if some version -# control system is used. - -PROJECT_NUMBER = - -# Using the PROJECT_BRIEF tag one can provide an optional one line description -# for a project that appears at the top of each page and should give viewer a -# quick idea about the purpose of the project. Keep the description short. - -PROJECT_BRIEF = - -# With the PROJECT_LOGO tag one can specify a logo or an icon that is included -# in the documentation. The maximum height of the logo should not exceed 55 -# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy -# the logo to the output directory. - -PROJECT_LOGO = - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path -# into which the generated documentation will be written. If a relative path is -# entered, it will be relative to the location where doxygen was started. If -# left blank the current directory will be used. - -OUTPUT_DIRECTORY = doxygen - -# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- -# directories (in 2 levels) under the output directory of each output format and -# will distribute the generated files over these directories. Enabling this -# option can be useful when feeding doxygen a huge amount of source files, where -# putting all generated files in the same directory would otherwise causes -# performance problems for the file system. -# The default value is: NO. 
- -CREATE_SUBDIRS = NO - -# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII -# characters to appear in the names of generated files. If set to NO, non-ASCII -# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode -# U+3044. -# The default value is: NO. - -ALLOW_UNICODE_NAMES = NO - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all constant output in the proper language. -# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, -# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), -# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, -# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, -# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, -# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, -# Ukrainian and Vietnamese. -# The default value is: English. - -OUTPUT_LANGUAGE = English - -# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all generated output in the proper direction. -# Possible values are: None, LTR, RTL and Context. -# The default value is: None. - -#OUTPUT_TEXT_DIRECTION = None - -# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member -# descriptions after the members that are listed in the file and class -# documentation (similar to Javadoc). Set to NO to disable this. -# The default value is: YES. 
- -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief -# description of a member or function before the detailed description -# -# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. -# The default value is: YES. - -REPEAT_BRIEF = YES - -# This tag implements a quasi-intelligent brief description abbreviator that is -# used to form the text in various listings. Each string in this list, if found -# as the leading text of the brief description, will be stripped from the text -# and the result, after processing the whole list, is used as the annotated -# text. Otherwise, the brief description is used as-is. If left blank, the -# following values are used ($name is automatically replaced with the name of -# the entity):The $name class, The $name widget, The $name file, is, provides, -# specifies, contains, represents, a, an and the. - -ABBREVIATE_BRIEF = "The $name class" \ - "The $name widget" \ - "The $name file" \ - is \ - provides \ - specifies \ - contains \ - represents \ - a \ - an \ - the - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# doxygen will generate a detailed section even if there is only a brief -# description. -# The default value is: NO. - -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown. -# The default value is: NO. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path -# before files name in the file list and in the header files. If set to NO the -# shortest path that makes the file name unique will be used -# The default value is: YES. 
- -FULL_PATH_NAMES = YES - -# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. -# Stripping is only done if one of the specified strings matches the left-hand -# part of the path. The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the path to -# strip. -# -# Note that you can specify absolute paths here, but also relative paths, which -# will be relative from the directory where doxygen is started. -# This tag requires that the tag FULL_PATH_NAMES is set to YES. - -STRIP_FROM_PATH = - -# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the -# path mentioned in the documentation of a class, which tells the reader which -# header file to include in order to use a class. If left blank only the name of -# the header file containing the class definition is used. Otherwise one should -# specify the list of include paths that are normally passed to the compiler -# using the -I flag. - -STRIP_FROM_INC_PATH = - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but -# less readable) file names. This can be useful is your file systems doesn't -# support long names like on DOS, Mac, or CD-ROM. -# The default value is: NO. - -SHORT_NAMES = NO - -# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the -# first line (until the first dot) of a Javadoc-style comment as the brief -# description. If set to NO, the Javadoc-style will behave just like regular Qt- -# style comments (thus requiring an explicit @brief command for a brief -# description.) -# The default value is: NO. - -JAVADOC_AUTOBRIEF = NO - -# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line -# such as -# /*************** -# as being the beginning of a Javadoc-style comment "banner". If set to NO, the -# Javadoc-style will behave just like regular comments and it will not be -# interpreted by doxygen. 
-# The default value is: NO. - -# JAVADOC_BANNER = NO - -# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first -# line (until the first dot) of a Qt-style comment as the brief description. If -# set to NO, the Qt-style will behave just like regular Qt-style comments (thus -# requiring an explicit \brief command for a brief description.) -# The default value is: NO. - -QT_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a -# multi-line C++ special comment block (i.e. a block of //! or /// comments) as -# a brief description. This used to be the default behavior. The new default is -# to treat a multi-line C++ comment block as a detailed description. Set this -# tag to YES if you prefer the old behavior instead. -# -# Note that setting this tag to YES also means that rational rose comments are -# not recognized any more. -# The default value is: NO. - -MULTILINE_CPP_IS_BRIEF = NO - -# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the -# documentation from any documented member that it re-implements. -# The default value is: YES. - -INHERIT_DOCS = YES - -# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new -# page for each member. If set to NO, the documentation of a member will be part -# of the file/class/namespace that contains it. -# The default value is: NO. - -SEPARATE_MEMBER_PAGES = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen -# uses this value to replace tabs by spaces in code fragments. -# Minimum value: 1, maximum value: 16, default value: 4. - -TAB_SIZE = 4 - -# This tag can be used to specify a number of aliases that act as commands in -# the documentation. 
An alias has the form: -# name=value -# For example adding -# "sideeffect=@par Side Effects:\n" -# will allow you to put the command \sideeffect (or @sideeffect) in the -# documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines (in the resulting output). You can put ^^ in the value part of an -# alias to insert a newline as if a physical newline was in the original file. -# When you need a literal { or } or , in the value part of an alias you have to -# escape them by means of a backslash (\), this can lead to conflicts with the -# commands \{ and \} for these it is advised to use the version @{ and @} or use -# a double escape (\\{ and \\}) - -ALIASES = - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources -# only. Doxygen will then generate output that is more tailored for C. For -# instance, some of the names that are used will be different. The list of all -# members will be omitted, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_FOR_C = NO - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or -# Python sources only. Doxygen will then generate output that is more tailored -# for that language. For instance, namespaces will be presented as packages, -# qualified scopes will look different, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_JAVA = NO - -# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran -# sources. Doxygen will then generate output that is tailored for Fortran. -# The default value is: NO. - -OPTIMIZE_FOR_FORTRAN = NO - -# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL -# sources. Doxygen will then generate output that is tailored for VHDL. -# The default value is: NO. - -OPTIMIZE_OUTPUT_VHDL = NO - -# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice -# sources only. 
Doxygen will then generate output that is more tailored for that -# language. For instance, namespaces will be presented as modules, types will be -# separated into more groups, etc. -# The default value is: NO. - -# OPTIMIZE_OUTPUT_SLICE = NO - -# Doxygen selects the parser to use depending on the extension of the files it -# parses. With this tag you can assign which parser to use for a given -# extension. Doxygen has a built-in mapping, but you can override or extend it -# using this tag. The format is ext=language, where ext is a file extension, and -# language is one of the parsers supported by doxygen: IDL, Java, Javascript, -# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, -# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: -# FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser -# tries to guess whether the code is fixed or free formatted code, this is the -# default for Fortran type files), VHDL, tcl. For instance to make doxygen treat -# .inc files as Fortran files (default is PHP), and .f files as C (default is -# Fortran), use: inc=Fortran f=C. -# -# Note: For files without extension you can use no_extension as a placeholder. -# -# Note that for custom extensions you also need to set FILE_PATTERNS otherwise -# the files are not read by doxygen. - -EXTENSION_MAPPING = - -# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments -# according to the Markdown format, which allows for more readable -# documentation. See https://daringfireball.net/projects/markdown/ for details. -# The output of markdown processing is further processed by doxygen, so you can -# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in -# case of backward compatibility issues. -# The default value is: YES.
- -MARKDOWN_SUPPORT = YES - -# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up -# to that level are automatically included in the table of contents, even if -# they do not have an id attribute. -# Note: This feature currently applies only to Markdown headings. -# Minimum value: 0, maximum value: 99, default value: 5. -# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. - -TOC_INCLUDE_HEADINGS = 5 - -# When enabled doxygen tries to link words that correspond to documented -# classes, or namespaces to their corresponding documentation. Such a link can -# be prevented in individual cases by putting a % sign in front of the word or -# globally by setting AUTOLINK_SUPPORT to NO. -# The default value is: YES. - -AUTOLINK_SUPPORT = YES - -# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want -# to include (a tag file for) the STL sources as input, then you should set this -# tag to YES in order to let doxygen match function declarations and -# definitions whose arguments contain STL classes (e.g. func(std::string); -# versus func(std::string) {}). This also makes the inheritance and collaboration -# diagrams that involve STL classes more complete and accurate. -# The default value is: NO. - -BUILTIN_STL_SUPPORT = NO - -# If you use Microsoft's C++/CLI language, you should set this option to YES to -# enable parsing support. -# The default value is: NO. - -CPP_CLI_SUPPORT = NO - -# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: -# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen -# will parse them like normal C++ but will assume all classes use public instead -# of private inheritance when no explicit protection keyword is present. -# The default value is: NO. - -SIP_SUPPORT = NO - -# For Microsoft's IDL there are propget and propput attributes to indicate -# getter and setter methods for a property.
Setting this option to YES will make -# doxygen replace the get and set methods with a property in the documentation. -# This will only work if the methods are indeed getting or setting a simple -# type. If this is not the case, or you want to show the methods anyway, you -# should set this option to NO. -# The default value is: YES. - -IDL_PROPERTY_SUPPORT = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. By default -# all members of a group must be documented explicitly. -# The default value is: NO. - -DISTRIBUTE_GROUP_DOC = NO - -# If one adds a struct or class to a group and this option is enabled, then also -# any nested class or struct is added to the same group. By default this option -# is disabled and one has to add nested compounds explicitly via \ingroup. -# The default value is: NO. - -GROUP_NESTED_COMPOUNDS = NO - -# Set the SUBGROUPING tag to YES to allow class member groups of the same type -# (for instance a group of public functions) to be put as a subgroup of that -# type (e.g. under the Public Functions section). Set it to NO to prevent -# subgrouping. Alternatively, this can be done per class using the -# \nosubgrouping command. -# The default value is: YES. - -SUBGROUPING = YES - -# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions -# are shown inside the group in which they are included (e.g. using \ingroup) -# instead of on a separate page (for HTML and Man pages) or section (for LaTeX -# and RTF). -# -# Note that this feature does not work in combination with -# SEPARATE_MEMBER_PAGES. -# The default value is: NO.
- -INLINE_GROUPED_CLASSES = NO - -# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions -# with only public data fields or simple typedef fields will be shown inline in -# the documentation of the scope in which they are defined (i.e. file, -# namespace, or group documentation), provided this scope is documented. If set -# to NO, structs, classes, and unions are shown on a separate page (for HTML and -# Man pages) or section (for LaTeX and RTF). -# The default value is: NO. - -INLINE_SIMPLE_STRUCTS = NO - -# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or -# enum is documented as struct, union, or enum with the name of the typedef. So -# typedef struct TypeS {} TypeT, will appear in the documentation as a struct -# with name TypeT. When disabled the typedef will appear as a member of a file, -# namespace, or class. And the struct will be named TypeS. This can typically be -# useful for C code in case the coding convention dictates that all compound -# types are typedef'ed and only the typedef is referenced, never the tag name. -# The default value is: NO. - -TYPEDEF_HIDES_STRUCT = NO - -# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This -# cache is used to resolve symbols given their name and scope. Since this can be -# an expensive process and often the same symbol appears multiple times in the -# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small -# doxygen will become slower. If the cache is too large, memory is wasted. The -# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range -# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 -# symbols. At the end of a run doxygen will report the cache usage and suggest -# the optimal cache size from a speed point of view. -# Minimum value: 0, maximum value: 9, default value: 0. 
- -LOOKUP_CACHE_SIZE = 0 - -#--------------------------------------------------------------------------- -# Build related configuration options -#--------------------------------------------------------------------------- - -# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in -# documentation are documented, even if no documentation was available. Private -# class members and static file members will be hidden unless the -# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. -# Note: This will also disable the warnings about undocumented members that are -# normally produced when WARNINGS is set to YES. -# The default value is: NO. - -EXTRACT_ALL = NO - -# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will -# be included in the documentation. -# The default value is: NO. - -EXTRACT_PRIVATE = NO - -# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual -# methods of a class will be included in the documentation. -# The default value is: NO. - -# EXTRACT_PRIV_VIRTUAL = NO - -# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal -# scope will be included in the documentation. -# The default value is: NO. - -EXTRACT_PACKAGE = NO - -# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be -# included in the documentation. -# The default value is: NO. - -EXTRACT_STATIC = NO - -# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined -# locally in source files will be included in the documentation. If set to NO, -# only classes defined in header files are included. Does not have any effect -# for Java sources. -# The default value is: YES. - -EXTRACT_LOCAL_CLASSES = YES - -# This flag is only useful for Objective-C code. If set to YES, local methods, -# which are defined in the implementation section but not in the interface are -# included in the documentation. 
If set to NO, only methods in the interface are -# included. -# The default value is: NO. - -EXTRACT_LOCAL_METHODS = NO - -# If this flag is set to YES, the members of anonymous namespaces will be -# extracted and appear in the documentation as a namespace called -# 'anonymous_namespace{file}', where file will be replaced with the base name of -# the file that contains the anonymous namespace. By default anonymous namespace -# are hidden. -# The default value is: NO. - -EXTRACT_ANON_NSPACES = NO - -# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all -# undocumented members inside documented classes or files. If set to NO these -# members will be included in the various overviews, but no documentation -# section is generated. This option has no effect if EXTRACT_ALL is enabled. -# The default value is: NO. - -HIDE_UNDOC_MEMBERS = NO - -# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all -# undocumented classes that are normally visible in the class hierarchy. If set -# to NO, these classes will be included in the various overviews. This option -# has no effect if EXTRACT_ALL is enabled. -# The default value is: NO. - -HIDE_UNDOC_CLASSES = NO - -# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend -# (class|struct|union) declarations. If set to NO, these declarations will be -# included in the documentation. -# The default value is: NO. - -HIDE_FRIEND_COMPOUNDS = NO - -# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any -# documentation blocks found inside the body of a function. If set to NO, these -# blocks will be appended to the function's detailed documentation block. -# The default value is: NO. - -HIDE_IN_BODY_DOCS = NO - -# The INTERNAL_DOCS tag determines if documentation that is typed after a -# \internal command is included. If the tag is set to NO then the documentation -# will be excluded. Set it to YES to include the internal documentation. -# The default value is: NO. 
- -INTERNAL_DOCS = NO - -# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file -# names in lower-case letters. If set to YES, upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# (including Cygwin) and Mac users are advised to set this option to NO. -# The default value is: system dependent. - -CASE_SENSE_NAMES = NO - -# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with -# their full class and namespace scopes in the documentation. If set to YES, the -# scope will be hidden. -# The default value is: NO. - -HIDE_SCOPE_NAMES = NO - -# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will -# append additional text to a page's title, such as Class Reference. If set to -# YES the compound reference will be hidden. -# The default value is: NO. - -HIDE_COMPOUND_REFERENCE= NO - -# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of -# the files that are included by a file in the documentation of that file. -# The default value is: YES. - -SHOW_INCLUDE_FILES = YES - -# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each -# grouped member an include statement to the documentation, telling the reader -# which file to include in order to use the member. -# The default value is: NO. - -SHOW_GROUPED_MEMB_INC = NO - -# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include -# files with double quotes in the documentation rather than with sharp brackets. -# The default value is: NO. - -FORCE_LOCAL_INCLUDES = NO - -# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the -# documentation for inline members. -# The default value is: YES.
- -INLINE_INFO = YES - -# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the -# (detailed) documentation of file and class members alphabetically by member -# name. If set to NO, the members will appear in declaration order. -# The default value is: YES. - -SORT_MEMBER_DOCS = YES - -# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief -# descriptions of file, namespace and class members alphabetically by member -# name. If set to NO, the members will appear in declaration order. Note that -# this will also influence the order of the classes in the class list. -# The default value is: NO. - -SORT_BRIEF_DOCS = NO - -# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the -# (brief and detailed) documentation of class members so that constructors and -# destructors are listed first. If set to NO the constructors will appear in the -# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. -# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief -# member documentation. -# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting -# detailed member documentation. -# The default value is: NO. - -SORT_MEMBERS_CTORS_1ST = NO - -# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy -# of group names into alphabetical order. If set to NO the group names will -# appear in their defined order. -# The default value is: NO. - -SORT_GROUP_NAMES = NO - -# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by -# fully-qualified names, including namespaces. If set to NO, the class list will -# be sorted only by class name, not including the namespace part. -# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. -# Note: This option applies only to the class list, not to the alphabetical -# list. -# The default value is: NO. 
- -SORT_BY_SCOPE_NAME = NO - -# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper -# type resolution of all parameters of a function it will reject a match between -# the prototype and the implementation of a member function even if there is -# only one candidate or it is obvious which candidate to choose by doing a -# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still -# accept a match between prototype and implementation in such cases. -# The default value is: NO. - -STRICT_PROTO_MATCHING = NO - -# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo -# list. This list is created by putting \todo commands in the documentation. -# The default value is: YES. - -GENERATE_TODOLIST = YES - -# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test -# list. This list is created by putting \test commands in the documentation. -# The default value is: YES. - -GENERATE_TESTLIST = YES - -# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug -# list. This list is created by putting \bug commands in the documentation. -# The default value is: YES. - -GENERATE_BUGLIST = YES - -# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) -# the deprecated list. This list is created by putting \deprecated commands in -# the documentation. -# The default value is: YES. - -GENERATE_DEPRECATEDLIST= YES - -# The ENABLED_SECTIONS tag can be used to enable conditional documentation -# sections, marked by \if ... \endif and \cond -# ... \endcond blocks. - -ENABLED_SECTIONS = - -# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the -# initial value of a variable or macro / define can have for it to appear in the -# documentation. If the initializer consists of more lines than specified here -# it will be hidden. Use a value of 0 to hide initializers completely. 
The -# appearance of the value of individual variables and macros / defines can be -# controlled using \showinitializer or \hideinitializer command in the -# documentation regardless of this setting. -# Minimum value: 0, maximum value: 10000, default value: 30. - -MAX_INITIALIZER_LINES = 30 - -# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at -# the bottom of the documentation of classes and structs. If set to YES, the -# list will mention the files that were used to generate the documentation. -# The default value is: YES. - -SHOW_USED_FILES = YES - -# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This -# will remove the Files entry from the Quick Index and from the Folder Tree View -# (if specified). -# The default value is: YES. - -SHOW_FILES = YES - -# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces -# page. This will remove the Namespaces entry from the Quick Index and from the -# Folder Tree View (if specified). -# The default value is: YES. - -SHOW_NAMESPACES = YES - -# The FILE_VERSION_FILTER tag can be used to specify a program or script that -# doxygen should invoke to get the current version for each file (typically from -# the version control system). Doxygen will invoke the program by executing (via -# popen()) the command command input-file, where command is the value of the -# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided -# by doxygen. Whatever the program writes to standard output is used as the file -# version. For an example see the documentation. - -FILE_VERSION_FILTER = - -# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed -# by doxygen. The layout file controls the global structure of the generated -# output files in an output format independent way. To create the layout file -# that represents doxygen's defaults, run doxygen with the -l option. 
You can -# optionally specify a file name after the option, if omitted DoxygenLayout.xml -# will be used as the name of the layout file. -# -# Note that if you run doxygen from a directory containing a file called -# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE -# tag is left empty. - -LAYOUT_FILE = - -# The CITE_BIB_FILES tag can be used to specify one or more bib files containing -# the reference definitions. This must be a list of .bib files. The .bib -# extension is automatically appended if omitted. This requires the bibtex tool -# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. -# For LaTeX the style of the bibliography can be controlled using -# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the -# search path. See also \cite for info how to create references. - -CITE_BIB_FILES = - -#--------------------------------------------------------------------------- -# Configuration options related to warning and progress messages -#--------------------------------------------------------------------------- - -# The QUIET tag can be used to turn on/off the messages that are generated to -# standard output by doxygen. If QUIET is set to YES this implies that the -# messages are off. -# The default value is: NO. - -QUIET = YES - -# The WARNINGS tag can be used to turn on/off the warning messages that are -# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES -# this implies that the warnings are on. -# -# Tip: Turn warnings on while writing the documentation. -# The default value is: YES. - -WARNINGS = YES - -# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate -# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag -# will automatically be disabled. -# The default value is: YES. 
- -WARN_IF_UNDOCUMENTED = NO - -# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some parameters -# in a documented function, or documenting parameters that don't exist or using -# markup commands wrongly. -# The default value is: YES. - -WARN_IF_DOC_ERROR = YES - -# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that -# are documented, but have no documentation for their parameters or return -# value. If set to NO, doxygen will only warn about wrong or incomplete -# parameter documentation, but not about the absence of documentation. If -# EXTRACT_ALL is set to YES then this flag will automatically be disabled. -# The default value is: NO. - -WARN_NO_PARAMDOC = NO - -# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when -# a warning is encountered. -# The default value is: NO. - -WARN_AS_ERROR = NO - -# The WARN_FORMAT tag determines the format of the warning messages that doxygen -# can produce. The string should contain the $file, $line, and $text tags, which -# will be replaced by the file and line number from which the warning originated -# and the warning text. Optionally the format may contain $version, which will -# be replaced by the version of the file (if it could be obtained via -# FILE_VERSION_FILTER) -# The default value is: $file:$line: $text. - -WARN_FORMAT = "$file:$line: $text" - -# The WARN_LOGFILE tag can be used to specify a file to which warning and error -# messages should be written. If left blank the output is written to standard -# error (stderr). - -WARN_LOGFILE = - -#--------------------------------------------------------------------------- -# Configuration options related to the input files -#--------------------------------------------------------------------------- - -# The INPUT tag is used to specify the files and/or directories that contain -# documented source files. 
You may enter file names like myfile.cpp or -# directories like /usr/src/myproject. Separate the files or directories with -# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING -# Note: If this tag is empty the current directory is searched. - -INPUT = include - -# This tag can be used to specify the character encoding of the source files -# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses -# libiconv (or the iconv built into libc) for the transcoding. See the libiconv -# documentation (see: https://www.gnu.org/software/libiconv/) for the list of -# possible encodings. -# The default value is: UTF-8. - -INPUT_ENCODING = UTF-8 - -# If the value of the INPUT tag contains directories, you can use the -# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and -# *.h) to filter out the source-files in the directories. -# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# read by doxygen. -# -# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, -# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, -# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, -# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, -# *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf, *.qsf and *.ice. 
- -FILE_PATTERNS = *.c \ - *.cc \ - *.cxx \ - *.cpp \ - *.c++ \ - *.java \ - *.ii \ - *.ixx \ - *.ipp \ - *.i++ \ - *.inl \ - *.idl \ - *.ddl \ - *.odl \ - *.h \ - *.hh \ - *.hxx \ - *.hpp \ - *.h++ \ - *.cs \ - *.d \ - *.php \ - *.php4 \ - *.php5 \ - *.phtml \ - *.inc \ - *.m \ - *.markdown \ - *.md \ - *.mm \ - *.dox \ - *.py \ - *.pyw \ - *.f90 \ - *.f95 \ - *.f03 \ - *.f08 \ - *.f \ - *.for \ - *.tcl \ - *.vhd \ - *.vhdl \ - *.ucf \ - *.qsf \ - *.ice - -# The RECURSIVE tag can be used to specify whether or not subdirectories should -# be searched for input files as well. -# The default value is: NO. - -RECURSIVE = YES - -# The EXCLUDE tag can be used to specify files and/or directories that should be -# excluded from the INPUT source files. This way you can easily exclude a -# subdirectory from a directory tree whose root is specified with the INPUT tag. -# -# Note that relative paths are relative to the directory from which doxygen is -# run. - -EXCLUDE = - -# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or -# directories that are symbolic links (a Unix file system feature) are excluded -# from the input. -# The default value is: NO. - -EXCLUDE_SYMLINKS = NO - -# If the value of the INPUT tag contains directories, you can use the -# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -# certain files from those directories. -# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories for example use the pattern */test/* - -EXCLUDE_PATTERNS = - -# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names -# (namespaces, classes, functions, etc.) that should be excluded from the -# output. The symbol name can be a fully qualified name, a word, or if the -# wildcard * is used, a substring. 
Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test -# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories use the pattern */test/* - -EXCLUDE_SYMBOLS = - -# The EXAMPLE_PATH tag can be used to specify one or more files or directories -# that contain example code fragments that are included (see the \include -# command). - -EXAMPLE_PATH = - -# If the value of the EXAMPLE_PATH tag contains directories, you can use the -# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and -# *.h) to filter out the source-files in the directories. If left blank all -# files are included. - -EXAMPLE_PATTERNS = * - -# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be -# searched for input files to be used with the \include or \dontinclude commands -# irrespective of the value of the RECURSIVE tag. -# The default value is: NO. - -EXAMPLE_RECURSIVE = NO - -# The IMAGE_PATH tag can be used to specify one or more files or directories -# that contain images that are to be included in the documentation (see the -# \image command). - -IMAGE_PATH = - -# The INPUT_FILTER tag can be used to specify a program that doxygen should -# invoke to filter for each input file. Doxygen will invoke the filter program -# by executing (via popen()) the command: -# -# <filter> <input-file> -# -# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the -# name of an input file. Doxygen will then use the output that the filter -# program writes to standard output. If FILTER_PATTERNS is specified, this tag -# will be ignored. -# -# Note that the filter must not add or remove lines; it is applied before the -# code is scanned, but not when the output code is generated. If lines are added -# or removed, the anchors will not be placed correctly.
-# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# properly processed by doxygen. - -INPUT_FILTER = - -# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern -# basis. Doxygen will compare the file name with each pattern and apply the -# filter if there is a match. The filters are a list of the form: pattern=filter -# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how -# filters are used. If the FILTER_PATTERNS tag is empty or if none of the -# patterns match the file name, INPUT_FILTER is applied. -# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# properly processed by doxygen. - -FILTER_PATTERNS = - -# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER) will also be used to filter the input files that are used for -# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). -# The default value is: NO. - -FILTER_SOURCE_FILES = NO - -# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file -# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and -# it is also possible to disable source filtering for a specific pattern using -# *.ext= (so without naming a filter). -# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. - -FILTER_SOURCE_PATTERNS = - -# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that -# is part of the input, its contents will be placed on the main page -# (index.html). This can be useful if you have a project on for instance GitHub -# and want to reuse the introduction page also for the doxygen output. 
- -USE_MDFILE_AS_MAINPAGE = - -#--------------------------------------------------------------------------- -# Configuration options related to source browsing -#--------------------------------------------------------------------------- - -# If the SOURCE_BROWSER tag is set to YES then a list of source files will be -# generated. Documented entities will be cross-referenced with these sources. -# -# Note: To get rid of all source code in the generated output, make sure that -# also VERBATIM_HEADERS is set to NO. -# The default value is: NO. - -SOURCE_BROWSER = NO - -# Setting the INLINE_SOURCES tag to YES will include the body of functions, -# classes and enums directly into the documentation. -# The default value is: NO. - -INLINE_SOURCES = NO - -# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any -# special comment blocks from generated source code fragments. Normal C, C++ and -# Fortran comments will always remain visible. -# The default value is: YES. - -STRIP_CODE_COMMENTS = YES - -# If the REFERENCED_BY_RELATION tag is set to YES then for each documented -# entity all documented functions referencing it will be listed. -# The default value is: NO. - -REFERENCED_BY_RELATION = NO - -# If the REFERENCES_RELATION tag is set to YES then for each documented function -# all documented entities called/used by that function will be listed. -# The default value is: NO. - -REFERENCES_RELATION = NO - -# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set -# to YES then the hyperlinks from functions in REFERENCES_RELATION and -# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will -# link to the documentation. -# The default value is: YES. 
- -REFERENCES_LINK_SOURCE = YES - -# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the -# source code will show a tooltip with additional information such as prototype, -# brief description and links to the definition and documentation. Since this -# will make the HTML file larger and loading of large files a bit slower, you -# can opt to disable this feature. -# The default value is: YES. -# This tag requires that the tag SOURCE_BROWSER is set to YES. - -SOURCE_TOOLTIPS = YES - -# If the USE_HTAGS tag is set to YES then the references to source code will -# point to the HTML generated by the htags(1) tool instead of doxygen built-in -# source browser. The htags tool is part of GNU's global source tagging system -# (see https://www.gnu.org/software/global/global.html). You will need version -# 4.8.6 or higher. -# -# To use it do the following: -# - Install the latest version of global -# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file -# - Make sure the INPUT points to the root of the source tree -# - Run doxygen as normal -# -# Doxygen will invoke htags (and that will in turn invoke gtags), so these -# tools must be available from the command line (i.e. in the search path). -# -# The result: instead of the source browser generated by doxygen, the links to -# source code will now point to the output of htags. -# The default value is: NO. -# This tag requires that the tag SOURCE_BROWSER is set to YES. - -USE_HTAGS = NO - -# If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a -# verbatim copy of the header file for each class for which an include is -# specified. Set to NO to disable this. -# See also: Section \class. -# The default value is: YES. - -VERBATIM_HEADERS = YES - -# If the CLANG_ASSISTED_PARSING tag is set to YES then doxygen will use the -# clang parser (see: http://clang.llvm.org/) for more accurate parsing at the -# cost of reduced performance. 
This can be particularly helpful with template -# rich C++ code for which doxygen's built-in parser lacks the necessary type -# information. -# Note: The availability of this option depends on whether or not doxygen was -# generated with the -Duse_libclang=ON option for CMake. -# The default value is: NO. - -CLANG_ASSISTED_PARSING = NO - -# If clang assisted parsing is enabled you can provide the compiler with command -# line options that you would normally use when invoking the compiler. Note that -# the include paths will already be set by doxygen for the files and directories -# specified with INPUT and INCLUDE_PATH. -# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. - -CLANG_OPTIONS = - -# If clang assisted parsing is enabled you can provide the clang parser with the -# path to the compilation database (see: -# http://clang.llvm.org/docs/HowToSetupToolingForLLVM.html) used when the files -# were built. This is equivalent to specifying the "-p" option to a clang tool, -# such as clang-check. These options will then be passed to the parser. -# Note: The availability of this option depends on whether or not doxygen was -# generated with the -Duse_libclang=ON option for CMake. - -# CLANG_DATABASE_PATH = - -#--------------------------------------------------------------------------- -# Configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- - -# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all -# compounds will be generated. Enable this if the project contains a lot of -# classes, structs, unions or interfaces. -# The default value is: YES. - -ALPHABETICAL_INDEX = YES - -# In case all classes in a project start with a common prefix, all classes will -# be put under the same header in the alphabetical index. 
The IGNORE_PREFIX tag -# can be used to specify a prefix (or a list of prefixes) that should be ignored -# while generating the index headers. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. - -IGNORE_PREFIX = - -#--------------------------------------------------------------------------- -# Configuration options related to the HTML output -#--------------------------------------------------------------------------- - -# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output -# The default value is: YES. - -GENERATE_HTML = NO - -# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a -# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of -# it. -# The default directory is: html. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_OUTPUT = html - -# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each -# generated HTML page (for example: .htm, .php, .asp). -# The default value is: .html. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FILE_EXTENSION = .html - -# The HTML_HEADER tag can be used to specify a user-defined HTML header file for -# each generated HTML page. If the tag is left blank doxygen will generate a -# standard header. -# -# To get valid HTML the header file that includes any scripts and style sheets -# that doxygen needs, which is dependent on the configuration options used (e.g. -# the setting GENERATE_TREEVIEW). It is highly recommended to start with a -# default header using -# doxygen -w html new_header.html new_footer.html new_stylesheet.css -# YourConfigFile -# and then modify the file new_header.html. See also section "Doxygen usage" -# for information on how to generate the default header that doxygen normally -# uses. -# Note: The header is subject to change so you typically have to regenerate the -# default header when upgrading to a newer version of doxygen. 
For a description -# of the possible markers and block names see the documentation. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_HEADER = - -# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each -# generated HTML page. If the tag is left blank doxygen will generate a standard -# footer. See HTML_HEADER for more information on how to generate a default -# footer and what special commands can be used inside the footer. See also -# section "Doxygen usage" for information on how to generate the default footer -# that doxygen normally uses. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FOOTER = - -# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style -# sheet that is used by each HTML page. It can be used to fine-tune the look of -# the HTML output. If left blank doxygen will generate a default style sheet. -# See also section "Doxygen usage" for information on how to generate the style -# sheet that doxygen normally uses. -# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as -# it is more robust and this tag (HTML_STYLESHEET) will in the future become -# obsolete. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_STYLESHEET = - -# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined -# cascading style sheets that are included after the standard style sheets -# created by doxygen. Using this option one can overrule certain style aspects. -# This is preferred over using HTML_STYLESHEET since it does not replace the -# standard style sheet and is therefore more robust against future updates. -# Doxygen will copy the style sheet files to the output directory. -# Note: The order of the extra style sheet files is of importance (e.g. the last -# style sheet in the list overrules the setting of the previous ones in the -# list). For an example see the documentation. 
-# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_EXTRA_STYLESHEET = - -# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or -# other source files which should be copied to the HTML output directory. Note -# that these files will be copied to the base HTML output directory. Use the -# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these -# files. In the HTML_STYLESHEET file, use the file name only. Also note that the -# files will be copied as-is; there are no commands or markers available. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_EXTRA_FILES = - -# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen -# will adjust the colors in the style sheet and background images according to -# this color. Hue is specified as an angle on a colorwheel, see -# https://en.wikipedia.org/wiki/Hue for more information. For instance the value -# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 -# purple, and 360 is red again. -# Minimum value: 0, maximum value: 359, default value: 220. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_HUE = 220 - -# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors -# in the HTML output. For a value of 0 the output will use grayscales only. A -# value of 255 will produce the most vivid colors. -# Minimum value: 0, maximum value: 255, default value: 100. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_SAT = 100 - -# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the -# luminance component of the colors in the HTML output. Values below 100 -# gradually make the output lighter, whereas values above 100 make the output -# darker. 
The value divided by 100 is the actual gamma applied, so 80 represents -# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not -# change the gamma. -# Minimum value: 40, maximum value: 240, default value: 80. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_GAMMA = 80 - -# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML -# page will contain the date and time when the page was generated. Setting this -# to YES can help to show when doxygen was last run and thus if the -# documentation is up to date. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_TIMESTAMP = NO - -# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML -# documentation will contain a main index with vertical navigation menus that -# are dynamically created via Javascript. If disabled, the navigation index will -# consists of multiple levels of tabs that are statically embedded in every HTML -# page. Disable this option to support browsers that do not have Javascript, -# like the Qt help browser. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -# HTML_DYNAMIC_MENUS = YES - -# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML -# documentation will contain sections that can be hidden and shown after the -# page has loaded. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_DYNAMIC_SECTIONS = NO - -# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries -# shown in the various tree structured indices initially; the user can expand -# and collapse entries dynamically later on. Doxygen will expand the tree to -# such a level that at most the specified number of entries are visible (unless -# a fully collapsed tree already exceeds this amount). 
So setting the number of -# entries 1 will produce a full collapsed tree by default. 0 is a special value -# representing an infinite number of entries and will result in a full expanded -# tree by default. -# Minimum value: 0, maximum value: 9999, default value: 100. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_INDEX_NUM_ENTRIES = 100 - -# If the GENERATE_DOCSET tag is set to YES, additional index files will be -# generated that can be used as input for Apple's Xcode 3 integrated development -# environment (see: https://developer.apple.com/xcode/), introduced with OSX -# 10.5 (Leopard). To create a documentation set, doxygen will generate a -# Makefile in the HTML output directory. Running make will produce the docset in -# that directory and running make install will install the docset in -# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at -# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy -# genXcode/_index.html for more information. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_DOCSET = NO - -# This tag determines the name of the docset feed. A documentation feed provides -# an umbrella under which multiple documentation sets from a single provider -# (such as a company or product suite) can be grouped. -# The default value is: Doxygen generated docs. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_FEEDNAME = "Doxygen generated docs" - -# This tag specifies a string that should uniquely identify the documentation -# set bundle. This should be a reverse domain-name style string, e.g. -# com.mycompany.MyDocSet. Doxygen will append .docset to the name. -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_DOCSET is set to YES. 
- -DOCSET_BUNDLE_ID = org.doxygen.Project - -# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify -# the documentation publisher. This should be a reverse domain-name style -# string, e.g. com.mycompany.MyDocSet.documentation. -# The default value is: org.doxygen.Publisher. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_PUBLISHER_ID = org.doxygen.Publisher - -# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. -# The default value is: Publisher. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_PUBLISHER_NAME = Publisher - -# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three -# additional HTML index files: index.hhp, index.hhc, and index.hhk. The -# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: https://www.microsoft.com/en-us/download/details.aspx?id=21138) on -# Windows. -# -# The HTML Help Workshop contains a compiler that can convert all HTML output -# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML -# files are now used as the Windows 98 help format, and will replace the old -# Windows help format (.hlp) on all Windows platforms in the future. Compressed -# HTML files also contain an index, a table of contents, and you can search for -# words in the documentation. The HTML workshop also contains a viewer for -# compressed HTML files. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_HTMLHELP = NO - -# The CHM_FILE tag can be used to specify the file name of the resulting .chm -# file. You can add a path in front of the file if the result should not be -# written to the html output directory. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -CHM_FILE = - -# The HHC_LOCATION tag can be used to specify the location (absolute path -# including file name) of the HTML help compiler (hhc.exe). 
If non-empty, -# doxygen will try to run the HTML help compiler on the generated index.hhp. -# The file has to be specified with full path. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -HHC_LOCATION = - -# The GENERATE_CHI flag controls if a separate .chi index file is generated -# (YES) or that it should be included in the master .chm file (NO). -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -GENERATE_CHI = NO - -# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) -# and project file content. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -CHM_INDEX_ENCODING = - -# The BINARY_TOC flag controls whether a binary table of contents is generated -# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it -# enables the Previous and Next buttons. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -BINARY_TOC = NO - -# The TOC_EXPAND flag can be set to YES to add extra items for group members to -# the table of contents of the HTML help documentation and to the tree view. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -TOC_EXPAND = NO - -# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and -# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that -# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help -# (.qch) of the generated HTML documentation. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_QHP = NO - -# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify -# the file name of the resulting .qch file. The path specified is relative to -# the HTML output folder. -# This tag requires that the tag GENERATE_QHP is set to YES. 
- -QCH_FILE = - -# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help -# Project output. For more information please see Qt Help Project / Namespace -# (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_NAMESPACE = org.doxygen.Project - -# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt -# Help Project output. For more information please see Qt Help Project / Virtual -# Folders (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual- -# folders). -# The default value is: doc. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_VIRTUAL_FOLDER = doc - -# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom -# filter to add. For more information please see Qt Help Project / Custom -# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- -# filters). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_CUST_FILTER_NAME = - -# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the -# custom filter to add. For more information please see Qt Help Project / Custom -# Filters (see: https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom- -# filters). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_CUST_FILTER_ATTRS = - -# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this -# project's filter section matches. Qt Help Project / Filter Attributes (see: -# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_SECT_FILTER_ATTRS = - -# The QHG_LOCATION tag can be used to specify the location of Qt's -# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the -# generated .qhp file. 
-# This tag requires that the tag GENERATE_QHP is set to YES. - -QHG_LOCATION = - -# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be -# generated, together with the HTML files, they form an Eclipse help plugin. To -# install this plugin and make it available under the help contents menu in -# Eclipse, the contents of the directory containing the HTML and XML files needs -# to be copied into the plugins directory of eclipse. The name of the directory -# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. -# After copying Eclipse needs to be restarted before the help appears. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_ECLIPSEHELP = NO - -# A unique identifier for the Eclipse help plugin. When installing the plugin -# the directory name containing the HTML and XML files should also have this -# name. Each documentation set should have its own identifier. -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. - -ECLIPSE_DOC_ID = org.doxygen.Project - -# If you want full control over the layout of the generated HTML pages it might -# be necessary to disable the index and replace it with your own. The -# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top -# of each HTML page. A value of NO enables the index and the value YES disables -# it. Since the tabs in the index contain the same information as the navigation -# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -DISABLE_INDEX = NO - -# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index -# structure should be generated to display hierarchical information. 
If the tag -# value is set to YES, a side panel will be generated containing a tree-like -# index structure (just like the one that is generated for HTML Help). For this -# to work a browser that supports JavaScript, DHTML, CSS and frames is required -# (i.e. any modern browser). Windows users are probably better off using the -# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can -# further fine-tune the look of the index. As an example, the default style -# sheet generated by doxygen has an example that shows how to put an image at -# the root of the tree instead of the PROJECT_NAME. Since the tree basically has -# the same information as the tab index, you could consider setting -# DISABLE_INDEX to YES when enabling this option. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_TREEVIEW = NO - -# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that -# doxygen will group on one line in the generated HTML documentation. -# -# Note that a value of 0 will completely suppress the enum values from appearing -# in the overview section. -# Minimum value: 0, maximum value: 20, default value: 4. -# This tag requires that the tag GENERATE_HTML is set to YES. - -ENUM_VALUES_PER_LINE = 4 - -# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used -# to set the initial width (in pixels) of the frame in which the tree is shown. -# Minimum value: 0, maximum value: 1500, default value: 250. -# This tag requires that the tag GENERATE_HTML is set to YES. - -TREEVIEW_WIDTH = 250 - -# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to -# external symbols imported via tag files in a separate window. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -EXT_LINKS_IN_WINDOW = NO - -# Use this tag to change the font size of LaTeX formulas included as images in -# the HTML documentation. 
When you change the font size after a successful -# doxygen run you need to manually remove any form_*.png images from the HTML -# output directory to force them to be regenerated. -# Minimum value: 8, maximum value: 50, default value: 10. -# This tag requires that the tag GENERATE_HTML is set to YES. - -FORMULA_FONTSIZE = 10 - -# Use the FORMULA_TRANSPARENT tag to determine whether or not the images -# generated for formulas are transparent PNGs. Transparent PNGs are not -# supported properly for IE 6.0, but are supported on all modern browsers. -# -# Note that when changing this option you need to delete any form_*.png files in -# the HTML output directory before the changes have effect. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -# FORMULA_TRANSPARENT = YES - -# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see -# https://www.mathjax.org) which uses client side Javascript for the rendering -# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX -# installed or if you want to formulas look prettier in the HTML output. When -# enabled you may also need to install MathJax separately and configure the path -# to it using the MATHJAX_RELPATH option. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -USE_MATHJAX = NO - -# When MathJax is enabled you can set the default output format to be used for -# the MathJax output. See the MathJax site (see: -# http://docs.mathjax.org/en/latest/output.html) for more details. -# Possible values are: HTML-CSS (which is slower, but has the best -# compatibility), NativeMML (i.e. MathML) and SVG. -# The default value is: HTML-CSS. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_FORMAT = HTML-CSS - -# When MathJax is enabled you need to specify the location relative to the HTML -# output directory using the MATHJAX_RELPATH option. 
The destination directory -# should contain the MathJax.js script. For instance, if the mathjax directory -# is located at the same level as the HTML output directory, then -# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax -# Content Delivery Network so you can quickly see the result without installing -# MathJax. However, it is strongly recommended to install a local copy of -# MathJax from https://www.mathjax.org before deployment. -# The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_RELPATH = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/ - -# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax -# extension names that should be enabled during MathJax rendering. For example -# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_EXTENSIONS = - -# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces -# of code that will be used on startup of the MathJax code. See the MathJax site -# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an -# example see the documentation. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_CODEFILE = - -# When the SEARCHENGINE tag is enabled doxygen will generate a search box for -# the HTML output. The underlying search engine uses javascript and DHTML and -# should work on any modern browser. Note that when using HTML help -# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) -# there is already a search function so this one should typically be disabled. -# For large projects the javascript based search engine can be slow, then -# enabling SERVER_BASED_SEARCH may provide a better solution. 
It is possible to -# search using the keyboard; to jump to the search box use + S -# (what the is depends on the OS and browser, but it is typically -# , /