From 87f3394404ff9f9ec92c906cd4c39b5562aea42e Mon Sep 17 00:00:00 2001
From: Kevin Chen
Date: Thu, 18 Aug 2022 16:36:47 -0700
Subject: [PATCH] TensorRT 8.4.3.1 updates

Signed-off-by: Kevin Chen
---
 README.md                              | 10 +++++-----
 docker/centos-7.Dockerfile             |  2 +-
 docker/ubuntu-18.04.Dockerfile         |  2 +-
 docker/ubuntu-20.04-aarch64.Dockerfile |  2 +-
 docker/ubuntu-20.04.Dockerfile         |  2 +-
 docker/ubuntu-cross-aarch64.Dockerfile |  2 +-
 include/NvInferVersion.h               |  6 +++---
 samples/common/sampleInference.cpp     | 22 ++++++++++++++++------
 8 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index d9aeebb5..a662a59a 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ Need enterprise support? NVIDIA global support is available for TensorRT with th
 To build the TensorRT-OSS components, you will first need the following software packages.
 
 **TensorRT GA build**
-* [TensorRT](https://developer.nvidia.com/nvidia-tensorrt-download) v8.4.2.4
+* [TensorRT](https://developer.nvidia.com/nvidia-tensorrt-download) v8.4.3.1
 
 **System Packages**
 * [CUDA](https://developer.nvidia.com/cuda-toolkit)
@@ -71,16 +71,16 @@ To build the TensorRT-OSS components, you will first need the following software
 
   ```bash
   cd ~/Downloads
-  tar -xvzf TensorRT-8.4.2.4.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
-  export TRT_LIBPATH=`pwd`/TensorRT-8.4.2.4
+  tar -xvzf TensorRT-8.4.3.1.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
+  export TRT_LIBPATH=`pwd`/TensorRT-8.4.3.1
   ```
 
   **Example: Windows on x86-64 with cuda-11.4**
 
   ```powershell
   cd ~\Downloads
-  Expand-Archive .\TensorRT-8.4.2.4.Windows10.x86_64.cuda-11.6.cudnn8.4.zip
-  $Env:TRT_LIBPATH = '$(Get-Location)\TensorRT-8.4.2.4'
+  Expand-Archive .\TensorRT-8.4.3.1.Windows10.x86_64.cuda-11.6.cudnn8.4.zip
+  $Env:TRT_LIBPATH = "$(Get-Location)\TensorRT-8.4.3.1"
   $Env:PATH += 'C:\Program Files (x86)\Microsoft Visual Studio\2017\Professional\MSBuild\15.0\Bin\'
   ```
 
diff --git a/docker/centos-7.Dockerfile b/docker/centos-7.Dockerfile
index 4de1d4e5..3e6d3d66 100644
--- a/docker/centos-7.Dockerfile
+++ b/docker/centos-7.Dockerfile
@@ -21,7 +21,7 @@ ARG OS_VERSION=7
 FROM nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-centos${OS_VERSION}
 LABEL maintainer="NVIDIA CORPORATION"
 
-ENV TRT_VERSION 8.4.2.4
+ENV TRT_VERSION 8.4.3.1
 SHELL ["/bin/bash", "-c"]
 
 # Setup user account
diff --git a/docker/ubuntu-18.04.Dockerfile b/docker/ubuntu-18.04.Dockerfile
index fcc6c3cd..7a2c6ad3 100644
--- a/docker/ubuntu-18.04.Dockerfile
+++ b/docker/ubuntu-18.04.Dockerfile
@@ -21,7 +21,7 @@ ARG OS_VERSION=18.04
 FROM nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu${OS_VERSION}
 LABEL maintainer="NVIDIA CORPORATION"
 
-ENV TRT_VERSION 8.4.2.4
+ENV TRT_VERSION 8.4.3.1
 SHELL ["/bin/bash", "-c"]
 
 # Setup user account
diff --git a/docker/ubuntu-20.04-aarch64.Dockerfile b/docker/ubuntu-20.04-aarch64.Dockerfile
index 4aa6e0c8..e301ded8 100644
--- a/docker/ubuntu-20.04-aarch64.Dockerfile
+++ b/docker/ubuntu-20.04-aarch64.Dockerfile
@@ -18,7 +18,7 @@
 # Multi-arch container support available in non-cudnn containers.
 FROM nvidia/cuda:11.4.2-devel-ubuntu20.04
 
-ENV TRT_VERSION 8.4.2.4
+ENV TRT_VERSION 8.4.3.1
 SHELL ["/bin/bash", "-c"]
 
 # Setup user account
diff --git a/docker/ubuntu-20.04.Dockerfile b/docker/ubuntu-20.04.Dockerfile
index 8bc2fff4..05a75abf 100644
--- a/docker/ubuntu-20.04.Dockerfile
+++ b/docker/ubuntu-20.04.Dockerfile
@@ -21,7 +21,7 @@ ARG OS_VERSION=20.04
 FROM nvidia/cuda:${CUDA_VERSION}-cudnn8-devel-ubuntu${OS_VERSION}
 LABEL maintainer="NVIDIA CORPORATION"
 
-ENV TRT_VERSION 8.4.2.4
+ENV TRT_VERSION 8.4.3.1
 SHELL ["/bin/bash", "-c"]
 
 # Setup user account
diff --git a/docker/ubuntu-cross-aarch64.Dockerfile b/docker/ubuntu-cross-aarch64.Dockerfile
index 7a7bceb6..913f3cec 100644
--- a/docker/ubuntu-cross-aarch64.Dockerfile
+++ b/docker/ubuntu-cross-aarch64.Dockerfile
@@ -21,7 +21,7 @@ ARG OS_VERSION=20.04
 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${OS_VERSION}
 LABEL maintainer="NVIDIA CORPORATION"
 
-ENV TRT_VERSION 8.4.2.4
+ENV TRT_VERSION 8.4.3.1
 ENV DEBIAN_FRONTEND=noninteractive
 
 ARG uid=1000
diff --git a/include/NvInferVersion.h b/include/NvInferVersion.h
index d6026af4..35f1f8f9 100644
--- a/include/NvInferVersion.h
+++ b/include/NvInferVersion.h
@@ -21,8 +21,8 @@
 
 #define NV_TENSORRT_MAJOR 8 //!< TensorRT major version.
 #define NV_TENSORRT_MINOR 4 //!< TensorRT minor version.
-#define NV_TENSORRT_PATCH 2 //!< TensorRT patch version.
-#define NV_TENSORRT_BUILD 4 //!< TensorRT build number.
+#define NV_TENSORRT_PATCH 3 //!< TensorRT patch version.
+#define NV_TENSORRT_BUILD 1 //!< TensorRT build number.
 
 #define NV_TENSORRT_LWS_MAJOR 0 //!< TensorRT LWS major version.
 #define NV_TENSORRT_LWS_MINOR 0 //!< TensorRT LWS minor version.
@@ -30,6 +30,6 @@
 
 #define NV_TENSORRT_SONAME_MAJOR 8 //!< Shared object library major version number.
 #define NV_TENSORRT_SONAME_MINOR 4 //!< Shared object library minor version number.
-#define NV_TENSORRT_SONAME_PATCH 2 //!< Shared object library patch version number.
+#define NV_TENSORRT_SONAME_PATCH 3 //!< Shared object library patch version number.
 
 #endif // NV_INFER_VERSION_H
diff --git a/samples/common/sampleInference.cpp b/samples/common/sampleInference.cpp
index b4425acb..a14c70d8 100644
--- a/samples/common/sampleInference.cpp
+++ b/samples/common/sampleInference.cpp
@@ -579,7 +579,7 @@ class Iteration
         if (!skipTransfers)
         {
             record(EventType::kINPUT_S, StreamType::kINPUT);
-            mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
+            setInputData(false);
             record(EventType::kINPUT_E, StreamType::kINPUT);
             wait(EventType::kINPUT_E, StreamType::kCOMPUTE); // Wait for input DMA before compute
         }
@@ -597,7 +597,7 @@ class Iteration
         {
             wait(EventType::kCOMPUTE_E, StreamType::kOUTPUT); // Wait for compute before output DMA
             record(EventType::kOUTPUT_S, StreamType::kOUTPUT);
-            mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
+            fetchOutputData(false);
             record(EventType::kOUTPUT_E, StreamType::kOUTPUT);
         }
 
@@ -641,14 +641,24 @@ class Iteration
         getStream(StreamType::kINPUT).wait(gpuStart);
     }
 
-    void setInputData()
+    void setInputData(bool sync)
     {
         mBindings.transferInputToDevice(getStream(StreamType::kINPUT));
+        // additional sync to avoid overlapping with inference execution.
+        if (sync)
+        {
+            getStream(StreamType::kINPUT).synchronize();
+        }
     }
 
-    void fetchOutputData()
+    void fetchOutputData(bool sync)
     {
         mBindings.transferOutputToHost(getStream(StreamType::kOUTPUT));
+        // additional sync to avoid overlapping with inference execution.
+        if (sync)
+        {
+            getStream(StreamType::kOUTPUT).synchronize();
+        }
     }
 
 private:
@@ -841,7 +851,7 @@ void inferenceExecution(InferenceOptions const& inference, InferenceEnvironment&
             streamId, inference, *iEnv.template getContext<ContextType>(streamId), *iEnv.bindings[streamId]);
         if (inference.skipTransfers)
         {
-            iteration->setInputData();
+            iteration->setInputData(true);
         }
         iStreams.emplace_back(iteration);
     }
@@ -862,7 +872,7 @@ void inferenceExecution(InferenceOptions const& inference, InferenceEnvironment&
     if (inference.skipTransfers)
     {
         for (auto& s : iStreams)
         {
-            s->fetchOutputData();
+            s->fetchOutputData(true);
         }
     }
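
The version bump above touches three places that must stay in sync: the README download instructions, the TRT_VERSION environment variable baked into each build container, and the NV_TENSORRT_* macros in NvInferVersion.h. A quick way to confirm that an application actually loaded the matching libnvinfer at runtime is to compare the header version against the library's self-reported one. The following is a minimal sketch, not part of the patch; it assumes only the public getInferLibVersion() entry point and the composed NV_TENSORRT_VERSION macro (MAJOR * 1000 + MINOR * 100 + PATCH, i.e. 8403 for this release; the build number is not part of the composed value):

```cpp
// version_check.cpp -- compile with: g++ version_check.cpp -lnvinfer
#include <NvInferRuntime.h> // pulls in NvInferVersion.h and getInferLibVersion()
#include <cstdio>

int main()
{
    int32_t const compiled = NV_TENSORRT_VERSION; // version of the headers, e.g. 8403
    int32_t const linked = getInferLibVersion();  // version of the loaded libnvinfer
    std::printf("headers: %d, library: %d\n", compiled, linked);
    return compiled == linked ? 0 : 1;            // nonzero exit on mismatch
}
```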
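The sampleInference.cpp change is more than a version bump: setInputData() and fetchOutputData() gain a sync flag. Inside the timed enqueue path they are called with sync == false, because the CUDA events recorded around each transfer already order it against the compute stream; the blocking path (sync == true) is reserved for the one-time transfers issued outside the measurement window when per-iteration transfers are skipped (trtexec's --noDataTransfers mode), so the copy cannot overlap with, and skew, the measured inference. Below is a standalone sketch of the same pattern, independent of the sample's Iteration class; copyInputAsync and its arguments are hypothetical names, and only the CUDA runtime API is assumed:

```cpp
#include <cuda_runtime_api.h>

// Enqueue a host-to-device copy on `stream`; when `sync` is true, block until
// it completes so nothing enqueued later on another stream can overlap it.
bool copyInputAsync(void* devDst, void const* hostSrc, size_t bytes, cudaStream_t stream, bool sync)
{
    if (cudaMemcpyAsync(devDst, hostSrc, bytes, cudaMemcpyHostToDevice, stream) != cudaSuccess)
    {
        return false;
    }
    // Additional sync to avoid overlapping the copy with inference execution,
    // mirroring setInputData(true) in the patched sample.
    if (sync)
    {
        return cudaStreamSynchronize(stream) == cudaSuccess;
    }
    return true; // caller relies on event/stream ordering instead
}
```

The non-blocking path is the right default for steady-state benchmarking, where event-based ordering keeps the copy, compute, and fetch streams pipelined; the explicit synchronize matters only for the up-front transfer, which must be fully on the device before the first timed enqueue.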