diff --git a/.ci/sonar-project.properties b/.ci/sonar-project.properties index d2ea04418..19d3c886c 100644 --- a/.ci/sonar-project.properties +++ b/.ci/sonar-project.properties @@ -18,7 +18,7 @@ sonar.lang.patterns.cpp=**/*.cpp,**/*.cc,**/*.cxx,**/*.c++,**/*.h,**/*.hpp,**/*. sonar.working.directory=build/sonar-workdir sonar.cfamily.build-wrapper-output=build/bw-output -# it expects cppunit xml format. googletest format is uncompatible. +# it expects cppunit xml format. googletest format is incompatible. # sonar.cfamily.cppunit.reportsPath=build/unittest-reports sonar.cfamily.gcov.reportsPath=build diff --git a/.clang-tidy b/.clang-tidy index 9682bbabf..ce40e8080 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -22,7 +22,7 @@ CheckOptions: - key: readability-function-size.ParameterThreshold value: '6' - key: readability-function-size.NestingThreshold - value: '6' + value: '7' - key: readability-function-size.VariableThreshold value: '30' - key: readability-simplify-boolean-expr.ChainedConditionalAssignment @@ -48,5 +48,5 @@ CheckOptions: - key: modernize-use-default-member-init.UseAssignment value: 1 - key: cppcoreguidelines-macro-usage.AllowedRegexp - value: 'DEBUG|_GLIBCXX_SANITIZE_VECTOR|RAWSPEED_SOURCE_DIR|STR|XSTR|BUFFER_PADDING|BSWAP16|BSWAP32|BSWAP64|ThrowExceptionHelper|ThrowIOE|ThrowRSE|ThrowCME|ThrowRDE|ThrowRPE|ThrowTPE|ThrowFIE|ThrowCPE|ThrowFPE|DECODER|fuji_quant_gradient|JPEG_MEMSRC|RLVTABLE|PRECISION_MIN|PRECISION_MAX|MARKER_BAND_END|SQR|RS_CAMERAS_XML_PATH|FULLDECODE|IMPL|IMPL0|IMPL1|PUMP|DECODE|PARSER|GEN_E|GEN_PFS|GEN_PSS|BENCHMARK_CAPTURE_NAME|OMPFIRSTPRIVATECLAUSE|OMPSHAREDCLAUSE' + value: 
'DEBUG|_GLIBCXX_SANITIZE_VECTOR|RAWSPEED_SOURCE_DIR|STR|XSTR|BSWAP16|BSWAP32|BSWAP64|ThrowExceptionHelper|ThrowIOE|ThrowRSE|ThrowCME|ThrowRDE|ThrowRPE|ThrowTPE|ThrowFIE|ThrowCPE|ThrowFPE|DECODER|fuji_quant_gradient|JPEG_MEMSRC|RLVTABLE|PRECISION_MIN|PRECISION_MAX|MARKER_BAND_END|SQR|RS_CAMERAS_XML_PATH|FULLDECODE|IMPL|IMPL0|IMPL1|PUMP|DECODE|PARSER|GEN_E|GEN_PFS|GEN_PSS|BENCHMARK_CAPTURE_NAME|OMPFIRSTPRIVATECLAUSE|OMPSHAREDCLAUSE|RAWSPEED_UNLIKELY_FUNCTION|RAWSPEED_NOINLINE' ... diff --git a/.travis.yml b/.travis.yml index f327193f2..15dbfa4b5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,7 +27,14 @@ install: echo "oracle-java9-installer hold" | sudo dpkg --set-selections; travis_retry sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y; travis_retry sudo apt-get update -q; - travis_retry sudo apt-get install -y -q -f --fix-missing clang-3.5 clang-3.9 cmake g++-5 git libiomp-dev libjpeg-dev libpugixml-dev libxml2-utils make ninja-build zlib1g-dev; + travis_retry sudo apt-get install -y -q -f --fix-missing clang-3.9 cmake g++-5 git libiomp-dev libjpeg-dev libpugixml-dev libxml2-utils make ninja-build zlib1g-dev; + fi; + - if [[ "$TRAVIS_OS_NAME" == "linux" && "$EXTRA" == "NODOCKER" && "$TRAVIS_DIST" == "xenial" ]]; then + echo "oracle-java8-installer hold" | sudo dpkg --set-selections; + echo "oracle-java9-installer hold" | sudo dpkg --set-selections; + travis_retry sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y; + travis_retry sudo apt-get update -q; + travis_retry sudo apt-get install -y -q -f --fix-missing clang-3.5; fi; - if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then travis_retry brew update > /dev/null && brew tap Homebrew/bundle && cd .ci && brew bundle --verbose; @@ -62,26 +69,26 @@ jobs: include: - stage: test os: linux - dist: trusty + dist: bionic sudo: required services: - docker env: CC=gcc-9 CXX=g++-9 - stage: test os: linux - dist: trusty + dist: bionic sudo: required services: - docker env: CC=gcc-9 CXX=g++-9 ECO="-DWITH_OPENMP=OFF" - os: 
linux - dist: trusty + dist: bionic sudo: required services: - docker env: CC=clang-9 CXX=clang++-9 - os: linux - dist: trusty + dist: bionic sudo: required services: - docker @@ -95,40 +102,40 @@ jobs: #- os: osx # env: CC=cc CXX=c++ FLAVOR=Coverage - os: linux - dist: trusty + dist: bionic sudo: required services: - docker # Don't forget to ensure that FindLLVMClangTidy.cmake is also bumped env: CC=clang-9 CXX=clang++-9 TARGET=STATICANALYSIS ECO="-DUSE_CLANG_TIDY=ON" - os: linux - dist: trusty + dist: bionic sudo: required services: - docker env: CC=clang-9 CXX=clang++-9 ECO="-DWITH_OPENMP=OFF" - os: linux - dist: trusty + dist: bionic sudo: required env: CC=gcc-5 CXX=g++-5 EXTRA=NODOCKER - os: linux - dist: trusty + dist: xenial sudo: required - env: CC=clang-3.5 CXX=clang++-3.5 EXTRA=NODOCKER ECO="-DWITH_OPENMP=OFF" + env: CC=clang-3.5 CXX=clang++-3.5 EXTRA=NODOCKER ECO="-DWITH_OPENMP=OFF -DWITH_ZLIB=OFF" - os: linux - dist: trusty + dist: bionic sudo: required env: CC=clang-3.9 CXX=clang++-3.9 EXTRA=NODOCKER #- stage: deploy # os: linux # sudo: required - # dist: xenial + # dist: bionic # edge: true # services: # - docker - stage: deploy os: linux - dist: trusty + dist: bionic sudo: required services: - docker diff --git a/README.rst b/README.rst index c15f58100..302ac72f3 100644 --- a/README.rst +++ b/README.rst @@ -17,6 +17,7 @@ rawspeed |travis-ci| |appveyor-ci| OBS_ |codecov| |oss-fuzz| ================================================================================ RawSpeed Developer Information ================================================================================ + What is RawSpeed? 
-------------------------------------------------------------------------------- diff --git a/bench/librawspeed/bench/Common.h b/bench/librawspeed/bench/Common.h index 7b4f99282..e4819b89b 100644 --- a/bench/librawspeed/bench/Common.h +++ b/bench/librawspeed/bench/Common.h @@ -18,6 +18,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#pragma once + #include "common/Point.h" // for iPoint2D #include // for size_t diff --git a/bench/librawspeed/io/BitStreamBenchmark.cpp b/bench/librawspeed/io/BitStreamBenchmark.cpp index 4919cd5b5..03f5e6dd6 100644 --- a/bench/librawspeed/io/BitStreamBenchmark.cpp +++ b/bench/librawspeed/io/BitStreamBenchmark.cpp @@ -61,7 +61,8 @@ static inline void BM_BitStream(benchmark::State& state, Endianness endianness, assert((Step == 1) || rawspeed::isAligned(Step, 2)); assert((fillSize == 1) || rawspeed::isAligned(fillSize, 2)); - const rawspeed::Buffer b(state.range(0)); + auto storage = rawspeed::Buffer::Create(state.range(0)); + const rawspeed::Buffer b(storage.get(), state.range(0)); assert(b.getSize() > 0); assert(b.getSize() == (size_t)state.range(0)); diff --git a/cmake/src-dependencies.cmake b/cmake/src-dependencies.cmake index 8e1f97ee3..52e716cee 100644 --- a/cmake/src-dependencies.cmake +++ b/cmake/src-dependencies.cmake @@ -43,21 +43,11 @@ if(WITH_OPENMP) message(STATUS "Looking for OpenMP - found (system)") endif() - # FIXME: OpenMP::OpenMP_CXX target, and ${OpenMP_CXX_LIBRARIES} were both - # added in cmake-3.9. Until then, this is correct: - if(NOT TARGET OpenMP::OpenMP_CXX) - add_library(OpenMP::OpenMP_CXX INTERFACE IMPORTED) - if(OpenMP_CXX_FLAGS) - set_property(TARGET OpenMP::OpenMP_CXX PROPERTY INTERFACE_COMPILE_OPTIONS ${OpenMP_CXX_FLAGS}) - set_property(TARGET OpenMP::OpenMP_CXX PROPERTY INTERFACE_LINK_LIBRARIES ${OpenMP_CXX_FLAGS}) - # Yes, both of them to the same value. - endif() - endif() - # The wrapper library that *actually* should be linked to. 
add_library(RawSpeed::OpenMP_CXX INTERFACE IMPORTED) set_property(TARGET RawSpeed::OpenMP_CXX PROPERTY INTERFACE_COMPILE_OPTIONS $) set_property(TARGET RawSpeed::OpenMP_CXX APPEND PROPERTY INTERFACE_COMPILE_OPTIONS ${OPENMP_VERSION_SPECIFIER}) + set_property(TARGET RawSpeed::OpenMP_CXX PROPERTY INTERFACE_INCLUDE_DIRECTORIES $) if(NOT USE_BUNDLED_LLVMOPENMP) set_property(TARGET RawSpeed::OpenMP_CXX PROPERTY INTERFACE_LINK_LIBRARIES $) else() @@ -145,7 +135,7 @@ add_feature_info("XML reading" HAVE_PUGIXML "used for loading of data/cameras.xm unset(HAVE_JPEG) if(WITH_JPEG) message(STATUS "Looking for JPEG") - find_package(JPEG 1.5.0) # libjpeg-turbo + find_package(JPEG) if(NOT JPEG_FOUND) message(SEND_ERROR "Did not find JPEG! Either make it find JPEG, or pass -DWITH_JPEG=OFF to disable JPEG.") else() diff --git a/data/cameras.xml b/data/cameras.xml index bd040d63a..2b8ad46ec 100644 --- a/data/cameras.xml +++ b/data/cameras.xml @@ -4275,6 +4275,17 @@ + + Olympus E-M5 Mark III + + RED + GREEN + GREEN + BLUE + + + + Olympus E-P1 @@ -8371,6 +8382,17 @@ + + Sony DSC-RX100M7 + + RED + GREEN + GREEN + BLUE + + + + Sony DSC-RX1R @@ -8783,6 +8805,17 @@ + + Sony ILCE-6600 + + RED + GREEN + GREEN + BLUE + + + + Sony ILCE-7 @@ -9701,6 +9734,20 @@ + + Fujifilm X-A10 + + RED + GREEN + GREEN + BLUE + + + + + + + Fujifilm XQ1 diff --git a/docs/IntegrationTesting.rst b/docs/IntegrationTesting.rst new file mode 100644 index 000000000..9df5ee639 --- /dev/null +++ b/docs/IntegrationTesting.rst @@ -0,0 +1,163 @@ +.. _integration_testing: + +================================================================================ +Integration Testing +================================================================================ + +.. seealso:: + + :ref:`RSA` + +As a first step, you *need* to acquire the sample archive you will want to use, +see e.g. :ref:`rpu_rsync`. + +Due to the specifics of the the domain, just having the samples you want to use +for integration testing is not sufficient. 
Given *just* the samples, it is not +possible to verify anything in an automatic manner. + +You can, of course, load the samples into some software that uses the +`RawSpeed `_ library, for example into darktable_, and see that they +decoded into some meaningful image, but that is indirect and tests much more +than just the library. + +.. _rawspeed: https://github.com/darktable-org/rawspeed +.. _darktable: https://github.com/darktable-org/darktable + +So instead, we want to document (record, called `a hash` onwards) how the +samples decode 'currently' (in a trusted, known-good hardware/software/compiler +stack/compilation options etc), store this per-image info, and then just check +against it afterwards (after modifying the library, or anything else really). + +.. _producing_trusted_reference_hashes: + +Producing Trusted reference Hashes +---------------------------------- + +Optionally, it may or may not be a good idea to first manually inspect the +samples (via e.g. darktable_), make a note which are seemingly currently decoded +correctly, and which are not. + +For best results the Trusted Hashes should be produced in the most mundane +environment - stable mainstream hardware (little-endian, x86; no overclocking), +stable software stack, and most importantly a trusted compiler. You also +shouldn't use ``-Oomg-optimize -fmoar-performance`` compilation flags for this. + +.. WARNING:: + Trusted baseline hashes are the very foundation for any further integration + testing. It is always important to have a good, stable foundation. It will not + be productive if those hashes are produced incorrectly, be it either because + the hardware is faulty (RAM/disk bit flips), or the library was miscompiled. + +Other than that, generating said hashes is pretty trivial. 
+ +Specifying location of Reference Sample Archive +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In order to make use of build system integration of integration testing, +we must first tell it where the :ref:`sample set` is located, +for example: + +:: + + $ cmake -DRAWSPEED_ENABLE_SAMPLE_BASED_TESTING:BOOL=ON \ + -DRAWSPEED_REFERENCE_SAMPLE_ARCHIVE:PATH="~/raw-camera-samples/raw.pixls.us-unique/" \ + + +.. NOTE:: + + The location of the samples must be writable if you intend to produce hashes. + +Other required CMake flags +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +We also need to build the code that is actually responsible for these +integration tests: + +:: + + $ cmake -DBUILD_TOOLS:BOOL=ON \ + + +Producing hashes +~~~~~~~~~~~~~~~~ + +After that is done, we can finally create the hashes, and for that there is +a ``rstest-create`` build target: + +:: + + $ cmake --build . -- rstest-clean # get rid of any pre-existing hashes, just in case. + [1/1] Running utility command for rstest-clean + $ cmake --build . -- rstest-create + + [0/1] Running utility command for rstest-create + : starting decoding ... + : <> MB / <> ms + + Total decoding time: <>s + + All good, all hashes created! + +And that's it, we've got the hashes! They were placed next to the samples in +the archive, with ``.hash`` suffix appended. Maybe you want to use some kind of +layered file system (overlayfs_ e.g.) to separate those from the actual samples, +up to you. + +.. _overlayfs: https://www.kernel.org/doc/Documentation/filesystems/overlayfs.txt + +Performing Integration Testing +------------------------------ + +.. IMPORTANT:: + + Do ensure that the library is actually re-compiled with the changes you want + to test. To err on the safe side, sometimes it is useful to remove the entire + build directory and make a fresh build! 
+ +After you have performed the changes you wanted to - modified the library, +or changed hardware/software/compiler/compiler flags - and you want to validate +that those changes did not cause any regressions in the sample set, it is time +to actually make use of the Trusted Reference Hashes that we have created +previously. + +For that, there is a ``rstest-test`` build target. +If everything is good you may see: + +:: + + $ cmake --build . -- rstest-test + [0/1] Running utility command for rstest-test + : starting decoding ... + : <> MB / <> ms + Total decoding time: <>s + + All good, no tests failed! + +Or, if there are issues, you may see: + +:: + + $ cmake --build . -- rstest-test + [0/1] Running utility command for rstest-test + : starting decoding ... + : <> MB / <> ms + failed: hash/metadata mismatch + Total decoding time: <>s + + WARNING: the following <> tests have failed: + failed: hash/metadata mismatch + See rstest.log for details. + <...> + ninja: build stopped: subcommand failed. + +Unless the process crashed, it should have created +``.hash.failed``, and outputted the diff_ between +the existing ``.hash`` Trusted Hash and the actual result +``.hash.failed`` into ``rstest.log`` file in root of the +build dir. + +.. _diff: https://manpages.debian.org/unstable/diffutils/diff.1.en.html + +.. seealso:: + + :ref:`lnt` diff --git a/docs/ReferenceSampleArchive.rst b/docs/ReferenceSampleArchive.rst new file mode 100644 index 000000000..cbf471628 --- /dev/null +++ b/docs/ReferenceSampleArchive.rst @@ -0,0 +1,97 @@ +.. _RSA: + +================================================================================ +Reference Sample Archive +================================================================================ + +While there is some test coverage via unit tests, the major bulk of testing +is achieved via integration tests over some sample set. + +.. 
_sampleset: + +What is considered a sample set +------------------------------- + +Here and onwards, a sample set is just a directory with samples, and two special +files. There should be a ``timestamp.txt`` containing a +`Unix time `_ (presumably, of when the set was last updated). +Most importantly, it **must** also contain a ``filelist.sha1`` file in the +top-level directory, which is used as a digest to the contents of said sample +set. Said file **must** be a valid sha1sum_ output, with format: + +:: + + <40-char SHA1> + +.. _Unix_time: https://en.wikipedia.org/wiki/Unix_time + +.. _sha1sum: https://manpages.debian.org/unstable/coreutils/sha1sum.1.en.html + +Canonical Sample Set +-------------------- + +The canonical raw sample data set is `raw.pixls.us `_. +It is freely licensed - all new samples are in Public Domain under +`CC0 1.0 `_ license (85+% of samples and counting), +however some older samples are still under more restrictive +`CC BY-NC-SA 4.0 `_ license. + +**Please read** `this `_ **for more info on how to contribute samples!** + +.. _RPU: https://raw.pixls.us/ + +.. _CC0: https://creativecommons.org/publicdomain/zero/1.0/ + +.. _BYNCSA40: http://creativecommons.org/licenses/by-nc-sa/4.0/ + +.. _rpu-post: https://discuss.pixls.us/t/raw-samples-wanted/5420?u=lebedevri + +Full sample set +~~~~~~~~~~~~~~~ + +The complete set, that includes every sample available, and thus has as good +coverage as we can get, but as a downside it is *quite* bulky - |rpu-button-size| +total, spanning |rpu-button-samples|. + +.. |rpu-button-cameras| image:: https://raw.pixls.us/button-cameras.svg + :target: https://raw.pixls.us/ + +.. 
|rpu-button-samples| image:: https://raw.pixls.us/button-samples.svg + :target: https://raw.pixls.us/ + +It is accessible at: https://raw.pixls.us/data/ + +Masterset +~~~~~~~~~ + +But there is also a masterset, with just a handful of hand-picked samples that +provide reasonable-ish coverage while spanning only ~ :math:`1/22`'th of the +disk footprint and ~ :math:`1/44`'th of the sample count of the full set. + +.. CAUTION:: + Unless you want to perform rigorous regression testing + the masterset is strongly recommended! + +.. TIP:: + Masterset **only** contains samples that are in `public domain `_. + +It is accessible at: https://raw.pixls.us/data-unique/ + +.. |rpu-button-size| image:: https://raw.pixls.us/button-size.svg + +.. _rpu_rsync: + +Acquiring Canonical Sample Set +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Pick which sample set you will want to acquire. Be wary of disk footprint! +Probably the easiest way to fetch it is via rsync_, for example: + +:: + + $ rsync -vvrLtW --preallocate --delete --compress --compress-level=1 --progress \ + rsync://raw.pixls.us/data-unique/ ~/raw-camera-samples/raw.pixls.us-unique/ + $ # it might be a good idea to verify consistency afterwards: + $ sha1sum -c --strict ~/raw-camera-samples/raw.pixls.us-unique/filelist.sha1 + +.. _rsync: https://manpages.debian.org/unstable/rsync/rsync.1.en.html diff --git a/docs/index.rst b/docs/index.rst index a28c8181d..3fd5c84cd 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -12,6 +12,8 @@ Welcome to RawSpeed's documentation! 
self CameraSupport + ReferenceSampleArchive + IntegrationTesting Doxygen lnt/index.rst diff --git a/fuzz/librawspeed/decompressors/HuffmanTable/Common.h b/fuzz/librawspeed/decompressors/HuffmanTable/Common.h index ecbe44597..f3fef06cb 100644 --- a/fuzz/librawspeed/decompressors/HuffmanTable/Common.h +++ b/fuzz/librawspeed/decompressors/HuffmanTable/Common.h @@ -18,6 +18,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#pragma once + #include "io/Buffer.h" // for Buffer #include "io/ByteStream.h" // for ByteStream diff --git a/fuzz/librawspeed/fuzz/Common.h b/fuzz/librawspeed/fuzz/Common.h index e9d3ccdd5..c6d081faf 100644 --- a/fuzz/librawspeed/fuzz/Common.h +++ b/fuzz/librawspeed/fuzz/Common.h @@ -18,6 +18,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#pragma once + #include "common/RawImage.h" // for RawImage #include "metadata/ColorFilterArray.h" // for ColorFilterArray diff --git a/lnt/README.rst b/lnt/README.rst index eae01bfb7..12349587f 100644 --- a/lnt/README.rst +++ b/lnt/README.rst @@ -1,5 +1,7 @@ .. _my-label: lnt +.. _lnt: + ================================= LLVM LNT / Test-Suite Integration ================================= @@ -15,15 +17,16 @@ Prerequisites * all of the normal prerequisites for building **development** version of RawSpeed. * python's `virtualenv `_ * `llvm-size`, `llvm-lit `_ (from - `llvm `_, - `llvm-tools `_ packages) - * A checkout of raw sample archive of you choice. - - It is suggested to use `https://raw.pixls.us `_ - masterset (see CI scripts for how to get it) + `llvm `_, + `llvm-tools `_ packages) + * A checkout of raw sample archive of you choice. It is suggested to use + `https://raw.pixls.us `_ masterset. + Please see :ref:`RSA` page for details. * Reference hashes for the raws in the sampleset. Generate them via ``$ ninja rstest-create`` from your **trusted** (!) dev build. 
+ Please see :ref:`integration_testing` and + :ref:`producing_trusted_reference_hashes` pages for details. Getting it done --------------- @@ -55,8 +58,9 @@ Getting it done # View results. $SANDBOX/bin/lnt runserver $PERFDB -See also --------- + +.. seealso:: + * https://llvm.org/docs/TestSuiteGuide.html#common-configuration-options * ``$ $SANDBOX/bin/lnt runtest test_suite --help`` * ``$ $SANDBOX/bin/lnt --help`` diff --git a/src/config.h.in b/src/config.h.in index cfa6f8c66..870704b71 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -86,3 +86,6 @@ #ifndef __has_extension #define __has_extension __has_feature // Compatibility with pre-3.0 compilers. #endif + +#define RAWSPEED_UNLIKELY_FUNCTION __attribute__((cold)) +#define RAWSPEED_NOINLINE __attribute__((noinline)) diff --git a/src/librawspeed/common/Array2DRef.h b/src/librawspeed/common/Array2DRef.h index ec3a6e16f..a67f68e1a 100644 --- a/src/librawspeed/common/Array2DRef.h +++ b/src/librawspeed/common/Array2DRef.h @@ -33,6 +33,8 @@ template class Array2DRef { friend Array2DRef; // We need to be able to convert to const version. + inline T& operator[](int row) const; + public: using value_type = T; using cvless_value_type = typename std::remove_cv::type; @@ -62,7 +64,7 @@ template class Array2DRef { return {storage->data(), width, height}; } - inline T& operator()(int x, int y) const; + inline T& operator()(int row, int col) const; }; template @@ -74,14 +76,18 @@ Array2DRef::Array2DRef(T* data, const int dataWidth, const int dataHeight, _pitch = (dataPitch == 0 ? 
dataWidth : dataPitch); } -template -T& Array2DRef::operator()(const int x, const int y) const { +template T& Array2DRef::operator[](const int row) const { assert(_data); - assert(x >= 0); - assert(y >= 0); - assert(x < width); - assert(y < height); - return _data[y * _pitch + x]; + assert(row >= 0); + assert(row < height); + return _data[row * _pitch]; +} + +template +T& Array2DRef::operator()(const int row, const int col) const { + assert(col >= 0); + assert(col < width); + return (&(operator[](row)))[col]; } } // namespace rawspeed diff --git a/src/librawspeed/common/CMakeLists.txt b/src/librawspeed/common/CMakeLists.txt index cab66c1ce..19e5b44a3 100644 --- a/src/librawspeed/common/CMakeLists.txt +++ b/src/librawspeed/common/CMakeLists.txt @@ -50,3 +50,7 @@ target_sources(rawspeed_get_number_of_processor_cores PRIVATE ) target_link_libraries(rawspeed_get_number_of_processor_cores PRIVATE rawspeed) + +if(TARGET RawSpeed::OpenMP_CXX) + target_link_libraries(rawspeed_get_number_of_processor_cores PRIVATE RawSpeed::OpenMP_CXX) +endif() diff --git a/src/librawspeed/common/Common.h b/src/librawspeed/common/Common.h index 413dbc94a..430ddb171 100644 --- a/src/librawspeed/common/Common.h +++ b/src/librawspeed/common/Common.h @@ -148,6 +148,29 @@ clampBits(T value, unsigned int nBits, return clampBits(value, nBits); } +template +inline constexpr bool __attribute__((const)) +isIntN(T value, unsigned int nBits, + typename std::enable_if::value>::type* /*unused*/ = + nullptr) { + assert(nBits < CHAR_BIT * sizeof(T) && "Check must not be tautological."); + using UnsignedT = typename std::make_unsigned::type; + const auto highBits = static_cast(value) >> nBits; + return highBits == 0; +} + +template +inline constexpr typename std::make_signed::type __attribute__((const)) +signExtend( + T value, unsigned int nBits, + typename std::enable_if::value>::type* /*unused*/ = + nullptr) { + assert(nBits != 0 && "Only valid for non-zero bit count."); + const T SpareSignBits = CHAR_BIT 
* sizeof(T) - nBits; + using SignedT = typename std::make_signed::type; + return static_cast(value << SpareSignBits) >> SpareSignBits; +} + // Trim both leading and trailing spaces from the string inline std::string trimSpaces(const std::string& str) { diff --git a/src/librawspeed/common/Point.h b/src/librawspeed/common/Point.h index 8af32ef9d..679547ad5 100644 --- a/src/librawspeed/common/Point.h +++ b/src/librawspeed/common/Point.h @@ -77,15 +77,10 @@ class iPoint2D { area_type __attribute__((pure)) area() const { using signed_area = std::make_signed::type; - if (x >= 0 && y >= 0) - return static_cast(x) * static_cast(y); - if (x >= 0 && y < 0) - return static_cast(x) * (-1 * static_cast(y)); - if (y >= 0 && x < 0) - return static_cast(y) * (-1 * static_cast(x)); + area_type x_abs = std::abs(static_cast(x)); + area_type y_abs = std::abs(static_cast(y)); - assert(x < 0 && y < 0); - return static_cast(x) * static_cast(y); + return x_abs * y_abs; } constexpr bool isThisInside(const iPoint2D& rhs) const { @@ -93,7 +88,10 @@ class iPoint2D { } constexpr iPoint2D getSmallest(const iPoint2D& rhs) const { - return {x < rhs.x ? x : rhs.x, y < rhs.y ? 
y : rhs.y}; + return { + std::min(x, rhs.x), + std::min(y, rhs.y), + }; } value_type x = 0; @@ -160,7 +158,7 @@ class iRectangle2D { void setSize(const iPoint2D& size) { dim = size; } void setSize(iPoint2D&& size) { dim = size; } - /* Crop, so area is postitive, and return true, if there is any area left */ + /* Crop, so area is positive, and return true, if there is any area left */ /* This will ensure that bottomright is never on the left/top of the offset */ bool cropArea() { dim.x = std::max(0, dim.x); diff --git a/src/librawspeed/common/RawImage.cpp b/src/librawspeed/common/RawImage.cpp index 5e87a9215..af80194a2 100644 --- a/src/librawspeed/common/RawImage.cpp +++ b/src/librawspeed/common/RawImage.cpp @@ -46,11 +46,11 @@ RawImageData::RawImageData() : cfa(iPoint2D(0, 0)) { blackLevelSeparate.fill(-1); } -RawImageData::RawImageData(const iPoint2D& _dim, uint32_t _bpc, uint32_t _cpp) +RawImageData::RawImageData(const iPoint2D& _dim, int _bpc, int _cpp) : dim(_dim), isCFA(_cpp == 1), cfa(iPoint2D(0, 0)), cpp(_cpp) { assert(_bpc > 0); - if (cpp > std::numeric_limits::max() / _bpc) + if (cpp > std::numeric_limits::max() / _bpc) ThrowRDE("Components-per-pixel is too large."); bpp = _bpc * _cpp; @@ -221,14 +221,14 @@ uint8_t* RawImageData::getData() const { } uint8_t* RawImageData::getData(uint32_t x, uint32_t y) { + x += mOffset.x; + y += mOffset.y; + if (x >= static_cast(uncropped_dim.x)) ThrowRDE("X Position outside image requested."); if (y >= static_cast(uncropped_dim.y)) ThrowRDE("Y Position outside image requested."); - x += mOffset.x; - y += mOffset.y; - if (!data) ThrowRDE("Data not yet allocated."); @@ -463,7 +463,7 @@ void RawImageData::expandBorder(iRectangle2D validData) uint8_t* src_pos = getData(validData.pos.x, y); uint8_t* dst_pos = getData(validData.pos.x - 1, y); for (int x = validData.pos.x; x >= 0; x--) { - for (uint32_t i = 0; i < bpp; i++) { + for (int i = 0; i < bpp; i++) { dst_pos[i] = src_pos[i]; } dst_pos -= bpp; @@ -477,7 +477,7 @@ void 
RawImageData::expandBorder(iRectangle2D validData) uint8_t* src_pos = getData(pos - 1, y); uint8_t* dst_pos = getData(pos, y); for (int x = pos; x < dim.x; x++) { - for (uint32_t i = 0; i < bpp; i++) { + for (int i = 0; i < bpp; i++) { dst_pos[i] = src_pos[i]; } dst_pos += bpp; diff --git a/src/librawspeed/common/RawImage.h b/src/librawspeed/common/RawImage.h index 4bebd1b79..98ed2623c 100644 --- a/src/librawspeed/common/RawImage.h +++ b/src/librawspeed/common/RawImage.h @@ -21,18 +21,21 @@ #pragma once #include "rawspeedconfig.h" -#include "ThreadSafetyAnalysis.h" // for GUARDED_BY, REQUIRES -#include "common/Common.h" // for uint32_t, uint8_t, uint16_t, wri... -#include "common/ErrorLog.h" // for ErrorLog -#include "common/Mutex.h" // for Mutex -#include "common/Point.h" // for iPoint2D, iRectangle2D (ptr o... -#include "common/TableLookUp.h" // for TableLookUp -#include "metadata/BlackArea.h" // for BlackArea -#include "metadata/ColorFilterArray.h" // for ColorFilterArray -#include // for array -#include // for unique_ptr, operator== -#include // for string -#include // for vector +#include "ThreadSafetyAnalysis.h" // for GUARDED_BY, REQUIRES +#include "common/Array2DRef.h" // for Array2DRef +#include "common/Common.h" // for uint32_t, uint8_t, uint16_t, wri... +#include "common/ErrorLog.h" // for ErrorLog +#include "common/Mutex.h" // for Mutex +#include "common/Point.h" // for iPoint2D, iRectangle2D (ptr o... 
+#include "common/TableLookUp.h" // for TableLookUp +#include "decoders/RawDecoderException.h" // for ThrowRDE +#include "metadata/BlackArea.h" // for BlackArea +#include "metadata/ColorFilterArray.h" // for ColorFilterArray +#include // for array +#include // for assert +#include // for unique_ptr, operator== +#include // for string +#include // for vector namespace rawspeed { @@ -107,11 +110,13 @@ class RawImageData : public ErrorLog { void blitFrom(const RawImage& src, const iPoint2D& srcPos, const iPoint2D& size, const iPoint2D& destPos); rawspeed::RawImageType getDataType() const { return dataType; } + inline Array2DRef getU16DataAsUncroppedArray2DRef() const noexcept; uint8_t* getData() const; uint8_t* getData(uint32_t x, uint32_t y); // Not super fast, but safe. Don't use per pixel. uint8_t* getDataUncropped(uint32_t x, uint32_t y); + void subFrame(iRectangle2D cropped); void clearArea(iRectangle2D area, uint8_t value = 0); iPoint2D __attribute__((pure)) getUncroppedDim() const; @@ -130,7 +135,7 @@ class RawImageData : public ErrorLog { bool isAllocated() {return !!data;} void createBadPixelMap(); iPoint2D dim; - uint32_t pitch = 0; + int pitch = 0; // padding is the size of the area after last pixel of line n // and before the first pixel of line n+1 @@ -162,15 +167,15 @@ class RawImageData : public ErrorLog { protected: RawImageType dataType; RawImageData(); - RawImageData(const iPoint2D& dim, uint32_t bpp, uint32_t cpp = 1); + RawImageData(const iPoint2D& dim, int bpp, int cpp = 1); virtual void scaleValues(int start_y, int end_y) = 0; virtual void doLookup(int start_y, int end_y) = 0; virtual void fixBadPixel(uint32_t x, uint32_t y, int component = 0) = 0; void fixBadPixelsThread(int start_y, int end_y); void startWorker(RawImageWorker::RawImageWorkerTask task, bool cropped ); uint8_t* data = nullptr; - uint32_t cpp = 1; // Components per pixel - uint32_t bpp = 0; // Bytes per pixel. 
+ int cpp = 1; // Components per pixel + int bpp = 0; // Bytes per pixel. friend class RawImage; iPoint2D mOffset; iPoint2D uncropped_dim; @@ -259,6 +264,15 @@ inline RawImage RawImage::create(const iPoint2D& dim, RawImageType type, } } +inline Array2DRef +RawImageData::getU16DataAsUncroppedArray2DRef() const noexcept { + assert(dataType == TYPE_USHORT16 && + "Attempting to access floating-point buffer as uint16_t."); + assert(data && "Data not yet allocated."); + return {reinterpret_cast(data), cpp * dim.x, dim.y, + static_cast(pitch / sizeof(uint16_t))}; +} + // setWithLookUp will set a single pixel by using the lookup table if supplied, // You must supply the destination where the value should be written, and a pointer to // a value that will be used to store a random counter that can be reused between calls. diff --git a/src/librawspeed/common/RawspeedException.h b/src/librawspeed/common/RawspeedException.h index 68ea2b96c..bb7a4a5ef 100644 --- a/src/librawspeed/common/RawspeedException.h +++ b/src/librawspeed/common/RawspeedException.h @@ -33,8 +33,9 @@ namespace rawspeed { template -[[noreturn]] void __attribute__((noreturn, noinline, format(printf, 1, 2))) -ThrowException(const char* fmt, ...) { +[[noreturn]] void RAWSPEED_UNLIKELY_FUNCTION RAWSPEED_NOINLINE + __attribute__((noreturn, format(printf, 1, 2))) + ThrowException(const char* fmt, ...) { static constexpr size_t bufSize = 8192; #if defined(HAVE_CXX_THREAD_LOCAL) static thread_local std::array buf; @@ -56,15 +57,15 @@ ThrowException(const char* fmt, ...) 
{ class RawspeedException : public std::runtime_error { private: - static void log(const char* msg) { + static void RAWSPEED_UNLIKELY_FUNCTION RAWSPEED_NOINLINE + log(const char* msg) { writeLog(DEBUG_PRIO_EXTRA, "EXCEPTION: %s", msg); } public: - explicit RawspeedException(const std::string& msg) : std::runtime_error(msg) { - log(msg.c_str()); - } - explicit RawspeedException(const char* msg) : std::runtime_error(msg) { + explicit RAWSPEED_UNLIKELY_FUNCTION RAWSPEED_NOINLINE + RawspeedException(const char* msg) + : std::runtime_error(msg) { log(msg); } }; diff --git a/src/librawspeed/common/Spline.h b/src/librawspeed/common/Spline.h index d4bb49992..12338b3f3 100644 --- a/src/librawspeed/common/Spline.h +++ b/src/librawspeed/common/Spline.h @@ -97,7 +97,7 @@ class Spline final { s.d = (sn.c - s.c) / (3. * h[i]); } - // The last segment is nonsensical, and was only used to temporairly store + // The last segment is nonsensical, and was only used to temporarily store // the a and c to simplify calculations, so drop that 'segment' now segments.pop_back(); diff --git a/src/librawspeed/decoders/DngDecoder.cpp b/src/librawspeed/decoders/DngDecoder.cpp index 88ead5838..880aa2793 100644 --- a/src/librawspeed/decoders/DngDecoder.cpp +++ b/src/librawspeed/decoders/DngDecoder.cpp @@ -475,7 +475,8 @@ void DngDecoder::handleMetadata(const TiffIFD* raw) { ThrowRDE("Error decoding default crop size"); iPoint2D size(sz[0], sz[1]); - if ((size + cropped.pos).isThisInside(mRaw->dim)) + if (size.isThisInside(mRaw->dim) && + (size + cropped.pos).isThisInside(mRaw->dim)) cropped.dim = size; if (!cropped.hasPositiveArea()) diff --git a/src/librawspeed/decoders/IiqDecoder.cpp b/src/librawspeed/decoders/IiqDecoder.cpp index 63cc69d6a..55828ceb5 100644 --- a/src/librawspeed/decoders/IiqDecoder.cpp +++ b/src/librawspeed/decoders/IiqDecoder.cpp @@ -66,7 +66,7 @@ bool IiqDecoder::isAppropriateDecoder(const TiffRootIFD* rootIFD, const std::string& make = id.make; return 
IiqDecoder::isAppropriateDecoder(file) && - (make == "Phase One A/S" || make == "Leaf"); + (make == "Phase One A/S" || make == "Phase One" || make == "Leaf"); } // FIXME: this is very close to SamsungV0Decompressor::computeStripes() @@ -187,7 +187,7 @@ RawImage IiqDecoder::decodeRawInternal() { } // FIXME: could be wrong. max "active pixels" in "Sensor+" mode - "101 MP" - if (width == 0 || height == 0 || width > 11976 || height > 8852) + if (width == 0 || height == 0 || width > 11976 || height > 8854) ThrowRDE("Unexpected image dimensions found: (%u; %u)", width, height); if (split_col > width || split_row > height) @@ -315,18 +315,19 @@ void IiqDecoder::CorrectQuadrantMultipliersCombined(ByteStream data, for (int quadRow = 0; quadRow < 2; quadRow++) { for (int quadCol = 0; quadCol < 2; quadCol++) { + const Array2DRef img(mRaw->getU16DataAsUncroppedArray2DRef()); + const Spline<> s(control_points[quadRow][quadCol]); const std::vector curve = s.calculateCurve(); int row_start = quadRow == 0 ? 0 : split_row; - int row_end = quadRow == 0 ? split_row : mRaw->dim.y; + int row_end = quadRow == 0 ? split_row : img.height; int col_start = quadCol == 0 ? 0 : split_col; - int col_end = quadCol == 0 ? split_col : mRaw->dim.x; + int col_end = quadCol == 0 ? split_col : img.width; for (int row = row_start; row < row_end; row++) { - auto* pixel = - reinterpret_cast(mRaw->getData(col_start, row)); - for (int col = col_start; col < col_end; col++, pixel++) { + for (int col = col_start; col < col_end; col++) { + uint16_t& pixel = img(row, col); // This adjustment is expected to be made with the // black-level already subtracted from the pixel values. // Because this is kept as metadata and not subtracted at @@ -334,8 +335,8 @@ void IiqDecoder::CorrectQuadrantMultipliersCombined(ByteStream data, // appropriate amount before indexing into the curve and // then add it back so that subtracting the black level // later will work as expected - const uint16_t diff = *pixel < black_level ? 
*pixel : black_level; - *pixel = curve[*pixel - diff] + diff; + const uint16_t diff = pixel < black_level ? pixel : black_level; + pixel = curve[pixel - diff] + diff; } } } @@ -390,9 +391,7 @@ void IiqDecoder::handleBadPixel(const uint16_t col, const uint16_t row) { } void IiqDecoder::correctBadColumn(const uint16_t col) { - const Array2DRef img(reinterpret_cast(mRaw->getData()), - mRaw->dim.x, mRaw->dim.y, - mRaw->pitch / sizeof(uint16_t)); + const Array2DRef img(mRaw->getU16DataAsUncroppedArray2DRef()); for (int row = 2; row < mRaw->dim.y - 2; row++) { if (mRaw->cfa.getColorAt(col, row) == CFA_GREEN) { @@ -408,10 +407,10 @@ void IiqDecoder::correctBadColumn(const uint16_t col) { std::array val; std::array dev; int32_t sum = 0; - sum += val[0] = img(col - 1, row - 1); - sum += val[1] = img(col - 1, row + 1); - sum += val[2] = img(col + 1, row - 1); - sum += val[3] = img(col + 1, row + 1); + sum += val[0] = img(row - 1, col - 1); + sum += val[1] = img(row + 1, col - 1); + sum += val[2] = img(row - 1, col + 1); + sum += val[3] = img(row + 1, col + 1); for (int i = 0; i < 4; i++) { dev[i] = std::abs((val[i] * 4) - sum); if (dev[max] < dev[i]) @@ -419,7 +418,7 @@ void IiqDecoder::correctBadColumn(const uint16_t col) { } const int three_pixels = sum - val[max]; // This is `std::lround(three_pixels / 3.0)`, but without FP. - img(col, row) = (three_pixels + 1) / 3; + img(row, col) = (three_pixels + 1) / 3; } else { /* * Do non-green pixels. Let's pretend we are in "R" pixel, in the middle: @@ -431,11 +430,11 @@ void IiqDecoder::correctBadColumn(const uint16_t col) { * We have 6 other "R" pixels - 2 by horizontal, 4 by diagonals. * We need to combine them, to get the value of the pixel we are in. 
*/ - uint32_t diags = img(col - 2, row + 2) + img(col - 2, row - 2) + - img(col + 2, row + 2) + img(col + 2, row - 2); - uint32_t horiz = img(col - 2, row) + img(col + 2, row); + uint32_t diags = img(row + 2, col - 2) + img(row - 2, col - 2) + + img(row + 2, col + 2) + img(row - 2, col + 2); + uint32_t horiz = img(row, col - 2) + img(row, col + 2); // But this is not just averaging, we bias towards the horizontal pixels. - img(col, row) = std::lround(diags * 0.0732233 + horiz * 0.3535534); + img(row, col) = std::lround(diags * 0.0732233 + horiz * 0.3535534); } } } diff --git a/src/librawspeed/decoders/NefDecoder.cpp b/src/librawspeed/decoders/NefDecoder.cpp index 4ca537467..6b874f906 100644 --- a/src/librawspeed/decoders/NefDecoder.cpp +++ b/src/librawspeed/decoders/NefDecoder.cpp @@ -304,47 +304,41 @@ void NefDecoder::DecodeUncompressed() { } } -void NefDecoder::readCoolpixSplitRaw(const ByteStream& input, - const iPoint2D& size, +void NefDecoder::readCoolpixSplitRaw(ByteStream input, const iPoint2D& size, const iPoint2D& offset, int inputPitch) { - uint8_t* data = mRaw->getData(); - uint32_t outPitch = mRaw->pitch; - uint32_t w = size.x; - uint32_t h = size.y; - uint32_t cpp = mRaw->getCpp(); - if (input.getRemainSize() < (inputPitch*h)) { - if (static_cast(input.getRemainSize()) > inputPitch) - h = input.getRemainSize() / inputPitch - 1; - else - ThrowIOE( - "Not enough data to decode a single line. 
Image file truncated."); - } - - if (offset.y > mRaw->dim.y) - ThrowRDE("Invalid y offset"); - if (offset.x + size.x > mRaw->dim.x) - ThrowRDE("Invalid x offset"); - - uint32_t y = offset.y; - h = min(h + static_cast(offset.y), - static_cast(mRaw->dim.y)); - w *= cpp; - h /= 2; - BitPumpMSB in(input); - for (; y < h; y++) { - auto* dest = reinterpret_cast( - &data[offset.x * sizeof(uint16_t) * cpp + y * 2 * outPitch]); - for (uint32_t x = 0; x < w; x++) { - dest[x] = in.getBits(12); - } - } - for (y = offset.y; y < h; y++) { - auto* dest = reinterpret_cast( - &data[offset.x * sizeof(uint16_t) * cpp + (y * 2 + 1) * outPitch]); - for (uint32_t x = 0; x < w; x++) { - dest[x] = in.getBits(12); - } + const Array2DRef img(mRaw->getU16DataAsUncroppedArray2DRef()); + + if (size.y % 2 != 0) + ThrowRDE("Odd number of rows"); + if (size.x % 8 != 0) + ThrowRDE("Column count isn't multiple of 8"); + if (inputPitch != ((3 * size.x) / 2)) + ThrowRDE("Unexpected input pitch"); + + // BitPumpMSB loads exactly 4 bytes at once, and we squeeze 12 bits each time. + // We produce 2 pixels per 3 bytes (24 bits). If we want to be smart and to + // know where the first input bit for first odd row is, the input slice width + // must be a multiple of 8 pixels. + + if (offset.x > mRaw->dim.x || offset.y > mRaw->dim.y) + ThrowRDE("All pixels outside of image"); + if (offset.x + size.x > mRaw->dim.x || offset.y + size.y > mRaw->dim.y) + ThrowRDE("Output is partailly out of image"); + + // The input bytes are laid out in the memory in the following way: + // First, all even (0-2-4-) rows, and then all odd (1-3-5-) rows. 
+ BitPumpMSB even(input.getStream(size.y / 2, inputPitch)); + BitPumpMSB odd(input.getStream(size.y / 2, inputPitch)); + for (int row = offset.y; row < size.y;) { + for (int col = offset.x; col < size.x; ++col) + img(row, col) = even.getBits(12); + ++row; + for (int col = offset.x; col < size.x; ++col) + img(row, col) = odd.getBits(12); + ++row; } + assert(even.getRemainSize() == 0 && odd.getRemainSize() == 0 && + "Should have run out of input"); } void NefDecoder::DecodeD100Uncompressed() { @@ -358,6 +352,10 @@ void NefDecoder::DecodeD100Uncompressed() { mRaw->dim = iPoint2D(width, height); mRaw->createData(); + ByteStream bs(DataBuffer(mFile->getSubView(offset), Endianness::little)); + if (bs.getRemainSize() == 0) + ThrowRDE("No input to decode!"); + UncompressedDecompressor u( ByteStream(DataBuffer(mFile->getSubView(offset), Endianness::little)), mRaw); @@ -381,8 +379,7 @@ void NefDecoder::DecodeSNefUncompressed() { mRaw->createData(); ByteStream in(DataBuffer(mFile->getSubView(offset), Endianness::little)); - - DecodeNikonSNef(&in, width, height); + DecodeNikonSNef(in); } void NefDecoder::checkSupportInternal(const CameraMetaData* meta) { @@ -615,12 +612,9 @@ void NefDecoder::decodeMetaDataInternal(const CameraMetaData* meta) { // We un-apply the whitebalance, so output matches lossless. // Note that values are scaled. See comment below on details. // OPTME: It would be trivial to run this multithreaded. -void NefDecoder::DecodeNikonSNef(ByteStream* input, uint32_t w, uint32_t h) { - if (w < 6) - ThrowIOE("got a %u wide sNEF, aborting", w); - - if (input->getRemainSize() < (w * h * 3)) - ThrowIOE("Not enough data to decode. Image file truncated."); +void NefDecoder::DecodeNikonSNef(const ByteStream& input) { + if (mRaw->dim.x < 6) + ThrowIOE("got a %u wide sNEF, aborting", mRaw->dim.x); // We need to read the applied whitebalance, since we should return // data before whitebalance, so we "unapply" it. 
@@ -661,14 +655,12 @@ void NefDecoder::DecodeNikonSNef(ByteStream* input, uint32_t w, uint32_t h) { uint16_t tmp; auto* tmpch = reinterpret_cast(&tmp); - uint8_t* data = mRaw->getData(); - uint32_t pitch = mRaw->pitch; - const uint8_t* in = input->getData(w * h * 3); + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); + const uint8_t* in = input.peekData(out.width * out.height); - for (uint32_t y = 0; y < h; y++) { - auto* dest = reinterpret_cast(&data[y * pitch]); + for (int row = 0; row < out.height; row++) { uint32_t random = in[0] + (in[1] << 8) + (in[2] << 16); - for (uint32_t x = 0; x < w * 3; x += 6) { + for (int col = 0; col < out.width; col += 6) { uint32_t g1 = in[0]; uint32_t g2 = in[1]; uint32_t g3 = in[2]; @@ -685,7 +677,7 @@ void NefDecoder::DecodeNikonSNef(ByteStream* input, uint32_t w, uint32_t h) { float cb2 = cb; float cr2 = cr; // Interpolate right pixel. We assume the sample is aligned with left pixel. - if ((x+6) < w*3) { + if ((col + 6) < out.width) { g4 = in[3]; g5 = in[4]; g6 = in[5]; @@ -700,27 +692,27 @@ void NefDecoder::DecodeNikonSNef(ByteStream* input, uint32_t w, uint32_t h) { mRaw->setWithLookUp(clampBits(static_cast(y1 + 1.370705 * cr), 12), tmpch, &random); - dest[x] = clampBits((inv_wb_r * tmp + (1<<9)) >> 10, 15); + out(row, col) = clampBits((inv_wb_r * tmp + (1 << 9)) >> 10, 15); mRaw->setWithLookUp( clampBits(static_cast(y1 - 0.337633 * cb - 0.698001 * cr), 12), - reinterpret_cast(&dest[x + 1]), &random); + reinterpret_cast(&out(row, col + 1)), &random); mRaw->setWithLookUp(clampBits(static_cast(y1 + 1.732446 * cb), 12), tmpch, &random); - dest[x+2] = clampBits((inv_wb_b * tmp + (1<<9)) >> 10, 15); + out(row, col + 2) = clampBits((inv_wb_b * tmp + (1 << 9)) >> 10, 15); mRaw->setWithLookUp(clampBits(static_cast(y2 + 1.370705 * cr2), 12), tmpch, &random); - dest[x+3] = clampBits((inv_wb_r * tmp + (1<<9)) >> 10, 15); + out(row, col + 3) = clampBits((inv_wb_r * tmp + (1 << 9)) >> 10, 15); mRaw->setWithLookUp( 
clampBits(static_cast(y2 - 0.337633 * cb2 - 0.698001 * cr2), 12), - reinterpret_cast(&dest[x + 4]), &random); + reinterpret_cast(&out(row, col + 4)), &random); mRaw->setWithLookUp(clampBits(static_cast(y2 + 1.732446 * cb2), 12), tmpch, &random); - dest[x+5] = clampBits((inv_wb_b * tmp + (1<<9)) >> 10, 15); + out(row, col + 5) = clampBits((inv_wb_b * tmp + (1 << 9)) >> 10, 15); } } } diff --git a/src/librawspeed/decoders/NefDecoder.h b/src/librawspeed/decoders/NefDecoder.h index afbb5d782..65b73f168 100644 --- a/src/librawspeed/decoders/NefDecoder.h +++ b/src/librawspeed/decoders/NefDecoder.h @@ -60,9 +60,9 @@ class NefDecoder final : public AbstractTiffDecoder void DecodeUncompressed(); void DecodeD100Uncompressed(); void DecodeSNefUncompressed(); - void readCoolpixSplitRaw(const ByteStream& input, const iPoint2D& size, + void readCoolpixSplitRaw(ByteStream input, const iPoint2D& size, const iPoint2D& offset, int inputPitch); - void DecodeNikonSNef(ByteStream* input, uint32_t w, uint32_t h); + void DecodeNikonSNef(const ByteStream& input); std::string getMode(); std::string getExtendedMode(const std::string &mode); static std::vector gammaCurve(double pwr, double ts, int mode, diff --git a/src/librawspeed/decoders/RawDecoderException.h b/src/librawspeed/decoders/RawDecoderException.h index c29d1d09e..aee9706b6 100644 --- a/src/librawspeed/decoders/RawDecoderException.h +++ b/src/librawspeed/decoders/RawDecoderException.h @@ -21,6 +21,7 @@ #pragma once +#include "rawspeedconfig.h" #include "common/RawspeedException.h" // for ThrowException, RawspeedException #include // for string @@ -28,9 +29,9 @@ namespace rawspeed { class RawDecoderException : public RawspeedException { public: - explicit RawDecoderException(const std::string& msg) + explicit RAWSPEED_UNLIKELY_FUNCTION RAWSPEED_NOINLINE + RawDecoderException(const char* msg) : RawspeedException(msg) {} - explicit RawDecoderException(const char* msg) : RawspeedException(msg) {} }; #define ThrowRDE(...) 
\ diff --git a/src/librawspeed/decompressors/AbstractHuffmanTable.h b/src/librawspeed/decompressors/AbstractHuffmanTable.h index 4333eabec..96fe4c3aa 100644 --- a/src/librawspeed/decompressors/AbstractHuffmanTable.h +++ b/src/librawspeed/decompressors/AbstractHuffmanTable.h @@ -66,6 +66,9 @@ class AbstractHuffmanTable { }; protected: + bool fullDecode = true; + bool fixDNGBug16 = false; + inline size_t __attribute__((pure)) maxCodePlusDiffLength() const { return nCodesPerLength.size() - 1 + *(std::max_element(codeValues.cbegin(), codeValues.cend())); @@ -219,10 +222,34 @@ class AbstractHuffmanTable { } } + template + inline int processSymbol(BIT_STREAM& bs, CodeSymbol symbol, + int codeValue) const { + assert(symbol.code_len >= 0 && symbol.code_len <= 16); + + // If we were only looking for symbol's code value, then just return it. + if (!FULL_DECODE) + return codeValue; + + // Else, treat it as the length of following difference + // that we need to read and extend. + int diff_l = codeValue; + assert(diff_l >= 0 && diff_l <= 16); + + if (diff_l == 16) { + if (fixDNGBug16) + bs.skipBitsNoFill(16); + return -32768; + } + + assert(symbol.code_len + diff_l <= 32); + return diff_l ? extend(bs.getBitsNoFill(diff_l), diff_l) : 0; + } + // Figure F.12 – Extending the sign bit of a decoded value in V // WARNING: this is *not* your normal 2's complement sign extension! 
- // WARNING: the caller should check that len != 0 before calling the function inline static int __attribute__((const)) extend(uint32_t diff, uint32_t len) { + assert(len > 0); int32_t ret = diff; if ((diff & (1 << (len - 1))) == 0) ret -= (1 << len) - 1; diff --git a/src/librawspeed/decompressors/Cr2Decompressor.cpp b/src/librawspeed/decompressors/Cr2Decompressor.cpp index 41b4465f1..317437adc 100644 --- a/src/librawspeed/decompressors/Cr2Decompressor.cpp +++ b/src/librawspeed/decompressors/Cr2Decompressor.cpp @@ -144,16 +144,15 @@ void Cr2Decompressor::decodeN_X_Y() // * for <3,2,1>: 6 = 3*2*1 // * for <3,2,2>: 12 = 3*2*2 // and advances x by N_COMP*X_S_F and y by Y_S_F - constexpr int xStepSize = N_COMP * X_S_F; - constexpr int yStepSize = Y_S_F; + constexpr int sliceColStep = N_COMP * X_S_F; + constexpr int frameRowStep = Y_S_F; auto ht = getHuffmanTables(); auto pred = getInitialPredictors(); auto predNext = reinterpret_cast(mRaw->getDataUncropped(0, 0)); - BitPumpJPEG bitStream(input); + BitPumpJPEG bs(input); - uint32_t pixelPitch = mRaw->pitch / 2; // Pitch in pixel if (frame.cps != 3 && frame.w * frame.cps > 2 * frame.h) { // Fix Canon double height issue where Canon doubled the width and halfed // the height (e.g. with 5Ds), ask Canon. frame.w needs to stay as is here @@ -173,9 +172,13 @@ void Cr2Decompressor::decodeN_X_Y() for (const auto& width : {slicing.sliceWidth, slicing.lastSliceWidth}) { if (width > mRaw->dim.x) ThrowRDE("Slice is longer than image's height, which is unsupported."); - if (width % xStepSize != 0) { + if (width % sliceColStep != 0) { ThrowRDE("Slice width (%u) should be multiple of pixel group size (%u)", - width, xStepSize); + width, sliceColStep); + } + if (width % mRaw->getCpp() != 0) { + ThrowRDE("Slice width (%u) should be multiple of image cpp (%u)", width, + mRaw->getCpp()); } } @@ -183,67 +186,69 @@ void Cr2Decompressor::decodeN_X_Y() mRaw->getCpp() * mRaw->dim.area()) ThrowRDE("Incorrrect slice height / slice widths! 
Less than image size."); - unsigned processedPixels = 0; - unsigned processedLineSlices = 0; + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); + unsigned globalFrameCol = 0; + unsigned globalFrameRow = 0; for (auto sliceId = 0; sliceId < slicing.numSlices; sliceId++) { const unsigned sliceWidth = slicing.widthOfSlice(sliceId); - assert(frame.h % yStepSize == 0); - for (unsigned y = 0; y < frame.h; y += yStepSize) { - // Fix for Canon 80D mraw format. - // In that format, `frame` is 4032x3402, while `mRaw` is 4536x3024. - // Consequently, the slices in `frame` wrap around plus there are few - // 'extra' sliced lines because sum(slicesW) * sliceH > mRaw->dim.area() - // Those would overflow, hence the break. - // see FIX_CANON_FRAME_VS_IMAGE_SIZE_MISMATCH - unsigned destY = processedLineSlices % mRaw->dim.y; - unsigned destX = processedLineSlices / mRaw->dim.y * - slicing.widthOfSlice(0) / mRaw->getCpp(); - if (destX >= static_cast(mRaw->dim.x)) + assert(frame.h % frameRowStep == 0); + for (unsigned sliceFrameRow = 0; sliceFrameRow < frame.h; + sliceFrameRow += frameRowStep, globalFrameRow += frameRowStep) { + unsigned row = globalFrameRow % mRaw->dim.y; + unsigned col = globalFrameRow / mRaw->dim.y * slicing.widthOfSlice(0) / + mRaw->getCpp(); + if (col >= static_cast(mRaw->dim.x)) break; - auto dest = - reinterpret_cast(mRaw->getDataUncropped(destX, destY)); - - assert(sliceWidth % xStepSize == 0); - if (X_S_F == 1) { - if (destX + sliceWidth > static_cast(mRaw->dim.x)) - ThrowRDE("Bad slice width / frame size / image size combination."); - if (((sliceId + 1) == slicing.numSlices) && - ((destX + sliceWidth) < static_cast(mRaw->dim.x))) - ThrowRDE("Insufficient slices - do not fill the entire image"); - } else { - // FIXME. 
- } - for (unsigned x = 0; x < sliceWidth; x += xStepSize) { + + assert(sliceWidth % mRaw->getCpp() == 0); + unsigned pixelsPerSliceRow = sliceWidth / mRaw->getCpp(); + if (col + pixelsPerSliceRow > static_cast(mRaw->dim.x)) + ThrowRDE("Bad slice width / frame size / image size combination."); + if (((sliceId + 1) == slicing.numSlices) && + (col + pixelsPerSliceRow != static_cast(mRaw->dim.x))) + ThrowRDE("Insufficient slices - do not fill the entire image"); + + col *= mRaw->getCpp(); + assert(sliceWidth % sliceColStep == 0); + for (unsigned sliceCol = 0; sliceCol < sliceWidth;) { // check if we processed one full raw row worth of pixels - if (processedPixels == frame.w) { + if (globalFrameCol == frame.w) { // if yes -> update predictor by going back exactly one row, // no matter where we are right now. // makes no sense from an image compression point of view, ask Canon. copy_n(predNext, N_COMP, pred.data()); - predNext = dest; - processedPixels = 0; + predNext = &out(row, col); + globalFrameCol = 0; } - if (X_S_F == 1) { // will be optimized out - unroll_loop([&](int i) { - dest[i] = pred[i] += ht[i]->decodeNext(bitStream); - }); - } else { - unroll_loop([&](int i) { - dest[0 + i*pixelPitch] = pred[0] += ht[0]->decodeNext(bitStream); - dest[3 + i*pixelPitch] = pred[0] += ht[0]->decodeNext(bitStream); - }); - - dest[1] = pred[1] += ht[1]->decodeNext(bitStream); - dest[2] = pred[2] += ht[2]->decodeNext(bitStream); + // How many pixel can we decode until we finish the row of either + // the frame (i.e. predictor change time), or of the current slice? 
+ assert(frame.w % X_S_F == 0); + unsigned sliceColsRemainingInThisFrameRow = + sliceColStep * ((frame.w - globalFrameCol) / X_S_F); + unsigned sliceColsRemainingInThisSliceRow = sliceWidth - sliceCol; + unsigned sliceColsRemaining = std::min( + sliceColsRemainingInThisSliceRow, sliceColsRemainingInThisFrameRow); + assert(sliceColsRemaining >= sliceColStep && + (sliceColsRemaining % sliceColStep) == 0); + for (unsigned sliceColEnd = sliceCol + sliceColsRemaining; + sliceCol < sliceColEnd; sliceCol += sliceColStep, + globalFrameCol += X_S_F, col += sliceColStep) { + if (X_S_F == 1) { // will be optimized out + for (int c = 0; c < sliceColStep; ++c) + out(row, col + c) = pred[c] += ht[c]->decodeNext(bs); + } else { + for (int dstRow = 0; dstRow < Y_S_F; ++dstRow) { + for (int c : {0, 3}) + out(row + dstRow, col + c) = pred[0] += ht[0]->decodeNext(bs); + } + + for (int c : {1, 2}) + out(row, col + c) = pred[c] += ht[c]->decodeNext(bs); + } } - - dest += xStepSize; - processedPixels += X_S_F; } - - processedLineSlices += yStepSize; } } } diff --git a/src/librawspeed/decompressors/CrwDecompressor.cpp b/src/librawspeed/decompressors/CrwDecompressor.cpp index 838e4baee..ff1ef8a19 100644 --- a/src/librawspeed/decompressors/CrwDecompressor.cpp +++ b/src/librawspeed/decompressors/CrwDecompressor.cpp @@ -243,29 +243,26 @@ inline void CrwDecompressor::decodeBlock(std::array* diffBuf, // FIXME: this function is horrible. 
void CrwDecompressor::decompress() { - const uint32_t height = mRaw->dim.y; - const uint32_t width = mRaw->dim.x; + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); + assert(out.width > 0); + assert(out.width % 4 == 0); + assert(out.height > 0); { - assert(width > 0); - assert(width % 4 == 0); - assert(height > 0); - // Each block encodes 64 pixels - assert((height * width) % 64 == 0); - const unsigned hBlocks = height * width / 64; + assert((out.height * out.width) % 64 == 0); + const unsigned hBlocks = out.height * out.width / 64; assert(hBlocks > 0); BitPumpJPEG lPump(rawInput); BitPumpJPEG iPump(rawInput); int carry = 0; - std::array base; + std::array base = {512, 512}; // starting predictors - uint32_t j = 0; - uint16_t* dest = nullptr; - uint32_t i = 0; + int row = 0; + int col = 0; for (unsigned block = 0; block < hBlocks; block++) { array diffBuf = {{}}; @@ -276,58 +273,44 @@ void CrwDecompressor::decompress() { diffBuf[0] += carry; carry = diffBuf[0]; - for (uint32_t k = 0; k < 64; k++) { - if (i % width == 0) { + for (uint32_t k = 0; k < 64; ++k, ++col) { + if (col == out.width) { // new line. sadly, does not always happen when k == 0. - i = 0; - - dest = reinterpret_cast(mRaw->getData(0, j)); - - j++; - base[0] = base[1] = 512; + col = 0; + row++; + base = {512, 512}; // reinit. 
} - assert(dest != nullptr); base[k & 1] += diffBuf[k]; - if (base[k & 1] >> 10) + if (!isIntN(base[k & 1], 10)) ThrowRDE("Error decompressing"); - *dest = base[k & 1]; - - i++; - dest++; + out(row, col) = base[k & 1]; } } - assert(j == height); - assert(i == width); + assert(row == (out.height - 1)); + assert(col == out.width); } // Add the uncompressed 2 low bits to the decoded 8 high bits if (lowbits) { - assert(width > 0); - assert(width % 4 == 0); - assert(height > 0); - - for (uint32_t j = 0; j < height; j++) { - auto* dest = reinterpret_cast(mRaw->getData(0, j)); - - assert(width % 4 == 0); - for (uint32_t i = 0; i < width; /* NOTE: i += 4 */) { + for (int row = 0; row < out.height; row++) { + for (int col = 0; col < out.width; /* NOTE: col += 4 */) { const uint8_t c = lowbitInput.getByte(); // LSB-packed: p3 << 6 | p2 << 4 | p1 << 2 | p0 << 0 // We have read 8 bits, which is 4 pairs of 2 bits. So process 4 pixels. - for (uint32_t p = 0; p < 4; p++) { + for (uint32_t p = 0; p < 4; ++p, ++col) { + uint16_t& pixel = out(row, col); + uint16_t low = (c >> (2 * p)) & 0b11; - uint16_t val = (*dest << 2) | low; + uint16_t val = (pixel << 2) | low; - if (width == 2672 && val < 512) + if (out.width == 2672 && val < 512) val += 2; // No idea why this is needed - *dest = val; - i++; - dest++; + pixel = val; } } } diff --git a/src/librawspeed/decompressors/FujiDecompressor.cpp b/src/librawspeed/decompressors/FujiDecompressor.cpp index 88c0ba271..bedc32e8b 100644 --- a/src/librawspeed/decompressors/FujiDecompressor.cpp +++ b/src/librawspeed/decompressors/FujiDecompressor.cpp @@ -173,6 +173,8 @@ template void FujiDecompressor::copy_line(fuji_compressed_block* info, const FujiStrip& strip, int cur_line, T&& idx) const { + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); + std::array lineBufB; std::array lineBufG; std::array lineBufR; @@ -187,9 +189,6 @@ void FujiDecompressor::copy_line(fuji_compressed_block* info, } for (int row_count = 0; row_count < 
FujiStrip::lineHeight(); row_count++) { - auto* const raw_block_data = reinterpret_cast( - mRaw->getData(strip.offsetX(), strip.offsetY(cur_line) + row_count)); - for (int pixel_count = 0; pixel_count < strip.width(); pixel_count++) { uint16_t* line_buf = nullptr; @@ -210,7 +209,8 @@ void FujiDecompressor::copy_line(fuji_compressed_block* info, __builtin_unreachable(); } - raw_block_data[pixel_count] = line_buf[idx(pixel_count)]; + out(strip.offsetY(cur_line) + row_count, strip.offsetX() + pixel_count) = + line_buf[idx(pixel_count)]; } } } diff --git a/src/librawspeed/decompressors/HasselbladDecompressor.cpp b/src/librawspeed/decompressors/HasselbladDecompressor.cpp index bf423d33f..cc4f3ab48 100644 --- a/src/librawspeed/decompressors/HasselbladDecompressor.cpp +++ b/src/librawspeed/decompressors/HasselbladDecompressor.cpp @@ -66,28 +66,29 @@ void HasselbladDecompressor::decodeScan() { frame.w, frame.h, mRaw->dim.x, mRaw->dim.y); } - assert(frame.h > 0); - assert(frame.w > 0); - assert(frame.w % 2 == 0); + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); + + assert(out.height > 0); + assert(out.width > 0); + assert(out.width % 2 == 0); const auto ht = getHuffmanTables<1>(); BitPumpMSB32 bitStream(input); // Pixels are packed two at a time, not like LJPEG: // [p1_length_as_huffman][p2_length_as_huffman][p0_diff_with_length][p1_diff_with_length]|NEXT PIXELS - for (uint32_t y = 0; y < frame.h; y++) { - auto* dest = reinterpret_cast(mRaw->getData(0, y)); + for (int row = 0; row < out.height; row++) { int p1 = 0x8000 + pixelBaseOffset; int p2 = 0x8000 + pixelBaseOffset; - for (uint32_t x = 0; x < frame.w; x += 2) { + for (int col = 0; col < out.width; col += 2) { int len1 = ht[0]->decodeLength(bitStream); int len2 = ht[0]->decodeLength(bitStream); p1 += getBits(&bitStream, len1); p2 += getBits(&bitStream, len2); // NOTE: this is rather unusual and weird, but appears to be correct. // clampBits(p, 16) results in completely garbled images. 
- dest[x] = uint16_t(p1); - dest[x + 1] = uint16_t(p2); + out(row, col) = uint16_t(p1); + out(row, col + 1) = uint16_t(p2); } } input.skipBytes(bitStream.getBufferPosition()); diff --git a/src/librawspeed/decompressors/HuffmanTableLUT.h b/src/librawspeed/decompressors/HuffmanTableLUT.h index d961f8fdb..358151f1e 100644 --- a/src/librawspeed/decompressors/HuffmanTableLUT.h +++ b/src/librawspeed/decompressors/HuffmanTableLUT.h @@ -21,9 +21,10 @@ #pragma once -#include "common/Common.h" // for uint32_t, uint16_t, int32_t +#include "common/Common.h" // for uint32_t, uint16_t, ... #include "decoders/RawDecoderException.h" // for ThrowRDE #include "decompressors/AbstractHuffmanTable.h" // for AbstractHuffmanTable +#include "decompressors/HuffmanTableLookup.h" // for HuffmanTableLookup #include "io/BitStream.h" // for BitStreamTraits #include // for assert #include // for size_t @@ -64,13 +65,7 @@ namespace rawspeed { -class HuffmanTableLUT final : public AbstractHuffmanTable { - // private fields calculated from codesPerBits and codeValues - // they are index '1' based, so we can directly lookup the value - // for code length l without decrementing - std::vector maxCodeOL; // index is length of code - std::vector codeOffsetOL; // index is length of code - +class HuffmanTableLUT final : public HuffmanTableLookup { // The code can be compiled with two different decode lookup table layouts. 
// The idea is that different CPU architectures may perform better with // one or the other, depending on the relative performance of their arithmetic @@ -96,38 +91,10 @@ class HuffmanTableLUT final : public AbstractHuffmanTable { std::vector decodeLookup; #endif - bool fullDecode = true; - bool fixDNGBug16 = false; - public: void setup(bool fullDecode_, bool fixDNGBug16_) { - this->fullDecode = fullDecode_; - this->fixDNGBug16 = fixDNGBug16_; - - assert(!nCodesPerLength.empty()); - assert(maxCodesCount() > 0); - - unsigned int maxCodeLength = nCodesPerLength.size() - 1U; - assert(codeValues.size() == maxCodesCount()); - - assert(maxCodePlusDiffLength() <= 32U); - - // Figure C.1: make table of Huffman code length for each symbol - // Figure C.2: generate the codes themselves - const auto symbols = generateCodeSymbols(); - assert(symbols.size() == maxCodesCount()); - - // Figure F.15: generate decoding tables - codeOffsetOL.resize(maxCodeLength + 1UL, 0xFFFF); - maxCodeOL.resize(maxCodeLength + 1UL, 0xFFFFFFFF); - int code_index = 0; - for (unsigned int l = 1U; l <= maxCodeLength; l++) { - if (nCodesPerLength[l]) { - codeOffsetOL[l] = symbols[code_index].code - code_index; - code_index += nCodesPerLength[l]; - maxCodeOL[l] = symbols[code_index - 1].code; - } - } + const std::vector symbols = + HuffmanTableLookup::setup(fullDecode_, fixDNGBug16_); // Generate lookup table for fast decoding lookup. // See definition of decodeLookup above @@ -148,10 +115,12 @@ class HuffmanTableLUT final : public AbstractHuffmanTable { // lookup bit depth is too small to fit both the encoded length // and the final difference value. // -> store only the length and do a normal sign extension later + assert(!fullDecode || diff_l > 0); decodeLookup[c] = diff_l << PayloadShift | code_l; } else { // diff_l + code_l <= lookupDepth // The table bit depth is large enough to store both. 
+ assert(diff_l != 16); decodeLookup[c] = (code_l + diff_l) | FlagMask; if (diff_l) { @@ -165,14 +134,16 @@ class HuffmanTableLUT final : public AbstractHuffmanTable { } } - template inline int decodeLength(BIT_STREAM& bs) const { + template + inline __attribute__((always_inline)) int decodeLength(BIT_STREAM& bs) const { static_assert(BitStreamTraits::canUseWithHuffmanTable, "This BitStream specialization is not marked as usable here"); assert(!fullDecode); return decode(bs); } - template inline int decodeNext(BIT_STREAM& bs) const { + template + inline __attribute__((always_inline)) int decodeNext(BIT_STREAM& bs) const { static_assert(BitStreamTraits::canUseWithHuffmanTable, "This BitStream specialization is not marked as usable here"); assert(fullDecode); @@ -183,74 +154,46 @@ class HuffmanTableLUT final : public AbstractHuffmanTable { // one returning only the length of the of diff bits (see Hasselblad), // one to return the fully decoded diff. // All ifs depending on this bool will be optimized out by the compiler - template inline int decode(BIT_STREAM& bs) const { + template + inline __attribute__((always_inline)) int decode(BIT_STREAM& bs) const { static_assert(BitStreamTraits::canUseWithHuffmanTable, "This BitStream specialization is not marked as usable here"); assert(FULL_DECODE == fullDecode); - - // 32 is the absolute maximum combined length of code + diff - // assertion maxCodePlusDiffLength() <= 32U is already checked in setup() bs.fill(32); - // for processors supporting bmi2 instructions, using maxCodePlusDiffLength() - // might be beneficial + CodeSymbol partial; + partial.code_len = LookupDepth; + partial.code = bs.peekBitsNoFill(partial.code_len); - uint32_t code = bs.peekBitsNoFill(LookupDepth); - assert(code < decodeLookup.size()); - auto val = static_cast(decodeLookup[code]); - int len = val & LenMask; - assert(len >= 0); - assert(len <= 16); + assert(partial.code < decodeLookup.size()); + auto lutEntry = 
static_cast(decodeLookup[partial.code]); + int payload = static_cast(lutEntry) >> PayloadShift; + int len = lutEntry & LenMask; - // if the code is invalid (bitstream corrupted) len will be 0 + // How far did reading of those LookupDepth bits *actually* move us forward? bs.skipBitsNoFill(len); - if (FULL_DECODE && val & FlagMask) { - // if the flag bit is set, the payload is the already sign extended difference - return static_cast(val) >> PayloadShift; - } - - if (len) { - // if the flag bit is not set but len != 0, the payload is the number of bits to sign extend and return - const int l_diff = static_cast(val) >> PayloadShift; - assert((FULL_DECODE && (len + l_diff <= 32)) || !FULL_DECODE); - if (FULL_DECODE && l_diff == 16) { - if (fixDNGBug16) - bs.skipBitsNoFill(16); - return -32768; - } - return FULL_DECODE ? extend(bs.getBitsNoFill(l_diff), l_diff) : l_diff; - } - - uint32_t code_l = LookupDepth; - bs.skipBitsNoFill(code_l); - while (code_l < maxCodeOL.size() && - (0xFFFFFFFF == maxCodeOL[code_l] || code > maxCodeOL[code_l])) { - uint32_t temp = bs.getBitsNoFill(1); - code = (code << 1) | temp; - code_l++; - } - - if (code_l >= maxCodeOL.size() || - (0xFFFFFFFF == maxCodeOL[code_l] || code > maxCodeOL[code_l])) - ThrowRDE("bad Huffman code: %u (len: %u)", code, code_l); - - if (code < codeOffsetOL[code_l]) - ThrowRDE("likely corrupt Huffman code: %u (len: %u)", code, code_l); - - int diff_l = codeValues[code - codeOffsetOL[code_l]]; - - if (!FULL_DECODE) - return diff_l; - if (diff_l == 16) { - if (fixDNGBug16) - bs.skipBitsNoFill(16); - return -32768; + // If the flag bit is set, then the 'len' was code_l+value, + // and payload is the already-extended difference. + if (FULL_DECODE && lutEntry & FlagMask) + return payload; + + int codeValue; + if (lutEntry) { + // If the flag is not set, but the entry is not empty, + // the payload is the code value for this symbol. 
+ partial.code_len = len; + codeValue = payload; + assert(!FULL_DECODE || codeValue /*aka diff_l*/ > 0); + } else { + // No match in the lookup table, because either the code is longer + // than LookupDepth or the input is corrupt. Need to read more bits... + assert(len == 0); + bs.skipBitsNoFill(partial.code_len); + std::tie(partial, codeValue) = finishReadingPartialSymbol(bs, partial); } - assert(FULL_DECODE); - assert((diff_l && (len + code_l + diff_l <= 32)) || !diff_l); - return diff_l ? extend(bs.getBitsNoFill(diff_l), diff_l) : 0; + return processSymbol(bs, partial, codeValue); } }; diff --git a/src/librawspeed/decompressors/HuffmanTableLookup.h b/src/librawspeed/decompressors/HuffmanTableLookup.h index 90d5cb6bc..e0e749bc7 100644 --- a/src/librawspeed/decompressors/HuffmanTableLookup.h +++ b/src/librawspeed/decompressors/HuffmanTableLookup.h @@ -63,18 +63,16 @@ namespace rawspeed { -class HuffmanTableLookup final : public AbstractHuffmanTable { +class HuffmanTableLookup : public AbstractHuffmanTable { +protected: // private fields calculated from codesPerBits and codeValues // they are index '1' based, so we can directly lookup the value // for code length l without decrementing std::vector maxCodeOL; // index is length of code std::vector codeOffsetOL; // index is length of code - bool fullDecode = true; - bool fixDNGBug16 = false; - public: - void setup(bool fullDecode_, bool fixDNGBug16_) { + std::vector setup(bool fullDecode_, bool fixDNGBug16_) { this->fullDecode = fullDecode_; this->fixDNGBug16 = fixDNGBug16_; @@ -88,20 +86,22 @@ class HuffmanTableLookup final : public AbstractHuffmanTable { // Figure C.1: make table of Huffman code length for each symbol // Figure C.2: generate the codes themselves - const auto symbols = generateCodeSymbols(); + std::vector symbols = generateCodeSymbols(); assert(symbols.size() == maxCodesCount()); // Figure F.15: generate decoding tables codeOffsetOL.resize(maxCodeLength + 1UL, 0xFFFF); 
maxCodeOL.resize(maxCodeLength + 1UL, 0xFFFFFFFF); - int code_index = 0; - for (unsigned int l = 1U; l <= maxCodeLength; l++) { - if (nCodesPerLength[l]) { - codeOffsetOL[l] = symbols[code_index].code - code_index; - code_index += nCodesPerLength[l]; - maxCodeOL[l] = symbols[code_index - 1].code; - } + for (unsigned int numCodesSoFar = 0, codeLen = 1; codeLen <= maxCodeLength; + codeLen++) { + if (!nCodesPerLength[codeLen]) + continue; + codeOffsetOL[codeLen] = symbols[numCodesSoFar].code - numCodesSoFar; + numCodesSoFar += nCodesPerLength[codeLen]; + maxCodeOL[codeLen] = symbols[numCodesSoFar - 1].code; } + + return symbols; } template inline int decodeLength(BIT_STREAM& bs) const { @@ -118,6 +118,42 @@ class HuffmanTableLookup final : public AbstractHuffmanTable { return decode(bs); } +protected: + template + inline std::pair + finishReadingPartialSymbol(BIT_STREAM& bs, CodeSymbol partial) const { + while (partial.code_len < maxCodeOL.size() && + (0xFFFFFFFF == maxCodeOL[partial.code_len] || + partial.code > maxCodeOL[partial.code_len])) { + uint32_t temp = bs.getBitsNoFill(1); + partial.code = (partial.code << 1) | temp; + partial.code_len++; + } + + if (partial.code_len >= maxCodeOL.size() || + (0xFFFFFFFF == maxCodeOL[partial.code_len] || + partial.code > maxCodeOL[partial.code_len]) || + partial.code < codeOffsetOL[partial.code_len]) + ThrowRDE("bad Huffman code: %u (len: %u)", partial.code, + partial.code_len); + + int codeValue = codeValues[partial.code - codeOffsetOL[partial.code_len]]; + + return {partial, codeValue}; + } + + template + inline std::pair + readSymbol(BIT_STREAM& bs) const { + // Start from completely unknown symbol. + CodeSymbol partial; + partial.code_len = 0; + partial.code = 0; + + return finishReadingPartialSymbol(bs, partial); + } + +public: // The bool template paraeter is to enable two versions: // one returning only the length of the of diff bits (see Hasselblad), // one to return the fully decoded diff. 
@@ -127,44 +163,13 @@ class HuffmanTableLookup final : public AbstractHuffmanTable { static_assert(BitStreamTraits::canUseWithHuffmanTable, "This BitStream specialization is not marked as usable here"); assert(FULL_DECODE == fullDecode); - - // 32 is the absolute maximum combined length of code + diff - // assertion maxCodePlusDiffLength() <= 32U is already checked in setup() bs.fill(32); - // for processors supporting bmi2 instructions, using - // maxCodePlusDiffLength() might be beneficial - - uint32_t code = 0; - uint32_t code_l = 0; - while (code_l < maxCodeOL.size() && - (0xFFFFFFFF == maxCodeOL[code_l] || code > maxCodeOL[code_l])) { - uint32_t temp = bs.getBitsNoFill(1); - code = (code << 1) | temp; - code_l++; - } - - if (code_l >= maxCodeOL.size() || - (0xFFFFFFFF == maxCodeOL[code_l] || code > maxCodeOL[code_l])) - ThrowRDE("bad Huffman code: %u (len: %u)", code, code_l); - - if (code < codeOffsetOL[code_l]) - ThrowRDE("likely corrupt Huffman code: %u (len: %u)", code, code_l); - - int diff_l = codeValues[code - codeOffsetOL[code_l]]; - - if (!FULL_DECODE) - return diff_l; - - if (diff_l == 16) { - if (fixDNGBug16) - bs.skipBitsNoFill(16); - return -32768; - } + CodeSymbol symbol; + int codeValue; + std::tie(symbol, codeValue) = readSymbol(bs); - assert(FULL_DECODE); - assert((diff_l && (code_l + diff_l <= 32)) || !diff_l); - return diff_l ? 
extend(bs.getBitsNoFill(diff_l), diff_l) : 0; + return processSymbol(bs, symbol, codeValue); } }; diff --git a/src/librawspeed/decompressors/HuffmanTableTree.h b/src/librawspeed/decompressors/HuffmanTableTree.h index 468ebee48..76f89d638 100644 --- a/src/librawspeed/decompressors/HuffmanTableTree.h +++ b/src/librawspeed/decompressors/HuffmanTableTree.h @@ -39,12 +39,10 @@ class HuffmanTableTree final : public AbstractHuffmanTable { BinaryHuffmanTree tree; - bool fullDecode = true; - bool fixDNGBug16 = false; - protected: template - inline ValueType getValue(BIT_STREAM& bs) const { + inline std::pair + readSymbol(BIT_STREAM& bs) const { static_assert(BitStreamTraits::canUseWithHuffmanTable, "This BitStream specialization is not marked as usable here"); CodeSymbol partial; @@ -75,7 +73,7 @@ class HuffmanTableTree final : public AbstractHuffmanTable { if (static_cast(*newNode) == decltype(tree)::Node::Type::Leaf) { // Ok, great, hit a Leaf. This is it. - return newNode->getAsLeaf().value; + return {partial, newNode->getAsLeaf().value}; } // Else, this is a branch, continue looking. @@ -147,20 +145,11 @@ class HuffmanTableTree final : public AbstractHuffmanTable { bs.fill(32); - const auto codeValue = getValue(bs); - - const int diff_l = codeValue; - - if (!FULL_DECODE) - return diff_l; - - if (diff_l == 16) { - if (fixDNGBug16) - bs.skipBitsNoFill(16); - return -32768; - } + CodeSymbol symbol; + int codeValue; + std::tie(symbol, codeValue) = readSymbol(bs); - return diff_l ? 
extend(bs.getBitsNoFill(diff_l), diff_l) : 0; + return processSymbol(bs, symbol, codeValue); } }; diff --git a/src/librawspeed/decompressors/HuffmanTableVector.h b/src/librawspeed/decompressors/HuffmanTableVector.h index 40bbc6bda..eafc8d1e4 100644 --- a/src/librawspeed/decompressors/HuffmanTableVector.h +++ b/src/librawspeed/decompressors/HuffmanTableVector.h @@ -33,15 +33,13 @@ namespace rawspeed { class HuffmanTableVector final : public AbstractHuffmanTable { std::vector symbols; - bool fullDecode = true; - bool fixDNGBug16 = false; - // Given this code len, which code id is the minimal? std::vector extrCodeIdForLen; // index is length of code protected: template - inline std::pair getSymbol(BIT_STREAM& bs) const { + inline std::pair + readSymbol(BIT_STREAM& bs) const { static_assert(BitStreamTraits::canUseWithHuffmanTable, "This BitStream specialization is not marked as usable here"); @@ -63,7 +61,7 @@ class HuffmanTableVector final : public AbstractHuffmanTable { codeId < extrCodeIdForLen[1U + partial.code_len]; codeId++) { const CodeSymbol& symbol = symbols[codeId]; if (symbol == partial) // yay, found? - return std::make_pair(symbol, codeId); + return {symbol, codeValues[codeId]}; } // Ok, but does any symbol have this same prefix? @@ -136,21 +134,11 @@ class HuffmanTableVector final : public AbstractHuffmanTable { bs.fill(32); - const auto got = getSymbol(bs); - const unsigned codeId = got.second; - - const int diff_l = codeValues[codeId]; - - if (!FULL_DECODE) - return diff_l; - - if (diff_l == 16) { - if (fixDNGBug16) - bs.skipBitsNoFill(16); - return -32768; - } + CodeSymbol symbol; + int codeValue; + std::tie(symbol, codeValue) = readSymbol(bs); - return diff_l ? 
extend(bs.getBitsNoFill(diff_l), diff_l) : 0; + return processSymbol(bs, symbol, codeValue); } }; diff --git a/src/librawspeed/decompressors/JpegDecompressor.cpp b/src/librawspeed/decompressors/JpegDecompressor.cpp index b2166a07f..a9623c129 100644 --- a/src/librawspeed/decompressors/JpegDecompressor.cpp +++ b/src/librawspeed/decompressors/JpegDecompressor.cpp @@ -40,7 +40,6 @@ #include "io/IOException.h" // for ThrowIOE #endif -using std::vector; using std::unique_ptr; using std::min; @@ -117,8 +116,6 @@ void JpegDecompressor::decode(uint32_t offX, uint32_t offY) { /* Each slice is a JPEG image */ struct JpegDecompressStruct dinfo; - vector buffer(1); - const auto size = input.getRemainSize(); JPEG_MEMSRC(&dinfo, input.getData(size), size); @@ -136,11 +133,14 @@ void JpegDecompressor::decode(uint32_t offX, complete_buffer( alignedMallocArray(dinfo.output_height, row_stride), &alignedFree); + + const Array2DRef tmp(&complete_buffer[0], + dinfo.output_components * dinfo.output_width, + dinfo.output_height, row_stride); + while (dinfo.output_scanline < dinfo.output_height) { - buffer[0] = static_cast( - &complete_buffer[static_cast(dinfo.output_scanline) * - row_stride]); - if (0 == jpeg_read_scanlines(&dinfo, &buffer[0], 1)) + auto rowOut = static_cast(&tmp(dinfo.output_scanline, 0)); + if (0 == jpeg_read_scanlines(&dinfo, &rowOut, 1)) ThrowRDE("JPEG Error while decompressing image."); } jpeg_finish_decompress(&dinfo); @@ -148,16 +148,11 @@ void JpegDecompressor::decode(uint32_t offX, // Now the image is decoded, and we copy the image data int copy_w = min(mRaw->dim.x - offX, dinfo.output_width); int copy_h = min(mRaw->dim.y - offY, dinfo.output_height); - for (int y = 0; y < copy_h; y++) { - uint8_t* src = &complete_buffer[static_cast(row_stride) * y]; - auto* dst = reinterpret_cast(mRaw->getData(offX, y + offY)); - for (int x = 0; x < copy_w; x++) { - for (int c = 0; c < dinfo.output_components; c++) { - *dst = *src; - src++; - dst++; - } - } + + const Array2DRef 
out(mRaw->getU16DataAsUncroppedArray2DRef()); + for (int row = 0; row < copy_h; row++) { + for (int col = 0; col < dinfo.output_components * copy_w; col++) + out(row + offY, dinfo.output_components * offX + col) = tmp(row, col); } } diff --git a/src/librawspeed/decompressors/KodakDecompressor.cpp b/src/librawspeed/decompressors/KodakDecompressor.cpp index 702509763..a8983b65f 100644 --- a/src/librawspeed/decompressors/KodakDecompressor.cpp +++ b/src/librawspeed/decompressors/KodakDecompressor.cpp @@ -108,32 +108,29 @@ KodakDecompressor::decodeSegment(const uint32_t bsize) { } void KodakDecompressor::decompress() { - uint8_t* data = mRaw->getData(); - uint32_t pitch = mRaw->pitch; + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); uint32_t random = 0; - for (auto y = 0; y < mRaw->dim.y; y++) { - auto* dest = reinterpret_cast(&data[y * pitch]); - - for (auto x = 0; x < mRaw->dim.x; x += segment_size) { - const uint32_t len = std::min(segment_size, mRaw->dim.x - x); + for (int row = 0; row < out.height; row++) { + for (int col = 0; col < out.width;) { + const int len = std::min(segment_size, mRaw->dim.x - col); const segment buf = decodeSegment(len); std::array pred; pred.fill(0); - for (uint32_t i = 0; i < len; i++) { + for (int i = 0; i < len; ++i, ++col) { pred[i & 1] += buf[i]; int value = pred[i & 1]; - if (unsigned(value) >= (1U << bps)) + if (!isIntN(value, bps)) ThrowRDE("Value out of bounds %d (bps = %i)", value, bps); if (uncorrectedRawValues) - dest[x + i] = value; + out(row, col) = value; else - mRaw->setWithLookUp(value, reinterpret_cast(&dest[x + i]), + mRaw->setWithLookUp(value, reinterpret_cast(&out(row, col)), &random); } } diff --git a/src/librawspeed/decompressors/NikonDecompressor.cpp b/src/librawspeed/decompressors/NikonDecompressor.cpp index 25e9009d8..ae6a73eaa 100644 --- a/src/librawspeed/decompressors/NikonDecompressor.cpp +++ b/src/librawspeed/decompressors/NikonDecompressor.cpp @@ -466,10 +466,10 @@ 
NikonDecompressor::NikonDecompressor(const RawImage& raw, ByteStream metadata, if (bitsPS == 14) huffSelect += 3; - pUp1[0] = metadata.getU16(); - pUp1[1] = metadata.getU16(); - pUp2[0] = metadata.getU16(); - pUp2[1] = metadata.getU16(); + pUp[0][0] = metadata.getU16(); + pUp[1][0] = metadata.getU16(); + pUp[0][1] = metadata.getU16(); + pUp[1][1] = metadata.getU16(); curve = createCurve(&metadata, bitsPS, v0, v1, &split); @@ -482,43 +482,22 @@ template void NikonDecompressor::decompress(BitPumpMSB* bits, int start_y, int end_y) { Huffman ht = createHuffmanTable(huffSelect); - uint8_t* draw = mRaw->getData(); - uint32_t pitch = mRaw->pitch; - - int pLeft1 = 0; - int pLeft2 = 0; + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); // allow gcc to devirtualize the calls below auto* rawdata = reinterpret_cast(mRaw.get()); - const iPoint2D& size = mRaw->dim; - assert(size.x % 2 == 0); - assert(size.x >= 2); - for (uint32_t y = start_y; y < static_cast(end_y); y++) { - auto* dest = - reinterpret_cast(&draw[y * pitch]); // Adjust destination - pUp1[y & 1] += ht.decodeNext(*bits); - pUp2[y & 1] += ht.decodeNext(*bits); - pLeft1 = pUp1[y & 1]; - pLeft2 = pUp2[y & 1]; - - rawdata->setWithLookUp(clampBits(pLeft1, 15), - reinterpret_cast(dest + 0), &random); - rawdata->setWithLookUp(clampBits(pLeft2, 15), - reinterpret_cast(dest + 1), &random); - - dest += 2; - - for (uint32_t x = 2; x < static_cast(size.x); x += 2) { - pLeft1 += ht.decodeNext(*bits); - pLeft2 += ht.decodeNext(*bits); - - rawdata->setWithLookUp(clampBits(pLeft1, 15), - reinterpret_cast(dest + 0), &random); - rawdata->setWithLookUp(clampBits(pLeft2, 15), - reinterpret_cast(dest + 1), &random); - - dest += 2; + assert(out.width % 2 == 0); + assert(out.width >= 2); + for (int row = start_y; row < end_y; row++) { + std::array pred = pUp[row & 1]; + for (int col = 0; col < out.width; col++) { + pred[col & 1] += ht.decodeNext(*bits); + if (col < 2) + pUp[row & 1][col & 1] = pred[col & 1]; + 
rawdata->setWithLookUp(clampBits(pred[col & 1], 15), + reinterpret_cast(&out(row, col)), + &random); } } } diff --git a/src/librawspeed/decompressors/NikonDecompressor.h b/src/librawspeed/decompressors/NikonDecompressor.h index 534886dd6..3b3b26d35 100644 --- a/src/librawspeed/decompressors/NikonDecompressor.h +++ b/src/librawspeed/decompressors/NikonDecompressor.h @@ -37,8 +37,7 @@ class NikonDecompressor final : public AbstractDecompressor { uint32_t huffSelect = 0; uint32_t split = 0; - std::array pUp1; - std::array pUp2; + std::array, 2> pUp; std::vector curve; diff --git a/src/librawspeed/decompressors/OlympusDecompressor.cpp b/src/librawspeed/decompressors/OlympusDecompressor.cpp index 5c795fb34..b17924d6a 100644 --- a/src/librawspeed/decompressors/OlympusDecompressor.cpp +++ b/src/librawspeed/decompressors/OlympusDecompressor.cpp @@ -99,18 +99,18 @@ OlympusDecompressor::parseCarry(BitPumpMSB* bits, return (diff * 4) | low; } -inline int OlympusDecompressor::getPred(int row, int x, uint16_t* dest, - const uint16_t* up_ptr) { - auto getLeft = [dest]() { return dest[-2]; }; - auto getUp = [up_ptr]() { return up_ptr[0]; }; - auto getLeftUp = [up_ptr]() { return up_ptr[-2]; }; +inline int OlympusDecompressor::getPred(const Array2DRef out, int row, + int col) { + auto getLeft = [&]() { return out(row, col - 2); }; + auto getUp = [&]() { return out(row - 2, col); }; + auto getLeftUp = [&]() { return out(row - 2, col - 2); }; int pred; - if (row < 2 && x < 2) + if (row < 2 && col < 2) pred = 0; else if (row < 2) pred = getLeft(); - else if (x < 2) + else if (col < 2) pred = getUp(); else { int left = getLeft(); @@ -139,23 +139,19 @@ void OlympusDecompressor::decompressRow(BitPumpMSB* bits, int row) const { assert(mRaw->dim.x > 0); assert(mRaw->dim.x % 2 == 0); - int pitch = mRaw->pitch; + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); std::array, 2> acarry{{}}; - auto* dest = reinterpret_cast(mRaw->getData(0, row)); - const auto* up_ptr = row > 0 ? 
&dest[-pitch] : &dest[0]; - for (int x = 0; x < mRaw->dim.x; x++) { - int c = x & 1; + for (int col = 0; col < out.width; col++) { + int c = col & 1; std::array& carry = acarry[c]; int diff = parseCarry(bits, &carry); - int pred = getPred(row, x, dest, up_ptr); + int pred = getPred(out, row, col); - *dest = pred + diff; - dest++; - up_ptr++; + out(row, col) = pred + diff; } } diff --git a/src/librawspeed/decompressors/OlympusDecompressor.h b/src/librawspeed/decompressors/OlympusDecompressor.h index a9d420b21..54d7f4c49 100644 --- a/src/librawspeed/decompressors/OlympusDecompressor.h +++ b/src/librawspeed/decompressors/OlympusDecompressor.h @@ -46,8 +46,7 @@ class OlympusDecompressor final : public AbstractDecompressor { inline __attribute__((always_inline)) int parseCarry(BitPumpMSB* bits, std::array* carry) const; - static inline int getPred(int row, int x, uint16_t* dest, - const uint16_t* up_ptr); + static inline int getPred(Array2DRef out, int row, int col); void decompressRow(BitPumpMSB* bits, int row) const; diff --git a/src/librawspeed/decompressors/PanasonicDecompressor.cpp b/src/librawspeed/decompressors/PanasonicDecompressor.cpp index 3ae17bdcb..ca65b8543 100644 --- a/src/librawspeed/decompressors/PanasonicDecompressor.cpp +++ b/src/librawspeed/decompressors/PanasonicDecompressor.cpp @@ -163,9 +163,11 @@ class PanasonicDecompressor::ProxyStream { } }; -void PanasonicDecompressor::processPixelPacket( - ProxyStream* bits, int y, uint16_t* dest, int xbegin, +inline void PanasonicDecompressor::processPixelPacket( + ProxyStream* bits, int row, int col, std::vector* zero_pos) const noexcept { + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); + int sh = 0; std::array pred; @@ -176,7 +178,7 @@ void PanasonicDecompressor::processPixelPacket( int u = 0; - for (int p = 0; p < PixelsPerPacket; p++) { + for (int p = 0; p < PixelsPerPacket; ++p, ++col) { const int c = p & 1; if (u == 2) { @@ -198,13 +200,12 @@ void 
PanasonicDecompressor::processPixelPacket( pred[c] = nonz[c] << 4 | bits->getBits(4); } - *dest = pred[c]; + out(row, col) = pred[c]; if (zero_is_bad && 0 == pred[c]) - zero_pos->push_back((y << 16) | (xbegin + p)); + zero_pos->push_back((row << 16) | col); u++; - dest++; } } @@ -213,28 +214,22 @@ void PanasonicDecompressor::processBlock(const Block& block, noexcept { ProxyStream bits(block.bs, section_split_offset); - for (int y = block.beginCoord.y; y <= block.endCoord.y; y++) { - int x = 0; + for (int row = block.beginCoord.y; row <= block.endCoord.y; row++) { + int col = 0; // First row may not begin at the first column. - if (block.beginCoord.y == y) - x = block.beginCoord.x; + if (block.beginCoord.y == row) + col = block.beginCoord.x; - int endx = mRaw->dim.x; + int endCol = mRaw->dim.x; // Last row may end before the last column. - if (block.endCoord.y == y) - endx = block.endCoord.x; - - auto* dest = reinterpret_cast(mRaw->getData(x, y)); + if (block.endCoord.y == row) + endCol = block.endCoord.x; - assert(x % PixelsPerPacket == 0); - assert(endx % PixelsPerPacket == 0); + assert(col % PixelsPerPacket == 0); + assert(endCol % PixelsPerPacket == 0); - for (; x < endx;) { - processPixelPacket(&bits, y, dest, x, zero_pos); - - x += PixelsPerPacket; - dest += PixelsPerPacket; - } + for (; col < endCol; col += PixelsPerPacket) + processPixelPacket(&bits, row, col, zero_pos); } } diff --git a/src/librawspeed/decompressors/PanasonicDecompressor.h b/src/librawspeed/decompressors/PanasonicDecompressor.h index 6591c01fd..e9e845ec0 100644 --- a/src/librawspeed/decompressors/PanasonicDecompressor.h +++ b/src/librawspeed/decompressors/PanasonicDecompressor.h @@ -73,8 +73,9 @@ class PanasonicDecompressor final : public AbstractDecompressor { void chopInputIntoBlocks(); - void processPixelPacket(ProxyStream* bits, int y, uint16_t* dest, int xbegin, - std::vector* zero_pos) const noexcept; + inline void + processPixelPacket(ProxyStream* bits, int row, int col, + 
std::vector* zero_pos) const noexcept; void processBlock(const Block& block, std::vector* zero_pos) const noexcept; diff --git a/src/librawspeed/decompressors/PanasonicDecompressorV5.cpp b/src/librawspeed/decompressors/PanasonicDecompressorV5.cpp index 3f32b8253..09a0874f0 100644 --- a/src/librawspeed/decompressors/PanasonicDecompressorV5.cpp +++ b/src/librawspeed/decompressors/PanasonicDecompressorV5.cpp @@ -174,21 +174,19 @@ class PanasonicDecompressorV5::ProxyStream { }; template -void PanasonicDecompressorV5::processPixelPacket(BitPumpLSB* bs, - uint16_t* dest) const { +inline void PanasonicDecompressorV5::processPixelPacket(BitPumpLSB* bs, int row, + int col) const { static_assert(dsc.pixelsPerPacket > 0, "dsc should be compile-time const"); static_assert(dsc.bps > 0 && dsc.bps <= 16, ""); + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); + assert(bs->getFillLevel() == 0); - const uint16_t* const endDest = dest + dsc.pixelsPerPacket; - for (; dest != endDest;) { + for (int p = 0; p < dsc.pixelsPerPacket;) { bs->fill(); - for (; bs->getFillLevel() >= dsc.bps; dest++) { - assert(dest != endDest); - - *dest = bs->getBitsNoFill(dsc.bps); - } + for (; bs->getFillLevel() >= dsc.bps; ++p, ++col) + out(row, col) = bs->getBitsNoFill(dsc.bps); } bs->skipBitsNoFill(bs->getFillLevel()); // get rid of padding. } @@ -201,28 +199,22 @@ void PanasonicDecompressorV5::processBlock(const Block& block) const { ProxyStream proxy(block.bs); BitPumpLSB bs(proxy.getStream()); - for (int y = block.beginCoord.y; y <= block.endCoord.y; y++) { - int x = 0; + for (int row = block.beginCoord.y; row <= block.endCoord.y; row++) { + int col = 0; // First row may not begin at the first column. - if (block.beginCoord.y == y) - x = block.beginCoord.x; + if (block.beginCoord.y == row) + col = block.beginCoord.x; int endx = mRaw->dim.x; // Last row may end before the last column. 
- if (block.endCoord.y == y) + if (block.endCoord.y == row) endx = block.endCoord.x; - auto* dest = reinterpret_cast(mRaw->getData(x, y)); - - assert(x % dsc.pixelsPerPacket == 0); + assert(col % dsc.pixelsPerPacket == 0); assert(endx % dsc.pixelsPerPacket == 0); - for (; x < endx;) { - processPixelPacket(&bs, dest); - - x += dsc.pixelsPerPacket; - dest += dsc.pixelsPerPacket; - } + for (; col < endx; col += dsc.pixelsPerPacket) + processPixelPacket(&bs, row, col); } } diff --git a/src/librawspeed/decompressors/PanasonicDecompressorV5.h b/src/librawspeed/decompressors/PanasonicDecompressorV5.h index 9396c07cc..1ad2b2507 100644 --- a/src/librawspeed/decompressors/PanasonicDecompressorV5.h +++ b/src/librawspeed/decompressors/PanasonicDecompressorV5.h @@ -91,7 +91,7 @@ class PanasonicDecompressorV5 final : public AbstractDecompressor { void chopInputIntoBlocks(const PacketDsc& dsc); template - void processPixelPacket(BitPumpLSB* bs, uint16_t* dest) const; + inline void processPixelPacket(BitPumpLSB* bs, int row, int col) const; template void processBlock(const Block& block) const; diff --git a/src/librawspeed/decompressors/PentaxDecompressor.cpp b/src/librawspeed/decompressors/PentaxDecompressor.cpp index 9c8776607..e58a1585d 100644 --- a/src/librawspeed/decompressors/PentaxDecompressor.cpp +++ b/src/librawspeed/decompressors/PentaxDecompressor.cpp @@ -137,36 +137,24 @@ HuffmanTable PentaxDecompressor::SetupHuffmanTable(ByteStream* metaData) { } void PentaxDecompressor::decompress(const ByteStream& data) const { - BitPumpMSB bs(data); - uint8_t* draw = mRaw->getData(); - - assert(mRaw->dim.y > 0); - assert(mRaw->dim.x > 0); - assert(mRaw->dim.x % 2 == 0); - - std::array pUp1 = {{}}; - std::array pUp2 = {{}}; - - for (int y = 0; y < mRaw->dim.y && mRaw->dim.x >= 2; y++) { - auto* dest = reinterpret_cast(&draw[y * mRaw->pitch]); + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); - pUp1[y & 1] += ht.decodeNext(bs); - pUp2[y & 1] += ht.decodeNext(bs); + 
assert(out.height > 0); + assert(out.width > 0); + assert(out.width % 2 == 0); - int pLeft1 = dest[0] = pUp1[y & 1]; - int pLeft2 = dest[1] = pUp2[y & 1]; - - for (int x = 2; x < mRaw->dim.x; x += 2) { - pLeft1 += ht.decodeNext(bs); - pLeft2 += ht.decodeNext(bs); - - dest[x] = pLeft1; - dest[x + 1] = pLeft2; - - if (pLeft1 < 0 || pLeft1 > 65535) - ThrowRDE("decoded value out of bounds at %d:%d", x, y); - if (pLeft2 < 0 || pLeft2 > 65535) - ThrowRDE("decoded value out of bounds at %d:%d", x, y); + BitPumpMSB bs(data); + for (int row = 0; row < out.height; row++) { + std::array pred = {{}}; + if (row >= 2) + pred = {out(row - 2, 0), out(row - 2, 1)}; + + for (int col = 0; col < out.width; col++) { + pred[col & 1] += ht.decodeNext(bs); + int value = pred[col & 1]; + if (!isIntN(value, 16)) + ThrowRDE("decoded value out of bounds at %d:%d", col, row); + out(row, col) = value; } } } diff --git a/src/librawspeed/decompressors/PhaseOneDecompressor.cpp b/src/librawspeed/decompressors/PhaseOneDecompressor.cpp index bb4bc7e5a..fda407522 100644 --- a/src/librawspeed/decompressors/PhaseOneDecompressor.cpp +++ b/src/librawspeed/decompressors/PhaseOneDecompressor.cpp @@ -46,15 +46,15 @@ PhaseOneDecompressor::PhaseOneDecompressor(const RawImage& img, ThrowRDE("Unexpected cpp: %u", mRaw->getCpp()); if (!mRaw->dim.hasPositiveArea() || mRaw->dim.x % 2 != 0 || - mRaw->dim.x > 11976 || mRaw->dim.y > 8852) { + mRaw->dim.x > 11976 || mRaw->dim.y > 8854) { ThrowRDE("Unexpected image dimensions found: (%u; %u)", mRaw->dim.x, mRaw->dim.y); } - validateStrips(); + prepareStrips(); } -void PhaseOneDecompressor::validateStrips() const { +void PhaseOneDecompressor::prepareStrips() { // The 'strips' vector should contain exactly one element per row of image. // If the length is different, then the 'strips' vector is clearly incorrect. 
@@ -63,42 +63,26 @@ void PhaseOneDecompressor::validateStrips() const { strips.size()); } - struct RowBin { - using value_type = unsigned char; - bool isEmpty() const { return data == 0; } - void fill() { data = 1; } - value_type data = 0; - }; - // Now, the strips in 'strips' vector aren't in order. // The 'decltype(strips)::value_type::n' is the row number of a strip. // We need to make sure that we have every row (0..mRaw->dim.y-1), once. - - // There are many ways to do that. Here, we take the histogram of all the - // row numbers, and if any bin ends up not being '1' (one strip per row), - // then the input is bad. - std::vector histogram; - histogram.resize(strips.size()); - int numBinsFilled = 0; - std::for_each(strips.begin(), strips.end(), - [y = mRaw->dim.y, &histogram, - &numBinsFilled](const PhaseOneStrip& strip) { - if (strip.n < 0 || strip.n >= y) - ThrowRDE("Strip specifies out-of-bounds row %u", strip.n); - RowBin& rowBin = histogram[strip.n]; - if (!rowBin.isEmpty()) - ThrowRDE("Duplicate row %u", strip.n); - rowBin.fill(); - numBinsFilled++; - }); - assert(histogram.size() == strips.size()); - assert(numBinsFilled == mRaw->dim.y && - "We should only get here if all the rows/bins got filled."); + // For that, first let's sort them to have monotonically increasing `n`. + // This will also serialize the per-line outputting. + std::sort( + strips.begin(), strips.end(), + [](const PhaseOneStrip& a, const PhaseOneStrip& b) { return a.n < b.n; }); + // And now ensure that slice number matches the slice's row. + for (decltype(strips)::size_type i = 0; i < strips.size(); ++i) + if (static_cast(strips[i].n) != i) + ThrowRDE("Strips validation issue."); + // All good. 
} void PhaseOneDecompressor::decompressStrip(const PhaseOneStrip& strip) const { - uint32_t width = mRaw->dim.x; - assert(width % 2 == 0); + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); + + assert(out.width > 0); + assert(out.width % 2 == 0); static constexpr std::array length = {8, 7, 6, 9, 11, 10, 5, 12, 14, 13}; @@ -108,10 +92,11 @@ void PhaseOneDecompressor::decompressStrip(const PhaseOneStrip& strip) const { std::array pred; pred.fill(0); std::array len; - auto* img = reinterpret_cast(mRaw->getData(0, strip.n)); - for (uint32_t col = 0; col < width; col++) { + const int row = strip.n; + for (int col = 0; col < out.width; col++) { pump.fill(32); - if (col >= (width & ~7U)) // last 'width % 8' pixels. + if (static_cast(col) >= + (out.width & ~7U)) // last 'width % 8' pixels. len[0] = len[1] = 14; else if ((col & 7) == 0) { for (int& i : len) { @@ -135,12 +120,12 @@ void PhaseOneDecompressor::decompressStrip(const PhaseOneStrip& strip) const { int i = len[col & 1]; if (i == 14) - img[col] = pred[col & 1] = pump.getBitsNoFill(16); + out(row, col) = pred[col & 1] = pump.getBitsNoFill(16); else { pred[col & 1] += static_cast(pump.getBitsNoFill(i)) + 1 - (1 << (i - 1)); // FIXME: is the truncation the right solution here? 
- img[col] = uint16_t(pred[col & 1]); + out(row, col) = uint16_t(pred[col & 1]); } } } diff --git a/src/librawspeed/decompressors/PhaseOneDecompressor.h b/src/librawspeed/decompressors/PhaseOneDecompressor.h index b1f1b7ef0..37d5d5e3a 100644 --- a/src/librawspeed/decompressors/PhaseOneDecompressor.h +++ b/src/librawspeed/decompressors/PhaseOneDecompressor.h @@ -33,9 +33,10 @@ namespace rawspeed { class RawImage; struct PhaseOneStrip { - const int n; - const ByteStream bs; + int n; + ByteStream bs; + PhaseOneStrip() = default; PhaseOneStrip(int block, ByteStream bs_) : n(block), bs(std::move(bs_)) {} }; @@ -48,7 +49,7 @@ class PhaseOneDecompressor final : public AbstractDecompressor { void decompressThread() const noexcept; - void validateStrips() const; + void prepareStrips(); public: PhaseOneDecompressor(const RawImage& img, diff --git a/src/librawspeed/decompressors/SamsungV0Decompressor.cpp b/src/librawspeed/decompressors/SamsungV0Decompressor.cpp index a4dc223f4..a9412df40 100644 --- a/src/librawspeed/decompressors/SamsungV0Decompressor.cpp +++ b/src/librawspeed/decompressors/SamsungV0Decompressor.cpp @@ -84,22 +84,14 @@ void SamsungV0Decompressor::computeStripes(ByteStream bso, ByteStream bsr) { } void SamsungV0Decompressor::decompress() const { - for (int y = 0; y < mRaw->dim.y; y++) - decompressStrip(y, stripes[y]); + for (int row = 0; row < mRaw->dim.y; row++) + decompressStrip(row, stripes[row]); // Swap red and blue pixels to get the final CFA pattern - for (int y = 0; y < mRaw->dim.y - 1; y += 2) { - auto* topline = reinterpret_cast(mRaw->getData(0, y)); - auto* bottomline = reinterpret_cast(mRaw->getData(0, y + 1)); - - for (int x = 0; x < mRaw->dim.x - 1; x += 2) { - uint16_t temp = topline[1]; - topline[1] = bottomline[0]; - bottomline[0] = temp; - - topline += 2; - bottomline += 2; - } + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); + for (int row = 0; row < out.height - 1; row += 2) { + for (int col = 0; col < out.width - 1; col += 
2) + std::swap(out(row, col + 1), out(row + 1, col)); } } @@ -110,27 +102,19 @@ int32_t SamsungV0Decompressor::calcAdj(BitPumpMSB32* bits, int b) { return adj; } -void SamsungV0Decompressor::decompressStrip(uint32_t y, +void SamsungV0Decompressor::decompressStrip(int row, const ByteStream& bs) const { - const uint32_t width = mRaw->dim.x; - assert(width > 0); + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); + assert(out.width > 0); BitPumpMSB32 bits(bs); std::array len; for (int& i : len) - i = y < 2 ? 7 : 4; - - auto* img = reinterpret_cast(mRaw->getData(0, y)); - const auto* const past_last = - reinterpret_cast(mRaw->getData(width - 1, y) + mRaw->getBpp()); - uint16_t* img_up = reinterpret_cast( - mRaw->getData(0, std::max(0, static_cast(y) - 1))); - uint16_t* img_up2 = reinterpret_cast( - mRaw->getData(0, std::max(0, static_cast(y) - 2))); + i = row < 2 ? 7 : 4; // Image is arranged in groups of 16 pixels horizontally - for (uint32_t x = 0; x < width; x += 16) { + for (int col = 0; col < out.width; col += 16) { bits.fill(); bool dir = !!bits.getBitsNoFill(1); @@ -165,10 +149,10 @@ void SamsungV0Decompressor::decompressStrip(uint32_t y, if (dir) { // Upward prediction - if (y < 2) + if (row < 2) ThrowRDE("Upward prediction for the first two rows. Raw corrupt"); - if (x + 16 >= width) + if (col + 16 >= out.width) ThrowRDE("Upward prediction for the last block of pixels. 
Raw corrupt"); // First we decode even pixels @@ -176,7 +160,7 @@ void SamsungV0Decompressor::decompressStrip(uint32_t y, int b = len[c >> 3]; int32_t adj = calcAdj(&bits, b); - img[c] = adj + img_up[c]; + out(row, col + c) = adj + out(row - 1, col + c); } // Now we decode odd pixels @@ -186,34 +170,30 @@ void SamsungV0Decompressor::decompressStrip(uint32_t y, int b = len[2 | (c >> 3)]; int32_t adj = calcAdj(&bits, b); - img[c] = adj + img_up2[c]; + out(row, col + c) = adj + out(row - 2, col + c); } } else { // Left to right prediction // First we decode even pixels - int pred_left = x != 0 ? img[-2] : 128; + int pred_left = col != 0 ? out(row, col - 2) : 128; for (int c = 0; c < 16; c += 2) { int b = len[c >> 3]; int32_t adj = calcAdj(&bits, b); - if (img + c < past_last) - img[c] = adj + pred_left; + if (col + c < out.width) + out(row, col + c) = adj + pred_left; } // Now we decode odd pixels - pred_left = x != 0 ? img[-1] : 128; + pred_left = col != 0 ? out(row, col - 1) : 128; for (int c = 1; c < 16; c += 2) { int b = len[2 | (c >> 3)]; int32_t adj = calcAdj(&bits, b); - if (img + c < past_last) - img[c] = adj + pred_left; + if (col + c < out.width) + out(row, col + c) = adj + pred_left; } } - - img += 16; - img_up += 16; - img_up2 += 16; } } diff --git a/src/librawspeed/decompressors/SamsungV0Decompressor.h b/src/librawspeed/decompressors/SamsungV0Decompressor.h index b3d1a7181..4a29449a3 100644 --- a/src/librawspeed/decompressors/SamsungV0Decompressor.h +++ b/src/librawspeed/decompressors/SamsungV0Decompressor.h @@ -36,7 +36,7 @@ class SamsungV0Decompressor final : public AbstractSamsungDecompressor { void computeStripes(ByteStream bso, ByteStream bsr); - void decompressStrip(uint32_t y, const ByteStream& bs) const; + void decompressStrip(int row, const ByteStream& bs) const; static int32_t calcAdj(BitPumpMSB32* bits, int b); diff --git a/src/librawspeed/decompressors/SamsungV1Decompressor.cpp b/src/librawspeed/decompressors/SamsungV1Decompressor.cpp index 
bc0241b6c..123a6488f 100644 --- a/src/librawspeed/decompressors/SamsungV1Decompressor.cpp +++ b/src/librawspeed/decompressors/SamsungV1Decompressor.cpp @@ -39,7 +39,7 @@ struct SamsungV1Decompressor::encTableItem { SamsungV1Decompressor::SamsungV1Decompressor(const RawImage& image, const ByteStream* bs_, int bit) - : AbstractSamsungDecompressor(image), bs(bs_), bits(bit) { + : AbstractSamsungDecompressor(image), bs(bs_) { if (mRaw->getCpp() != 1 || mRaw->getDataType() != TYPE_USHORT16 || mRaw->getBpp() != 2) ThrowRDE("Unexpected component count / data type"); @@ -54,7 +54,8 @@ SamsungV1Decompressor::SamsungV1Decompressor(const RawImage& image, const uint32_t width = mRaw->dim.x; const uint32_t height = mRaw->dim.y; - if (width == 0 || height == 0 || width > 5664 || height > 3714) + if (width == 0 || height == 0 || width % 32 != 0 || height % 2 != 0 || + width > 5664 || height > 3714) ThrowRDE("Unexpected image dimensions found: (%u; %u)", width, height); } @@ -77,9 +78,6 @@ SamsungV1Decompressor::samsungDiff(BitPumpMSB* pump, } void SamsungV1Decompressor::decompress() { - const uint32_t width = mRaw->dim.x; - const uint32_t height = mRaw->dim.y; - // This format has a variable length encoding of how many bits are needed // to encode the difference between pixels, we use a table to process it // that has two values, the first the number of bits that were used to @@ -101,8 +99,6 @@ void SamsungV1Decompressor::decompress() { {4, 8}, {4, 2}}}; std::vector tbl(1024); - std::array, 2> vpred = {{}}; - std::array hpred; // We generate a 1024 entry table (to be addressed by reading 10 bits) by // consecutively filling in 2^(10-N) positions where N is the variable number @@ -120,18 +116,23 @@ void SamsungV1Decompressor::decompress() { } } + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); + assert(out.width % 32 == 0 && "Should have even count of pixels per row."); + assert(out.height % 2 == 0 && "Should have even row count."); BitPumpMSB pump(*bs); - for 
(uint32_t y = 0; y < height; y++) { - auto* img = reinterpret_cast(mRaw->getData(0, y)); - for (uint32_t x = 0; x < width; x++) { + for (int row = 0; row < out.height; row++) { + std::array pred = {{}}; + if (row >= 2) + pred = {out(row - 2, 0), out(row - 2, 1)}; + + for (int col = 0; col < out.width; col++) { int32_t diff = samsungDiff(&pump, tbl); - if (x < 2) - hpred[x] = vpred[y & 1][x] += diff; - else - hpred[x & 1] += diff; - img[x] = hpred[x & 1]; - if (img[x] >> bits) - ThrowRDE("decoded value out of bounds at %d:%d", x, y); + pred[col & 1] += diff; + + int value = pred[col & 1]; + if (!isIntN(value, bits)) + ThrowRDE("decoded value out of bounds"); + out(row, col) = value; } } } diff --git a/src/librawspeed/decompressors/SamsungV1Decompressor.h b/src/librawspeed/decompressors/SamsungV1Decompressor.h index b79b46419..347e03a5d 100644 --- a/src/librawspeed/decompressors/SamsungV1Decompressor.h +++ b/src/librawspeed/decompressors/SamsungV1Decompressor.h @@ -38,7 +38,7 @@ class SamsungV1Decompressor final : public AbstractSamsungDecompressor { const std::vector& tbl); const ByteStream* bs; - int bits; + static constexpr int bits = 12; public: SamsungV1Decompressor(const RawImage& image, const ByteStream* bs_, int bit); diff --git a/src/librawspeed/decompressors/SamsungV2Decompressor.cpp b/src/librawspeed/decompressors/SamsungV2Decompressor.cpp index e1bd358aa..28447f887 100644 --- a/src/librawspeed/decompressors/SamsungV2Decompressor.cpp +++ b/src/librawspeed/decompressors/SamsungV2Decompressor.cpp @@ -71,30 +71,28 @@ constexpr bool operator&(SamsungV2Decompressor::OptFlags lhs, rhs)); } -inline int32_t SamsungV2Decompressor::getDiff(BitPumpMSB32* pump, - uint32_t len) { +inline __attribute__((always_inline)) int16_t +SamsungV2Decompressor::getDiff(BitPumpMSB32* pump, uint32_t len) { if (len == 0) return 0; - int32_t diff = pump->getBits(len); - // If the first bit is 1 we need to turn this into a negative number - if (diff >> (len - 1)) - diff -= (1 << len); 
- return diff; + assert(len <= 15 && "Difference occupies at most 15 bits."); + return signExtend(pump->getBits(len), len); } SamsungV2Decompressor::SamsungV2Decompressor(const RawImage& image, - const ByteStream& bs, int bit) - : AbstractSamsungDecompressor(image), bits(bit) { + const ByteStream& bs, + unsigned bits) + : AbstractSamsungDecompressor(image) { if (mRaw->getCpp() != 1 || mRaw->getDataType() != TYPE_USHORT16 || mRaw->getBpp() != 2) ThrowRDE("Unexpected component count / data type"); - switch (bit) { + switch (bits) { case 12: case 14: break; default: - ThrowRDE("Unexpected bit per pixel (%u)", bit); + ThrowRDE("Unexpected bit per pixel (%u)", bits); } static constexpr const auto headerSize = 16; @@ -107,6 +105,8 @@ SamsungV2Decompressor::SamsungV2Decompressor(const RawImage& image, startpump.getBits(16); // NLCVersion startpump.getBits(4); // ImgFormat bitDepth = startpump.getBits(4) + 1; + if (bitDepth != bits) + ThrowRDE("Bit depth mismatch with container, %u vs %u", bitDepth, bits); startpump.getBits(4); // NumBlkInRCUnit startpump.getBits(4); // CompressionRatio width = startpump.getBits(16); @@ -131,10 +131,9 @@ SamsungV2Decompressor::SamsungV2Decompressor(const RawImage& image, if (width == 0 || height == 0 || width % 16 != 0 || width > 6496 || height > 4336) - ThrowRDE("Unexpected image dimensions found: (%u; %u)", width, height); + ThrowRDE("Unexpected image dimensions found: (%i; %i)", width, height); - if (width != static_cast(mRaw->dim.x) || - height != static_cast(mRaw->dim.y)) + if (width != mRaw->dim.x || height != mRaw->dim.y) ThrowRDE("EXIF image dimensions do not match dimensions from raw header"); data = startpump.getStream(startpump.getRemainSize()); @@ -143,40 +142,40 @@ SamsungV2Decompressor::SamsungV2Decompressor(const RawImage& image, void SamsungV2Decompressor::decompress() { switch (_flags) { case OptFlags::NONE: - for (uint32_t row = 0; row < height; row++) + for (int row = 0; row < height; row++) decompressRow(row); break; 
case OptFlags::ALL: - for (uint32_t row = 0; row < height; row++) + for (int row = 0; row < height; row++) decompressRow(row); break; case OptFlags::SKIP: - for (uint32_t row = 0; row < height; row++) + for (int row = 0; row < height; row++) decompressRow(row); break; case OptFlags::MV: - for (uint32_t row = 0; row < height; row++) + for (int row = 0; row < height; row++) decompressRow(row); break; case OptFlags::QP: - for (uint32_t row = 0; row < height; row++) + for (int row = 0; row < height; row++) decompressRow(row); break; #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wswitch" case OptFlags::SKIP | OptFlags::MV: - for (uint32_t row = 0; row < height; row++) + for (int row = 0; row < height; row++) decompressRow(row); break; case OptFlags::SKIP | OptFlags::QP: - for (uint32_t row = 0; row < height; row++) + for (int row = 0; row < height; row++) decompressRow(row); break; case OptFlags::MV | OptFlags::QP: - for (uint32_t row = 0; row < height; row++) + for (int row = 0; row < height; row++) decompressRow(row); break; #pragma GCC diagnostic pop @@ -194,7 +193,189 @@ void SamsungV2Decompressor::decompress() { // the actual difference bits template -void SamsungV2Decompressor::decompressRow(uint32_t row) { +inline __attribute__((always_inline)) std::array +SamsungV2Decompressor::prepareBaselineValues(BitPumpMSB32* pump, int row, + int col) { + const Array2DRef img(mRaw->getU16DataAsUncroppedArray2DRef()); + + std::array baseline; + + if (!(optflags & OptFlags::QP) && !(col & 63)) { + static constexpr std::array scalevals = {{0, -2, 2}}; + uint32_t i = pump->getBits(2); + scale = i < 3 ? scale + scalevals[i] : pump->getBits(12); + } + + // First we figure out which reference pixels mode we're in + if (optflags & OptFlags::MV) + motion = pump->getBits(1) ? 3 : 7; + else if (!pump->getBits(1)) + motion = pump->getBits(3); + + if ((row == 0 || row == 1) && (motion != 7)) + ThrowRDE("At start of image and motion isn't 7. 
File corrupted?"); + + if (motion == 7) { + // The base case. + // If we're at the left edge we just start at the initial value. + if (col == 0) { + baseline.fill(initVal); + return baseline; + } + // Else just set all pixels to the previous ones on the same line. + std::array prev; + for (int i = 0; i < 2; i++) + prev[i] = img(row, col + i - 2); + for (int i = 0; i < 16; i++) + baseline[i] = prev[i & 1]; + return baseline; + } + + // The complex case, we now need to actually lookup one or two lines above + if (row < 2) + ThrowRDE("Got a previous line lookup on first two lines. File corrupted?"); + + static constexpr std::array motionOffset = {-4, -2, -2, 0, + 0, 2, 4}; + static constexpr std::array motionDoAverage = {0, 0, 1, 0, + 1, 0, 0}; + + int32_t slideOffset = motionOffset[motion]; + int32_t doAverage = motionDoAverage[motion]; + + for (int i = 0; i < 16; i++) { + int refRow = row; + int refCol = col + i + slideOffset; + + if ((row + i) & 1) { // Red or blue pixels use same color two lines up + refRow -= 2; + } else { // Green pixel N uses Green pixel N from row above + refRow -= 1; + refCol += (i & 1) ? 
-1 : 1; // (top left or top right) + } + + if (refCol < 0) + ThrowRDE("Bad motion %u at the beginning of the row", motion); + if ((refCol >= width) || (doAverage && (refCol + 2 >= width))) + ThrowRDE("Bad motion %u at the end of the row", motion); + + // In some cases we use as reference interpolation of this pixel and + // the next + if (doAverage) { + baseline[i] = (img(refRow, refCol) + img(refRow, refCol + 2) + 1) >> 1; + } else + baseline[i] = img(refRow, refCol); + } + + return baseline; +} + +template +inline __attribute__((always_inline)) std::array +SamsungV2Decompressor::decodeDiffLengths(BitPumpMSB32* pump, int row) { + if (!(optflags & OptFlags::SKIP || !pump->getBits(1))) + return {}; + + std::array diffBits; + + // Figure out how many difference bits we have to read for each pixel + std::array flags; + for (unsigned int& flag : flags) + flag = pump->getBits(2); + + for (int i = 0; i < 4; i++) { + // The color is 0-Green 1-Blue 2-Red + uint32_t colornum = (row % 2 != 0) ? i >> 1 : ((i >> 1) + 2) % 3; + + assert(flags[i] <= 3); + switch (flags[i]) { + case 0: + diffBits[i] = diffBitsMode[colornum][0]; + break; + case 1: + diffBits[i] = diffBitsMode[colornum][0] + 1; + break; + case 2: + if (diffBitsMode[colornum][0] == 0) + ThrowRDE("Difference bits underflow. File corrupted?"); + diffBits[i] = diffBitsMode[colornum][0] - 1; + break; + case 3: + diffBits[i] = pump->getBits(4); + break; + default: + __builtin_unreachable(); + } + + diffBitsMode[colornum][0] = diffBitsMode[colornum][1]; + diffBitsMode[colornum][1] = diffBits[i]; + + if (diffBits[i] > bitDepth + 1) + ThrowRDE("Too many difference bits (%u). 
File corrupted?", diffBits[i]); + assert(diffBits[i] <= 15 && "So any difference fits within uint16_t"); + } + + return diffBits; +} + +template +inline __attribute__((always_inline)) std::array +SamsungV2Decompressor::decodeDifferences(BitPumpMSB32* pump, int row) { + // Figure out how many difference bits we have to read for each pixel + const std::array diffBits = + decodeDiffLengths(pump, row); + + // Actually read the differences. We know these fit into 15-bit ints. + std::array diffs; + for (int i = 0; i < 16; i++) { + uint32_t len = diffBits[i >> 2]; + int16_t diff = getDiff(pump, len); + diffs[i] = diff; + } + + // Reshuffle the differences, while they still are only 16-bit. + std::array shuffled; + for (int i = 0; i < 16; i++) { + int p; + // The differences are stored interlaced: + // 0 2 4 6 8 10 12 14 1 3 5 7 9 11 13 15 + if (row % 2) + p = ((i % 8) << 1) - (i >> 3) + 1; + else + p = ((i % 8) << 1) + (i >> 3); + + shuffled[p] = diffs[i]; + } + + // And finally widen and scale the differences. 
+ std::array scaled; + for (int i = 0; i < 16; i++) { + int scaledDiff = int(shuffled[i]) * (scale * 2 + 1) + scale; + scaled[i] = scaledDiff; + } + + return scaled; +} + +template +inline __attribute__((always_inline)) void +SamsungV2Decompressor::processBlock(BitPumpMSB32* pump, int row, int col) { + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); + + const std::array baseline = + prepareBaselineValues(pump, row, col); + + // Figure out how many difference bits we have to read for each pixel + const std::array diffs = decodeDifferences(pump, row); + + // Actually apply the differences and write them to the pixels + for (int i = 0; i < 16; ++i, ++col) + out(row, col) = clampBits(baseline[i] + diffs[i], bitDepth); +} + +template +void SamsungV2Decompressor::decompressRow(int row) { + // Align pump to 16byte boundary const auto line_offset = data.getPosition(); if ((line_offset & 0xf) != 0) @@ -202,148 +383,18 @@ void SamsungV2Decompressor::decompressRow(uint32_t row) { BitPumpMSB32 pump(data); - auto* img = reinterpret_cast(mRaw->getData(0, row)); - uint16_t* img_up = reinterpret_cast( - mRaw->getData(0, std::max(0, static_cast(row) - 1))); - uint16_t* img_up2 = reinterpret_cast( - mRaw->getData(0, std::max(0, static_cast(row) - 2))); - // Initialize the motion and diff modes at the start of the line - uint32_t motion = 7; + motion = 7; // By default we are not scaling values at all - int32_t scale = 0; + scale = 0; - std::array, 3> diffBitsMode = {{}}; for (auto& i : diffBitsMode) i[0] = i[1] = (row == 0 || row == 1) ? 7 : 4; assert(width >= 16); assert(width % 16 == 0); - for (uint32_t col = 0; col < width; col += 16) { - if (!(optflags & OptFlags::QP) && !(col & 63)) { - static constexpr std::array scalevals = {{0, -2, 2}}; - uint32_t i = pump.getBits(2); - scale = i < 3 ? scale + scalevals[i] : pump.getBits(12); - } - - // First we figure out which reference pixels mode we're in - if (optflags & OptFlags::MV) - motion = pump.getBits(1) ? 
3 : 7; - else if (!pump.getBits(1)) - motion = pump.getBits(3); - - if ((row == 0 || row == 1) && (motion != 7)) - ThrowRDE("At start of image and motion isn't 7. File corrupted?"); - - if (motion == 7) { - // The base case, just set all pixels to the previous ones on the same - // line If we're at the left edge we just start at the initial value - for (uint32_t i = 0; i < 16; i++) - img[i] = (col == 0) ? initVal : *(img + i - 2); - } else { - // The complex case, we now need to actually lookup one or two lines - // above - if (row < 2) - ThrowRDE( - "Got a previous line lookup on first two lines. File corrupted?"); - - static constexpr std::array motionOffset = {-4, -2, -2, 0, - 0, 2, 4}; - static constexpr std::array motionDoAverage = {0, 0, 1, 0, - 1, 0, 0}; - - int32_t slideOffset = motionOffset[motion]; - int32_t doAverage = motionDoAverage[motion]; - - for (uint32_t i = 0; i < 16; i++) { - uint16_t* line; - uint16_t* refpixel; - - if ((row + i) & 0x1) { - // Red or blue pixels use same color two lines up - line = img_up2; - refpixel = line + i + slideOffset; - } else { - // Green pixel N uses Green pixel N from row above - // (top left or top right) - line = img_up; - refpixel = line + i + slideOffset + (((i % 2) != 0) ? 
-1 : 1); - } - - if (col == 0 && line > refpixel) - ThrowRDE("Bad motion %u at the beginning of the row", motion); - if (col + 16 == width && ((refpixel >= line + 16) || - (doAverage && (refpixel + 2 >= line + 16)))) - ThrowRDE("Bad motion %u at the end of the row", motion); - - // In some cases we use as reference interpolation of this pixel and - // the next - if (doAverage) - img[i] = (*refpixel + *(refpixel + 2) + 1) >> 1; - else - img[i] = *refpixel; - } - } - - // Figure out how many difference bits we have to read for each pixel - std::array diffBits = {}; - if (optflags & OptFlags::SKIP || !pump.getBits(1)) { - std::array flags; - for (unsigned int& flag : flags) - flag = pump.getBits(2); - - for (uint32_t i = 0; i < 4; i++) { - // The color is 0-Green 1-Blue 2-Red - uint32_t colornum = (row % 2 != 0) ? i >> 1 : ((i >> 1) + 2) % 3; - - assert(flags[i] <= 3); - switch (flags[i]) { - case 0: - diffBits[i] = diffBitsMode[colornum][0]; - break; - case 1: - diffBits[i] = diffBitsMode[colornum][0] + 1; - break; - case 2: - if (diffBitsMode[colornum][0] == 0) - ThrowRDE("Difference bits underflow. File corrupted?"); - diffBits[i] = diffBitsMode[colornum][0] - 1; - break; - case 3: - diffBits[i] = pump.getBits(4); - break; - default: - __builtin_unreachable(); - } - - diffBitsMode[colornum][0] = diffBitsMode[colornum][1]; - diffBitsMode[colornum][1] = diffBits[i]; - - if (diffBits[i] > bitDepth + 1) - ThrowRDE("Too many difference bits. 
File corrupted?"); - } - } - - // Actually read the differences and write them to the pixels - for (uint32_t i = 0; i < 16; i++) { - uint32_t len = diffBits[i >> 2]; - int32_t diff = getDiff(&pump, len); - - uint16_t* value = nullptr; - // Apply the diff to pixels 0 2 4 6 8 10 12 14 1 3 5 7 9 11 13 15 - if (row % 2) - value = &img[((i & 0x7) << 1) + 1 - (i >> 3)]; - else - value = &img[((i & 0x7) << 1) + (i >> 3)]; - - diff = diff * (scale * 2 + 1) + scale; - *value = clampBits(static_cast(*value) + diff, bits); - } - - img += 16; - img_up += 16; - img_up2 += 16; - } + for (int col = 0; col < width; col += 16) + processBlock(&pump, row, col); data.skipBytes(pump.getBufferPosition()); } diff --git a/src/librawspeed/decompressors/SamsungV2Decompressor.h b/src/librawspeed/decompressors/SamsungV2Decompressor.h index af3afb338..ff6fb9387 100644 --- a/src/librawspeed/decompressors/SamsungV2Decompressor.h +++ b/src/librawspeed/decompressors/SamsungV2Decompressor.h @@ -35,22 +35,42 @@ class SamsungV2Decompressor final : public AbstractSamsungDecompressor { enum struct OptFlags : uint32_t; protected: - int bits; - uint32_t bitDepth; - uint32_t width; - uint32_t height; + int width; + int height; OptFlags _flags; - uint32_t initVal; + uint16_t initVal; ByteStream data; - static inline int32_t getDiff(BitPumpMSB32* pump, uint32_t len); + int motion; + int scale; + std::array, 3> diffBitsMode; + + static inline __attribute__((always_inline)) int16_t + getDiff(BitPumpMSB32* pump, uint32_t len); + + template + inline __attribute__((always_inline)) std::array + prepareBaselineValues(BitPumpMSB32* pump, int row, int col); + + template + inline __attribute__((always_inline)) std::array + decodeDiffLengths(BitPumpMSB32* pump, int row); + + template + inline __attribute__((always_inline)) std::array + decodeDifferences(BitPumpMSB32* pump, int row); + + template + inline __attribute__((always_inline)) void processBlock(BitPumpMSB32* pump, + int row, int col); - template void 
decompressRow(uint32_t row); + template void decompressRow(int row); public: - SamsungV2Decompressor(const RawImage& image, const ByteStream& bs, int bit); + SamsungV2Decompressor(const RawImage& image, const ByteStream& bs, + unsigned bit); void decompress(); }; diff --git a/src/librawspeed/decompressors/SonyArw1Decompressor.cpp b/src/librawspeed/decompressors/SonyArw1Decompressor.cpp index ce680b2dc..124c6b8e0 100644 --- a/src/librawspeed/decompressors/SonyArw1Decompressor.cpp +++ b/src/librawspeed/decompressors/SonyArw1Decompressor.cpp @@ -51,24 +51,19 @@ inline int SonyArw1Decompressor::getDiff(BitPumpMSB* bs, uint32_t len) { } void SonyArw1Decompressor::decompress(const ByteStream& input) const { - const uint32_t w = mRaw->dim.x; - const uint32_t h = mRaw->dim.y; - - assert(w > 0); - assert(h > 0); - assert(h % 2 == 0); + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); + assert(out.width > 0); + assert(out.height > 0); + assert(out.height % 2 == 0); BitPumpMSB bits(input); - uint8_t* data = mRaw->getData(); - auto* dest = reinterpret_cast(&data[0]); - uint32_t pitch = mRaw->pitch / sizeof(uint16_t); - int sum = 0; - for (int64_t x = w - 1; x >= 0; x--) { - for (uint32_t y = 0; y < h + 1; y += 2) { + int pred = 0; + for (int col = out.width - 1; col >= 0; col--) { + for (int row = 0; row < out.height + 1; row += 2) { bits.fill(32); - if (y == h) - y = 1; + if (row == out.height) + row = 1; uint32_t len = 4 - bits.getBitsNoFill(2); @@ -80,13 +75,12 @@ void SonyArw1Decompressor::decompress(const ByteStream& input) const { len++; int diff = getDiff(&bits, len); - sum += diff; + pred += diff; - if (sum < 0 || (sum >> 12) > 0) + if (!isIntN(pred, 12)) ThrowRDE("Error decompressing"); - if (y < h) - dest[x + y * pitch] = sum; + out(row, col) = pred; } } } diff --git a/src/librawspeed/decompressors/SonyArw2Decompressor.cpp b/src/librawspeed/decompressors/SonyArw2Decompressor.cpp index 8be59f8db..eeca8fd9b 100644 --- 
a/src/librawspeed/decompressors/SonyArw2Decompressor.cpp +++ b/src/librawspeed/decompressors/SonyArw2Decompressor.cpp @@ -49,25 +49,23 @@ SonyArw2Decompressor::SonyArw2Decompressor(const RawImage& img, } void SonyArw2Decompressor::decompressRow(int row) const { - uint8_t* data = mRaw->getData(); - uint32_t pitch = mRaw->pitch; - int32_t w = mRaw->dim.x; + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); + assert(out.width > 0); + assert(out.width % 32 == 0); - assert(mRaw->dim.x > 0); - assert(mRaw->dim.x % 32 == 0); - - auto* dest = reinterpret_cast(&data[row * pitch]); + // Allow compiler to devirtualize the calls below. + auto& rawdata = reinterpret_cast(*mRaw); ByteStream rowBs = input; - rowBs.skipBytes(row * mRaw->dim.x); - rowBs = rowBs.peekStream(mRaw->dim.x); + rowBs.skipBytes(row * out.width); + rowBs = rowBs.peekStream(out.width); BitPumpLSB bits(rowBs); uint32_t random = bits.peekBits(24); // Each loop iteration processes 16 pixels, consuming 128 bits of input. - for (int32_t x = 0; x < w;) { + for (int col = 0; col < out.width; col += ((col & 1) != 0) ? 31 : 1) { // 30 bits. int _max = bits.getBits(11); int _min = bits.getBits(11); @@ -99,10 +97,9 @@ void SonyArw2Decompressor::decompressRow(int row) const { p = 0x7ff; } } - mRaw->setWithLookUp(p << 1, reinterpret_cast(&dest[x + i * 2]), - &random); + rawdata.setWithLookUp( + p << 1, reinterpret_cast(&out(row, col + i * 2)), &random); } - x += ((x & 1) != 0) ? 
31 : 1; // Skip to next 32 pixels } } diff --git a/src/librawspeed/decompressors/VC5Decompressor.cpp b/src/librawspeed/decompressors/VC5Decompressor.cpp index f5a8a6eef..af00d4801 100644 --- a/src/librawspeed/decompressors/VC5Decompressor.cpp +++ b/src/librawspeed/decompressors/VC5Decompressor.cpp @@ -119,10 +119,10 @@ VC5Decompressor::Wavelet::bandAsArray2DRef(const unsigned int iBand) const { } namespace { -auto convolute = [](int x, int y, std::array muls, +auto convolute = [](int row, int col, std::array muls, const Array2DRef high, auto lowGetter, int DescaleShift = 0) { - auto highCombined = muls[0] * high(x, y); + auto highCombined = muls[0] * high(row, col); auto lowsCombined = [muls, lowGetter]() { int lows = 0; for (int i = 0; i < 3; i++) @@ -178,38 +178,38 @@ constexpr std::array ConvolutionParams::Last::mul_odd; void VC5Decompressor::Wavelet::reconstructPass( const Array2DRef dst, const Array2DRef high, const Array2DRef low) const noexcept { - auto process = [low, high, dst](auto segment, int x, int y) { - auto lowGetter = [&x, &y, low](int delta) { - return low(x, y + decltype(segment)::coord_shift + delta); + auto process = [low, high, dst](auto segment, int row, int col) { + auto lowGetter = [&row, &col, low](int delta) { + return low(row + decltype(segment)::coord_shift + delta, col); }; - auto convolution = [&x, &y, high, lowGetter](std::array muls) { - return convolute(x, y, muls, high, lowGetter, /*DescaleShift*/ 0); + auto convolution = [&row, &col, high, lowGetter](std::array muls) { + return convolute(row, col, muls, high, lowGetter, /*DescaleShift*/ 0); }; int even = convolution(decltype(segment)::mul_even); int odd = convolution(decltype(segment)::mul_odd); - dst(x, 2 * y) = static_cast(even); - dst(x, 2 * y + 1) = static_cast(odd); + dst(2 * row, col) = static_cast(even); + dst(2 * row + 1, col) = static_cast(odd); }; // Vertical reconstruction #ifdef HAVE_OPENMP #pragma omp for schedule(static) #endif - for (int y = 0; y < height; ++y) { - 
if (y == 0) { + for (int row = 0; row < height; ++row) { + if (row == 0) { // 1st row - for (int x = 0; x < width; ++x) - process(ConvolutionParams::First, x, y); - } else if (y + 1 < height) { + for (int col = 0; col < width; ++col) + process(ConvolutionParams::First, row, col); + } else if (row + 1 < height) { // middle rows - for (int x = 0; x < width; ++x) - process(ConvolutionParams::Middle, x, y); + for (int col = 0; col < width; ++col) + process(ConvolutionParams::Middle, row, col); } else { // last row - for (int x = 0; x < width; ++x) - process(ConvolutionParams::Last, x, y); + for (int col = 0; col < width; ++col) + process(ConvolutionParams::Last, row, col); } } } @@ -218,14 +218,14 @@ void VC5Decompressor::Wavelet::combineLowHighPass( const Array2DRef dst, const Array2DRef low, const Array2DRef high, int descaleShift, bool clampUint = false) const noexcept { - auto process = [low, high, descaleShift, clampUint, dst](auto segment, int x, - int y) { - auto lowGetter = [&x, &y, low](int delta) { - return low(x + decltype(segment)::coord_shift + delta, y); + auto process = [low, high, descaleShift, clampUint, dst](auto segment, + int row, int col) { + auto lowGetter = [&row, &col, low](int delta) { + return low(row, col + decltype(segment)::coord_shift + delta); }; - auto convolution = [&x, &y, high, lowGetter, + auto convolution = [&row, &col, high, lowGetter, descaleShift](std::array muls) { - return convolute(x, y, muls, high, lowGetter, descaleShift); + return convolute(row, col, muls, high, lowGetter, descaleShift); }; int even = convolution(decltype(segment)::mul_even); @@ -235,24 +235,24 @@ void VC5Decompressor::Wavelet::combineLowHighPass( even = clampBits(even, 14); odd = clampBits(odd, 14); } - dst(2 * x, y) = static_cast(even); - dst(2 * x + 1, y) = static_cast(odd); + dst(row, 2 * col) = static_cast(even); + dst(row, 2 * col + 1) = static_cast(odd); }; // Horizontal reconstruction #ifdef HAVE_OPENMP #pragma omp for schedule(static) #endif - for 
(int y = 0; y < dst.height; ++y) { + for (int row = 0; row < dst.height; ++row) { // First col - int x = 0; - process(ConvolutionParams::First, x, y); + int col = 0; + process(ConvolutionParams::First, row, col); // middle cols - for (x = 1; x + 1 < width; ++x) { - process(ConvolutionParams::Middle, x, y); + for (col = 1; col + 1 < width; ++col) { + process(ConvolutionParams::Middle, row, col); } // last col - process(ConvolutionParams::Last, x, y); + process(ConvolutionParams::Last, row, col); } } @@ -532,7 +532,7 @@ void VC5Decompressor::Wavelet::LowPassBand::decode(const Wavelet& wavelet) { BitPumpMSB bits(bs); for (auto row = 0; row < dst.height; ++row) { for (auto col = 0; col < dst.width; ++col) - dst(col, row) = static_cast(bits.getBits(lowpassPrecision)); + dst(row, col) = static_cast(bits.getBits(lowpassPrecision)); } } @@ -766,9 +766,7 @@ void VC5Decompressor::reconstructLowpassBands() const noexcept { } void VC5Decompressor::combineFinalLowpassBands() const noexcept { - const Array2DRef out(reinterpret_cast(mRaw->getData()), - mRaw->dim.x, mRaw->dim.y, - mRaw->pitch / sizeof(uint16_t)); + const Array2DRef out(mRaw->getU16DataAsUncroppedArray2DRef()); const int width = out.width / 2; const int height = out.height / 2; @@ -790,20 +788,20 @@ void VC5Decompressor::combineFinalLowpassBands() const noexcept { for (int col = 0; col < width; ++col) { const int mid = 2048; - int gs = lowbands0(col, row); - int rg = lowbands1(col, row) - mid; - int bg = lowbands2(col, row) - mid; - int gd = lowbands3(col, row) - mid; + int gs = lowbands0(row, col); + int rg = lowbands1(row, col) - mid; + int bg = lowbands2(row, col) - mid; + int gd = lowbands3(row, col) - mid; int r = gs + 2 * rg; int b = gs + 2 * bg; int g1 = gs + gd; int g2 = gs - gd; - out(2 * col + 0, 2 * row + 0) = static_cast(mVC5LogTable[r]); - out(2 * col + 1, 2 * row + 0) = static_cast(mVC5LogTable[g1]); - out(2 * col + 0, 2 * row + 1) = static_cast(mVC5LogTable[g2]); - out(2 * col + 1, 2 * row + 1) = 
static_cast(mVC5LogTable[b]); + out(2 * row + 0, 2 * col + 0) = static_cast(mVC5LogTable[r]); + out(2 * row + 0, 2 * col + 1) = static_cast(mVC5LogTable[g1]); + out(2 * row + 1, 2 * col + 0) = static_cast(mVC5LogTable[g2]); + out(2 * row + 1, 2 * col + 1) = static_cast(mVC5LogTable[b]); } } } diff --git a/src/librawspeed/interpolators/Cr2sRawInterpolator.cpp b/src/librawspeed/interpolators/Cr2sRawInterpolator.cpp index 5f5a75a2c..6285c4c37 100644 --- a/src/librawspeed/interpolators/Cr2sRawInterpolator.cpp +++ b/src/librawspeed/interpolators/Cr2sRawInterpolator.cpp @@ -28,7 +28,6 @@ #include // for assert #include // for is_pod -using std::is_pod; using std::array; namespace rawspeed { @@ -38,12 +37,6 @@ struct Cr2sRawInterpolator::YCbCr final { int Cb; int Cr; - inline static void LoadY(YCbCr* dst, const YCbCr& src) { - assert(dst); - - dst->Y = src.Y; - } - inline static void LoadY(YCbCr* p, const uint16_t* data) { assert(p); assert(data); @@ -59,7 +52,7 @@ struct Cr2sRawInterpolator::YCbCr final { p->Cr = data[2]; } - inline static void Load(YCbCr* p, const uint16_t* data) { + inline static void LoadYCbCr(YCbCr* p, const uint16_t* data) { assert(p); assert(data); @@ -69,14 +62,6 @@ struct Cr2sRawInterpolator::YCbCr final { YCbCr() = default; - explicit YCbCr(uint16_t* data) { - static_assert(is_pod::value, "not a POD"); - - assert(data); - - Load(this, data); - } - inline void signExtend() { Cb -= 16384; Cr -= 16384; @@ -130,7 +115,8 @@ inline void Cr2sRawInterpolator::interpolate_422_row(uint16_t* data, int w) { assert(x % 2 == 0); // load, process and output first pixel, which is full - YCbCr p0(data); + YCbCr p0; + YCbCr::LoadYCbCr(&p0, data); p0.process(hue); YUV_TO_RGB(p0, data); data += 3; @@ -140,7 +126,8 @@ inline void Cr2sRawInterpolator::interpolate_422_row(uint16_t* data, int w) { YCbCr::LoadY(&p, data); // load third pixel, which is full, process - YCbCr p1(data + 3); + YCbCr p1; + YCbCr::LoadYCbCr(&p1, data + 3); p1.process(hue); // and finally, 
interpolate and output the middle pixel @@ -157,7 +144,8 @@ inline void Cr2sRawInterpolator::interpolate_422_row(uint16_t* data, int w) { // .. [ Y1 Cb Cr ] [ Y2 ... ... ] // load, process and output first pixel, which is full - YCbCr p(data); + YCbCr p; + YCbCr::LoadYCbCr(&p, data); p.process(hue); YUV_TO_RGB(p, data); data += 3; @@ -225,7 +213,8 @@ Cr2sRawInterpolator::interpolate_420_row(std::array line, int w) { assert(x % 2 == 0); // load, process and output first pixel of first row, which is full - YCbCr p0(line[0]); + YCbCr p0; + YCbCr::LoadYCbCr(&p0, line[0]); p0.process(hue); YUV_TO_RGB(p0, line[0]); line[0] += 3; @@ -288,7 +277,8 @@ Cr2sRawInterpolator::interpolate_420_row(std::array line, int w) { // .. . . .. . . // load, process and output first pixel of first row, which is full - YCbCr p0(line[0]); + YCbCr p0; + YCbCr::LoadYCbCr(&p0, line[0]); p0.process(hue); YUV_TO_RGB(p0, line[0]); line[0] += 3; @@ -366,7 +356,8 @@ inline void Cr2sRawInterpolator::interpolate_420(int w, int h) { assert(x % 2 == 0); // load, process and output first pixel of first row, which is full - YCbCr p0(line[0]); + YCbCr p0; + YCbCr::LoadYCbCr(&p0, line[0]); p0.process(hue); YUV_TO_RGB(p0, line[0]); line[0] += 3; @@ -415,7 +406,8 @@ inline void Cr2sRawInterpolator::interpolate_420(int w, int h) { // row 1: ... [ Y3 ... ... ] [ Y4 ... ... ] // load, process and output first pixel of first row, which is full - YCbCr p(line[0]); + YCbCr p; + YCbCr::LoadYCbCr(&p, line[0]); p.process(hue); YUV_TO_RGB(p, line[0]); line[0] += 3; diff --git a/src/librawspeed/io/BitStream.cpp b/src/librawspeed/io/BitStream.cpp new file mode 100644 index 000000000..c20e4324a --- /dev/null +++ b/src/librawspeed/io/BitStream.cpp @@ -0,0 +1,27 @@ +/* + RawSpeed - RAW file decoder. 
+ + Copyright (C) 2019 Roman Lebedev + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include "io/BitStream.h" // for BitStreamCacheBase + +namespace rawspeed { + +constexpr unsigned BitStreamCacheBase::MaxProcessBytes; + +} // namespace rawspeed diff --git a/src/librawspeed/io/BitStream.h b/src/librawspeed/io/BitStream.h index 0922da39a..f576ee125 100644 --- a/src/librawspeed/io/BitStream.h +++ b/src/librawspeed/io/BitStream.h @@ -23,7 +23,7 @@ #pragma once #include "common/Common.h" // for uint32_t, uint8_t, uint64_t -#include "io/Buffer.h" // for Buffer::size_type, BUFFER_PADDING +#include "io/Buffer.h" // for Buffer::size_type #include "io/ByteStream.h" // for ByteStream #include "io/IOException.h" // for IOException (ptr only), ThrowIOE #include // for assert @@ -93,6 +93,12 @@ template class BitStream final : public ByteStream { Cache cache; + // A temporary intermediate buffer that may be used by fill() method either + // in debug build to enforce lack of out-of-bounds reads, or when we are + // nearing the end of the input buffer and can not just read MaxProcessBytes + // from it, but have to read as much as we can and fill rest with zeros. + std::array tmp = {}; + // this method hase to be implemented in the concrete BitStream template // specializations. 
It will return the number of bytes processed. It needs // to process up to BitStreamCacheBase::MaxProcessBytes bytes of input. @@ -108,57 +114,46 @@ class BitStream final : public ByteStream { } private: - inline void fillSafe() { + inline const uint8_t* getInput() { assert(data); - if (pos + BitStreamCacheBase::MaxProcessBytes <= size) { - std::array tmp; - tmp.fill(0); - assert(!(size - pos < BitStreamCacheBase::MaxProcessBytes)); - memcpy(tmp.data(), data + pos, BitStreamCacheBase::MaxProcessBytes); - pos += fillCache(tmp.data(), size, &pos); - } else if (pos < size) { - std::array tmp; - tmp.fill(0); - assert(size - pos < BitStreamCacheBase::MaxProcessBytes); - memcpy(tmp.data(), data + pos, size - pos); - pos += fillCache(tmp.data(), size, &pos); - } else if (pos <= size + BitStreamCacheBase::MaxProcessBytes) { - std::array tmp; - tmp.fill(0); - pos += fillCache(tmp.data(), size, &pos); - } else { - // assert(size < pos); + +#if !defined(DEBUG) + // Do we have MaxProcessBytes or more bytes left in the input buffer? + // If so, then we can just read from said buffer. + if (pos + BitStreamCacheBase::MaxProcessBytes <= size) + return data + pos; +#endif + + // We have to use intermediate buffer, either because the input is running + // out of bytes, or because we want to enforce bounds checking. + + // Note that in order to keep all fill-level invariants we must allow to + // over-read past-the-end a bit. + if (pos > size + BitStreamCacheBase::MaxProcessBytes) ThrowIOE("Buffer overflow read in BitStream"); - } - } - // In non-DEBUG builds, fillSafe() will be called at most once - // per the life-time of the BitStream therefore it should *NOT* be inlined - // into the normal codepath. - inline void __attribute__((noinline, cold)) fillSafeNoinline() { fillSafe(); } + tmp.fill(0); + + // How many bytes are left in input buffer? + // Since pos can be past-the-end we need to carefully handle overflow. + Buffer::size_type bytesRemaining = (pos < size) ? 
size - pos : 0; + // And if we are not at the end of the input, we may have more than we need. + bytesRemaining = + std::min(BitStreamCacheBase::MaxProcessBytes, bytesRemaining); + + memcpy(tmp.data(), data + pos, bytesRemaining); + return tmp.data(); + } public: inline void fill(uint32_t nbits = Cache::MaxGetBits) { assert(data); assert(nbits <= Cache::MaxGetBits); - if (cache.fillLevel < nbits) { -#if defined(DEBUG) - // really slow, but best way to check all the assumptions. - fillSafe(); -#elif BUFFER_PADDING >= 8 - static_assert(BitStreamCacheBase::MaxProcessBytes == 8, - "update these too"); - // FIXME: this looks very wrong. We don't check pos at all here. - // I suspect this should be: if (pos <= size) - pos += fillCache(data + pos, size, &pos); -#else - // disabling this run-time bounds check saves about 1% on intel x86-64 - if (pos + BitStreamCacheBase::MaxProcessBytes <= size) - pos += fillCache(data + pos, size, &pos); - else - fillSafeNoinline(); -#endif - } + + if (cache.fillLevel >= nbits) + return; + + pos += fillCache(getInput(), size, &pos); } // these methods might be specialized by implementations that support it diff --git a/src/librawspeed/io/Buffer.cpp b/src/librawspeed/io/Buffer.cpp deleted file mode 100644 index 4133fa33d..000000000 --- a/src/librawspeed/io/Buffer.cpp +++ /dev/null @@ -1,129 +0,0 @@ -/* - RawSpeed - RAW file decoder. - - Copyright (C) 2009-2014 Klaus Post - Copyright (C) 2017 Axel Waggershauser - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2 of the License, or (at your option) any later version. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#include "io/Buffer.h" -#include "AddressSanitizer.h" // for ASan -#include "common/Common.h" // for uint8_t, roundUp -#include "common/Memory.h" // for alignedFree, alignedFreeConstPtr, alig... -#include "io/IOException.h" // for ThrowIOE -#include // for assert -#include // for unique_ptr - -using std::unique_ptr; - -namespace rawspeed { - -unique_ptr Buffer::Create(size_type size) { - if (!size) - ThrowIOE("Trying to allocate 0 bytes sized buffer."); - - unique_ptr data( - alignedMalloc(roundUp(size + BUFFER_PADDING, 16)), - &alignedFree); - if (!data) - ThrowIOE("Failed to allocate %uz bytes memory buffer.", size); - - assert(!ASan::RegionIsPoisoned(data.get(), size)); - - return data; -} - -Buffer::Buffer(unique_ptr data_, - size_type size_) - : size(size_) { - if (!size) - ThrowIOE("Buffer has zero size?"); - - if (data_.get_deleter() != &alignedFree) - ThrowIOE("Wrong deleter. 
Expected rawspeed::alignedFree()"); - - data = data_.release(); - if (!data) - ThrowIOE("Memory buffer is nonexistent"); - - assert(!ASan::RegionIsPoisoned(data, size)); - - isOwner = true; -} - -Buffer::~Buffer() { - if (isOwner) { - alignedFreeConstPtr(data); - } -} - -Buffer& Buffer::operator=(Buffer&& rhs) noexcept { - if (this == &rhs) { - assert(!ASan::RegionIsPoisoned(data, size)); - return *this; - } - - if (isOwner) - alignedFreeConstPtr(data); - - data = rhs.data; - size = rhs.size; - isOwner = rhs.isOwner; - - assert(!ASan::RegionIsPoisoned(data, size)); - - rhs.isOwner = false; - - return *this; -} - -Buffer& Buffer::operator=(const Buffer& rhs) { - if (this == &rhs) { - assert(!ASan::RegionIsPoisoned(data, size)); - return *this; - } - - Buffer unOwningTmp(rhs.data, rhs.size); - *this = std::move(unOwningTmp); - assert(!isOwner); - assert(!ASan::RegionIsPoisoned(data, size)); - - return *this; -} - -#if 0 -Buffer* Buffer::clone() { - Buffer *new_map = new Buffer(size); - memcpy(new_map->data, data, size); - return new_map; -} - -Buffer* Buffer::cloneRandomSize() { - uint32_t new_size = (rand() | (rand() << 15)) % size; - Buffer *new_map = new Buffer(new_size); - memcpy(new_map->data, data, new_size); - return new_map; -} - -void Buffer::corrupt(int errors) { - for (int i = 0; i < errors; i++) { - uint32_t pos = (rand() | (rand() << 15)) % size; - data[pos] = rand() & 0xff; - } -} -#endif - -} // namespace rawspeed diff --git a/src/librawspeed/io/Buffer.h b/src/librawspeed/io/Buffer.h index ca0f35b4d..265ff113a 100644 --- a/src/librawspeed/io/Buffer.h +++ b/src/librawspeed/io/Buffer.h @@ -32,25 +32,12 @@ namespace rawspeed { -// This allows to specify the number of bytes that each Buffer needs to -// allocate additionally to be able to remove one runtime bounds check -// in BitStream::fill. 
There are two sane choices: -// 0 : allocate exactly as much data as required, or -// set it to the value of BitStreamCacheBase::MaxProcessBytes -#define BUFFER_PADDING 0UL - -// if the padding is >= 4, bounds checking in BitStream::fill are not compiled, -// which supposedly saves about 1% on modern CPUs -// WARNING: if the padding is >= 4, do *NOT* create Buffer from -// passed unowning pointer and size. Or, subtract BUFFER_PADDING from size. -// else bound checks will malfunction => bad things can happen !!! - /************************************************************************* * This is the buffer abstraction. * * It allows access to some piece of memory, typically a whole or part * of a raw file. The underlying memory may be owned by the buffer or not. - * It supports move operations to properly deal with owneship transfer. + * It supports move operations to properly deal with ownership transfer. * It intentionally supports only read/const access to the underlying memory. * *************************************************************************/ @@ -67,26 +54,46 @@ class Buffer public: // allocates the databuffer, and returns owning non-const pointer. 
static std::unique_ptr - Create(size_type size); + Create(size_type size) { + if (!size) + ThrowIOE("Trying to allocate 0 bytes sized buffer."); - // constructs an empty buffer - Buffer() = default; + std::unique_ptr data( + alignedMalloc(roundUp(size, 16)), + &alignedFree); + if (!data) + ThrowIOE("Failed to allocate %uz bytes memory buffer.", size); - // Allocates the memory - explicit Buffer(size_type size_) : Buffer(Create(size_), size_) { - assert(!ASan::RegionIsPoisoned(data, size)); + assert(!ASan::RegionIsPoisoned(data.get(), size)); + + return data; } + // constructs an empty buffer + Buffer() = default; + // creates buffer from owning unique_ptr Buffer(std::unique_ptr data_, - size_type size_); + size_type size_) + : size(size_) { + if (!size) + ThrowIOE("Buffer has zero size?"); + + if (data_.get_deleter() != &alignedFree) + ThrowIOE("Wrong deleter. Expected rawspeed::alignedFree()"); + + data = data_.release(); + if (!data) + ThrowIOE("Memory buffer is nonexistent"); + + assert(!ASan::RegionIsPoisoned(data, size)); + + isOwner = true; + } // Data already allocated explicit Buffer(const uint8_t* data_, size_type size_) : data(data_), size(size_) { - static_assert(BUFFER_PADDING == 0, "please do make sure that you do NOT " - "call this function from YOUR code, and " - "then comment-out this assert."); assert(!ASan::RegionIsPoisoned(data, size)); } @@ -103,10 +110,45 @@ class Buffer } // Frees memory if owned - ~Buffer(); + ~Buffer() { + if (isOwner) { + alignedFreeConstPtr(data); + } + } + + Buffer& operator=(Buffer&& rhs) noexcept { + if (this == &rhs) { + assert(!ASan::RegionIsPoisoned(data, size)); + return *this; + } + + if (isOwner) + alignedFreeConstPtr(data); + + data = rhs.data; + size = rhs.size; + isOwner = rhs.isOwner; - Buffer& operator=(Buffer&& rhs) noexcept; - Buffer& operator=(const Buffer& rhs); + assert(!ASan::RegionIsPoisoned(data, size)); + + rhs.isOwner = false; + + return *this; + } + + Buffer& operator=(const Buffer& rhs) { + if 
(this == &rhs) { + assert(!ASan::RegionIsPoisoned(data, size)); + return *this; + } + + Buffer unOwningTmp(rhs.data, rhs.size); + *this = std::move(unOwningTmp); + assert(!isOwner); + assert(!ASan::RegionIsPoisoned(data, size)); + + return *this; + } Buffer getSubView(size_type offset, size_type size_) const { if (!isValid(0, offset)) @@ -163,14 +205,8 @@ class Buffer } inline bool isValid(size_type offset, size_type count = 1) const { - return static_cast(offset) + count <= - static_cast(size) + BUFFER_PADDING; + return static_cast(offset) + count <= static_cast(size); } - -// Buffer* clone(); -// /* For testing purposes */ -// void corrupt(int errors); -// Buffer* cloneRandomSize(); }; /* diff --git a/src/librawspeed/io/ByteStream.h b/src/librawspeed/io/ByteStream.h index f59f6ffbf..fb6be6a47 100644 --- a/src/librawspeed/io/ByteStream.h +++ b/src/librawspeed/io/ByteStream.h @@ -152,7 +152,7 @@ class ByteStream : public DataBuffer return DataBuffer::get(pos, i); } - inline uint16_t peekU16() { return peek(); } + inline uint16_t peekU16() const { return peek(); } template inline T get() { auto ret = peek(); diff --git a/src/librawspeed/io/CMakeLists.txt b/src/librawspeed/io/CMakeLists.txt index bb1e44094..3decfffec 100644 --- a/src/librawspeed/io/CMakeLists.txt +++ b/src/librawspeed/io/CMakeLists.txt @@ -4,8 +4,8 @@ FILE(GLOB SOURCES "BitPumpMSB.h" "BitPumpMSB16.h" "BitPumpMSB32.h" + "BitStream.cpp" "BitStream.h" - "Buffer.cpp" "Buffer.h" "ByteStream.h" "Endianness.h" diff --git a/src/librawspeed/io/FileIOException.h b/src/librawspeed/io/FileIOException.h index 315a3bd2f..43ae48bc1 100644 --- a/src/librawspeed/io/FileIOException.h +++ b/src/librawspeed/io/FileIOException.h @@ -21,6 +21,7 @@ #pragma once +#include "rawspeedconfig.h" #include "common/RawspeedException.h" // for ThrowExceptionHelper #include "decoders/RawDecoderException.h" // for RawDecoderException #include // for string @@ -29,8 +30,9 @@ namespace rawspeed { class FileIOException final : public 
RawDecoderException { public: - explicit FileIOException(const std::string& msg) : RawDecoderException(msg) {} - explicit FileIOException(const char* msg) : RawDecoderException(msg) {} + explicit RAWSPEED_UNLIKELY_FUNCTION RAWSPEED_NOINLINE + FileIOException(const char* msg) + : RawDecoderException(msg) {} }; #define ThrowFIE(...) \ diff --git a/src/librawspeed/io/IOException.h b/src/librawspeed/io/IOException.h index 74f5c038c..96f69d3eb 100644 --- a/src/librawspeed/io/IOException.h +++ b/src/librawspeed/io/IOException.h @@ -21,6 +21,7 @@ #pragma once +#include "rawspeedconfig.h" #include "common/RawspeedException.h" // for RawspeedException #include // for string @@ -28,8 +29,9 @@ namespace rawspeed { class IOException final : public RawspeedException { public: - explicit IOException(const std::string& msg) : RawspeedException(msg) {} - explicit IOException(const char* msg) : RawspeedException(msg) {} + explicit RAWSPEED_UNLIKELY_FUNCTION RAWSPEED_NOINLINE + IOException(const char* msg) + : RawspeedException(msg) {} }; #define ThrowIOE(...) ThrowExceptionHelper(rawspeed::IOException, __VA_ARGS__) diff --git a/src/librawspeed/metadata/CameraMetadataException.h b/src/librawspeed/metadata/CameraMetadataException.h index b9b9fa8de..97a185be4 100644 --- a/src/librawspeed/metadata/CameraMetadataException.h +++ b/src/librawspeed/metadata/CameraMetadataException.h @@ -21,6 +21,7 @@ #pragma once +#include "rawspeedconfig.h" #include "common/RawspeedException.h" #include // for string @@ -28,9 +29,9 @@ namespace rawspeed { class CameraMetadataException final : public RawspeedException { public: - explicit CameraMetadataException(const std::string& msg) + explicit RAWSPEED_UNLIKELY_FUNCTION RAWSPEED_NOINLINE + CameraMetadataException(const char* msg) : RawspeedException(msg) {} - explicit CameraMetadataException(const char* msg) : RawspeedException(msg) {} }; #define ThrowCME(...) 
\ diff --git a/src/librawspeed/parsers/CiffParserException.h b/src/librawspeed/parsers/CiffParserException.h index 8803fb140..ed5c60558 100644 --- a/src/librawspeed/parsers/CiffParserException.h +++ b/src/librawspeed/parsers/CiffParserException.h @@ -22,6 +22,7 @@ #pragma once +#include "rawspeedconfig.h" #include "common/RawspeedException.h" // for ThrowExceptionHelper #include "parsers/RawParserException.h" // for ThrowRPE, RawParserException #include @@ -30,9 +31,9 @@ namespace rawspeed { class CiffParserException final : public RawParserException { public: - explicit CiffParserException(const std::string& msg) + explicit RAWSPEED_UNLIKELY_FUNCTION RAWSPEED_NOINLINE + CiffParserException(const char* msg) : RawParserException(msg) {} - explicit CiffParserException(const char* msg) : RawParserException(msg) {} }; #define ThrowCPE(...) \ diff --git a/src/librawspeed/parsers/FiffParserException.h b/src/librawspeed/parsers/FiffParserException.h index d6cb98279..5a1ea3844 100644 --- a/src/librawspeed/parsers/FiffParserException.h +++ b/src/librawspeed/parsers/FiffParserException.h @@ -20,6 +20,7 @@ #pragma once +#include "rawspeedconfig.h" #include "common/RawspeedException.h" // for ThrowExceptionHelper #include "parsers/RawParserException.h" // for ThrowRPE, RawParserException #include @@ -28,9 +29,9 @@ namespace rawspeed { class FiffParserException final : public RawParserException { public: - explicit FiffParserException(const std::string& msg) + explicit RAWSPEED_UNLIKELY_FUNCTION RAWSPEED_NOINLINE + FiffParserException(const char* msg) : RawParserException(msg) {} - explicit FiffParserException(const char* msg) : RawParserException(msg) {} }; #define ThrowFPE(...) 
\ diff --git a/src/librawspeed/parsers/RawParserException.h b/src/librawspeed/parsers/RawParserException.h index c93a3f69d..4cd91f2e3 100644 --- a/src/librawspeed/parsers/RawParserException.h +++ b/src/librawspeed/parsers/RawParserException.h @@ -20,6 +20,7 @@ #pragma once +#include "rawspeedconfig.h" #include "common/RawspeedException.h" #include @@ -27,9 +28,9 @@ namespace rawspeed { class RawParserException : public RawspeedException { public: - explicit RawParserException(const std::string& msg) + explicit RAWSPEED_UNLIKELY_FUNCTION RAWSPEED_NOINLINE + RawParserException(const char* msg) : RawspeedException(msg) {} - explicit RawParserException(const char* msg) : RawspeedException(msg) {} }; #define ThrowRPE(...) \ diff --git a/src/librawspeed/parsers/TiffParser.cpp b/src/librawspeed/parsers/TiffParser.cpp index 4bd60c098..38654c65d 100644 --- a/src/librawspeed/parsers/TiffParser.cpp +++ b/src/librawspeed/parsers/TiffParser.cpp @@ -72,7 +72,7 @@ TiffRootIFDOwner TiffParser::parse(TiffIFD* parent, const Buffer& data) { TiffRootIFDOwner root = std::make_unique( parent, nullptr, bs, - UINT32_MAX); // tell TiffIFD constructur not to parse bs as IFD + UINT32_MAX); // tell TiffIFD constructor not to parse bs as IFD NORangesSet ifds; diff --git a/src/librawspeed/parsers/TiffParserException.h b/src/librawspeed/parsers/TiffParserException.h index 6cb30fba0..78eb32e1b 100644 --- a/src/librawspeed/parsers/TiffParserException.h +++ b/src/librawspeed/parsers/TiffParserException.h @@ -21,6 +21,7 @@ #pragma once +#include "rawspeedconfig.h" #include "common/RawspeedException.h" // for ThrowExceptionHelper #include "parsers/RawParserException.h" // for ThrowRPE, RawParserException #include @@ -29,9 +30,9 @@ namespace rawspeed { class TiffParserException final : public RawParserException { public: - explicit TiffParserException(const std::string& msg) + explicit RAWSPEED_UNLIKELY_FUNCTION RAWSPEED_NOINLINE + TiffParserException(const char* msg) : RawParserException(msg) {} - 
explicit TiffParserException(const char* msg) : RawParserException(msg) {} }; #define ThrowTPE(...) \ diff --git a/src/librawspeed/tiff/TiffIFD.cpp b/src/librawspeed/tiff/TiffIFD.cpp index 9fc7b59d5..46bb232ac 100644 --- a/src/librawspeed/tiff/TiffIFD.cpp +++ b/src/librawspeed/tiff/TiffIFD.cpp @@ -60,7 +60,7 @@ void TiffIFD::parseIFDEntry(NORangesSet* ifds, ByteStream* bs) { try { switch (t->tag) { case DNGPRIVATEDATA: - // These are arbitrairly 'rebased', to preserve the offsets, but as it is + // These are arbitrarily 'rebased', to preserve the offsets, but as it is // implemented right now, that could trigger UB (pointer arithmetics, // creating pointer to unowned memory, etc). And since this is not even // used anywhere right now, let's not diff --git a/src/utilities/identify/rawspeed-identify.cpp b/src/utilities/identify/rawspeed-identify.cpp index 43b9c6f80..8d17a161b 100644 --- a/src/utilities/identify/rawspeed-identify.cpp +++ b/src/utilities/identify/rawspeed-identify.cpp @@ -158,17 +158,17 @@ int main(int argc, char* argv[]) { // NOLINT #else // turn the locale ANSI encoded string into UTF-8 so that FileReader can // turn it into UTF-16 later - int size = MultiByteToWideChar(CP_ACP, 0, argv[1], -1, NULL, 0); + int size = MultiByteToWideChar(CP_ACP, 0, argv[1], -1, nullptr, 0); std::wstring wImageFileName; wImageFileName.resize(size); MultiByteToWideChar(CP_ACP, 0, argv[1], -1, &wImageFileName[0], size); - size = WideCharToMultiByte(CP_UTF8, 0, &wImageFileName[0], -1, NULL, 0, - NULL, NULL); + size = WideCharToMultiByte(CP_UTF8, 0, &wImageFileName[0], -1, nullptr, 0, + nullptr, nullptr); std::string _imageFileName; _imageFileName.resize(size); char* imageFileName = &_imageFileName[0]; WideCharToMultiByte(CP_UTF8, 0, &wImageFileName[0], -1, imageFileName, size, - NULL, NULL); + nullptr, nullptr); #endif fprintf(stderr, "Loading file: \"%s\"\n", imageFileName); diff --git a/src/utilities/rsbench/CMakeLists.txt b/src/utilities/rsbench/CMakeLists.txt index 
2357e94a8..c69b7a638 100644 --- a/src/utilities/rsbench/CMakeLists.txt +++ b/src/utilities/rsbench/CMakeLists.txt @@ -3,6 +3,10 @@ rawspeed_add_executable(rsbench main.cpp) target_link_libraries(rsbench rawspeed) target_link_libraries(rsbench rawspeed_bench) +if(TARGET RawSpeed::OpenMP_CXX) + target_link_libraries(rsbench RawSpeed::OpenMP_CXX) +endif() + rawspeed_add_test(NAME utilities/rsbench COMMAND rsbench --help) add_dependencies(benchmarks rsbench) diff --git a/src/utilities/rstest/md5.h b/src/utilities/rstest/md5.h index 0b76895d2..f5922b23b 100644 --- a/src/utilities/rstest/md5.h +++ b/src/utilities/rstest/md5.h @@ -27,6 +27,8 @@ * Software. */ +#pragma once + #include // for array #include // for uint8_t, uint32_t #include // for size_t diff --git a/src/utilities/rstest/rstest.cpp b/src/utilities/rstest/rstest.cpp index dc255fc4c..60b9ac306 100644 --- a/src/utilities/rstest/rstest.cpp +++ b/src/utilities/rstest/rstest.cpp @@ -101,9 +101,8 @@ class RstestHashMismatch final : public rawspeed::RawspeedException { public: size_t time; - explicit RstestHashMismatch(const std::string& msg, size_t time_) - : RawspeedException(msg), time(time_) {} - explicit RstestHashMismatch(const char* msg, size_t time_) + explicit RAWSPEED_UNLIKELY_FUNCTION RAWSPEED_NOINLINE + RstestHashMismatch(const char* msg, size_t time_) : RawspeedException(msg), time(time_) {} }; diff --git a/test/librawspeed/test/ExceptionsTest.cpp b/test/librawspeed/test/ExceptionsTest.cpp index a8bdb9b59..bfb4002b1 100644 --- a/test/librawspeed/test/ExceptionsTest.cpp +++ b/test/librawspeed/test/ExceptionsTest.cpp @@ -47,7 +47,7 @@ using std::unique_ptr; namespace rawspeed_test { -static const std::string msg("my very Smart error Message #1 !"); +static const char* msg = "my very Smart error Message #1 !"; #define FMT "%s" @@ -191,15 +191,15 @@ TYPED_TEST(ExceptionsTest, ThrowMessage) { } TYPED_TEST(ExceptionsTest, ThrowHelperTest) { - ASSERT_ANY_THROW(MetaHelper(msg.c_str())); - 
EXPECT_THROW(MetaHelper(msg.c_str()), std::runtime_error); - EXPECT_THROW(MetaHelper(msg.c_str()), RawspeedException); - EXPECT_THROW(MetaHelper(msg.c_str()), TypeParam); + ASSERT_ANY_THROW(MetaHelper(msg)); + EXPECT_THROW(MetaHelper(msg), std::runtime_error); + EXPECT_THROW(MetaHelper(msg), RawspeedException); + EXPECT_THROW(MetaHelper(msg), TypeParam); } TYPED_TEST(ExceptionsTest, ThrowHelperTestMessage) { try { - MetaHelper(msg.c_str()); + MetaHelper(msg); } catch (std::exception& ex) { ASSERT_THAT(ex.what(), testing::HasSubstr(msg)); }