|
- ///////////////////////////////////////////////////////////////////////////
- //
- // Copyright (c) 2009-2014 DreamWorks Animation LLC.
- //
- // All rights reserved.
- //
- // Redistribution and use in source and binary forms, with or without
- // modification, are permitted provided that the following conditions are
- // met:
- // * Redistributions of source code must retain the above copyright
- // notice, this list of conditions and the following disclaimer.
- // * Redistributions in binary form must reproduce the above
- // copyright notice, this list of conditions and the following disclaimer
- // in the documentation and/or other materials provided with the
- // distribution.
- // * Neither the name of DreamWorks Animation nor the names of
- // its contributors may be used to endorse or promote products derived
- // from this software without specific prior written permission.
- //
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- //
- ///////////////////////////////////////////////////////////////////////////
- #define OPENEXR_BUILTIN_TABLES
- //
- // A program to generate various acceleration lookup tables
- // for Imf::DwaCompressor
- //
- #include <cstddef>
- #include <stdio.h>
- #include <stdlib.h>
- #include <math.h>
- #include <vector>
- #include <OpenEXRConfig.h>
- #ifndef OPENEXR_BUILTIN_TABLES
- #ifdef OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN
- #include <unistd.h>
- #endif
- #endif // OPENEXR_BUILTIN_TABLES
- #include <half.h>
- #include <IlmThread.h>
- #include <IlmThreadSemaphore.h>
- #include <ImfIO.h>
- #include <ImfXdr.h>
- #include "ImfNamespace.h"
- using namespace OPENEXR_IMF_NAMESPACE;
- namespace {
- #ifdef OPENEXR_BUILTIN_TABLES
- static unsigned short dwaCompressorNoOp[0x10000] = {}; // zero-initialized here; entries are written by generateNoop()
- static unsigned short dwaCompressorToLinear[0x10000] = {}; // zero-initialized here; entries are written by generateToLinear()
- static unsigned short dwaCompressorToNonlinear[0x10000] = {}; // zero-initialized here; entries are written by generateToNonlinear()
- //static unsigned int closestDataOffset[0x10000] = {}; (disabled: generateLutHeader() is not compiled when OPENEXR_BUILTIN_TABLES is defined)
- //static unsigned short closestData[0x80000] = {}; (disabled together with closestDataOffset above)
- #else
- //
- // Worker that builds one slice of the quantization acceleration data.
- //
- // For every 16-bit pattern `input` in [startValue, endValue), run()
- // records one candidate per bit count lower than that of `input`: the
- // half with exactly that many bits set whose float value is closest
- // to the float value of `input`. The candidates of one input are
- // stored back to back in elements(), ordered by increasing bit count,
- // and offset()[input - startValue] is the index of the first of them.
- //
- class LutHeaderWorker
- {
-   public:
-     //
-     // Thread wrapper that calls LutHeaderWorker::run() on one worker.
-     // The constructor calls start(). run() posts the semaphore before
-     // it begins the work and the destructor waits on that semaphore,
-     // so destruction blocks at least until run() has been entered.
-     // NOTE(review): waiting for the work itself to finish presumably
-     // relies on the ILMTHREAD Thread base-class destructor - confirm.
-     //
-     class Runner : public ILMTHREAD_NAMESPACE::Thread
-     {
-       public:
-         // worker: object whose run() is executed by this thread
-         // output: forwarded to run() as its outputProgress flag
-         Runner(LutHeaderWorker &worker, bool output):
-             ILMTHREAD_NAMESPACE::Thread(),
-             _worker(worker),
-             _output(output)
-         {
-             start();
-         }
-         virtual ~Runner()
-         {
-             _semaphore.wait();
-         }
-         virtual void run()
-         {
-             _semaphore.post();
-             _worker.run(_output);
-         }
-       private:
-         LutHeaderWorker &_worker;
-         bool _output;
-         ILMTHREAD_NAMESPACE::Semaphore _semaphore;
-     }; // class LutHeaderWorker::Runner
-     //
-     // Prepare a worker for the inputs in [startValue, endValue).
-     // _offset gets one slot per input; _elements is a fixed-size
-     // buffer of 2M entries (run() does not bounds-check it).
-     //
-     LutHeaderWorker(size_t startValue,
-                     size_t endValue):
-         _lastCandidateCount(0),
-         _startValue(startValue),
-         _endValue(endValue),
-         _numElements(0),
-         _offset(new size_t[numValues()]),
-         _elements(new unsigned short[1024*1024*2])
-     {
-     }
-     ~LutHeaderWorker()
-     {
-         delete[] _offset;
-         delete[] _elements;
-     }
-     // Number of candidates stored for the last input (endValue - 1).
-     size_t lastCandidateCount() const
-     {
-         return _lastCandidateCount;
-     }
-     // Number of inputs covered by this worker.
-     size_t numValues() const
-     {
-         return _endValue - _startValue;
-     }
-     // Total number of candidates written to elements() by run().
-     size_t numElements() const
-     {
-         return _numElements;
-     }
-     // Per-input start index into elements(), local to this worker.
-     const size_t* offset() const
-     {
-         return _offset;
-     }
-     // Candidate bit patterns of all inputs, stored back to back.
-     const unsigned short* elements() const
-     {
-         return _elements;
-     }
-     //
-     // Fill offset() and elements() for every input of this worker.
-     // When outputProgress is true, a progress line is written to
-     // stderr every 100 inputs (__GNUC__ builds) or every 1000 inputs.
-     //
-     void run(bool outputProgress)
-     {
-         // An input has at most 16 bits set, so it never yields more
-         // than 16 candidates (target bit counts numSetBits-1 .. 0).
-         half candidate[16];
-         int candidateCount = 0;
-         for (size_t input=_startValue; input<_endValue; ++input) {
-             if (outputProgress) {
-                 // Progress is measured inside this worker's own range,
-                 // so it stays within 0..100 for any start value.
-                 const double percentDone =
-                     100.*(float)(input - _startValue)/(float)numValues();
- #ifdef __GNUC__
-                 if (input % 100 == 0) {
-                     // character 13 is a carriage return: the line is
-                     // overwritten in place
-                     fprintf(stderr,
-                             " Building acceleration for DwaCompressor, %.2f %% %c",
-                             percentDone, 13);
-                 }
- #else
-                 if (input % 1000 == 0) {
-                     fprintf(stderr,
-                             " Building acceleration for DwaCompressor, %.2f %%\n",
-                             percentDone);
-                 }
- #endif
-             }
-             int numSetBits = countSetBits(input);
-             half inputHalf, closestHalf;
-             inputHalf.setBits(input);
-             _offset[input - _startValue] = _numElements;
-             // Gather candidates: for each lower bit count, scan all
-             // 65536 patterns and keep the one closest to the input
-             candidateCount = 0;
-             for (int targetNumSetBits=numSetBits-1; targetNumSetBits>=0;
-                  --targetNumSetBits) {
-                 bool valueFound = false;
-                 for (int i=0; i<65536; ++i) {
-                     if (countSetBits(i) != targetNumSetBits) continue;
-                     if (!valueFound) {
-                         closestHalf.setBits(i);
-                         valueFound = true;
-                     } else {
-                         half tmpHalf;
-                         tmpHalf.setBits(i);
-                         if (fabs((float)inputHalf - (float)tmpHalf) <
-                             fabs((float)inputHalf - (float)closestHalf)) {
-                             closestHalf = tmpHalf;
-                         }
-                     }
-                 }
-                 if (valueFound == false) {
-                     fprintf(stderr, "bork bork bork!\n");
-                 }
-                 candidate[candidateCount] = closestHalf;
-                 candidateCount++;
-             }
-             // Sort candidates by increasing number of bits set
-             for (int i=0; i<candidateCount; ++i) {
-                 for (int j=i+1; j<candidateCount; ++j) {
-                     int iCnt = countSetBits(candidate[i].bits());
-                     int jCnt = countSetBits(candidate[j].bits());
-                     if (jCnt < iCnt) {
-                         half tmp = candidate[i];
-                         candidate[i] = candidate[j];
-                         candidate[j] = tmp;
-                     }
-                 }
-             }
-             // Copy candidates to the data buffer
-             for (int i=0; i<candidateCount; ++i) {
-                 _elements[_numElements] = candidate[i].bits();
-                 _numElements++;
-             }
-             if (input == _endValue-1) {
-                 _lastCandidateCount = candidateCount;
-             }
-         }
-     }
-   private:
-     // The worker owns _offset and _elements through raw pointers, so
-     // a copy would free them twice: copying is declared but never
-     // defined.
-     LutHeaderWorker(const LutHeaderWorker &);
-     LutHeaderWorker &operator=(const LutHeaderWorker &);
-     size_t _lastCandidateCount;
-     size_t _startValue;
-     size_t _endValue;
-     size_t _numElements;
-     size_t *_offset;
-     unsigned short *_elements;
-     //
-     // Precomputing the bit count runs faster than using
-     // the builtin instruction, at least in one case..
-     //
-     // Precomputing 8-bits is no slower than 16-bits,
-     // and saves a fair bit of overhead..
-     //
-     // Returns the number of bits set in src: the table is looked up
-     // once for the low byte and once for the high byte.
-     //
-     static int countSetBits(unsigned short src)
-     {
-         static const unsigned short numBitsSet[256] =
-         {
-             0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
-             1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-             1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-             2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-             1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-             2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-             2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-             3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-             1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-             2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-             2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-             3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-             2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-             3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-             3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-             4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
-         };
-         return numBitsSet[src & 0xff] + numBitsSet[src >> 8];
-     }
- }; // class LutHeaderWorker
- #endif // OPENEXR_BUILTIN_TABLES
- } // namespace
- //
- // Generate a no-op LUT, to cut down on conditional branches
- //
- static void
- generateNoop()
- {
-     // Entry i of the table is the 16-bit value i as written by
-     // Xdr::write. With OPENEXR_BUILTIN_TABLES the entries are stored
-     // in dwaCompressorNoOp[]; otherwise they are printed to stdout
-     // as a C array definition, eight entries per line.
- #ifndef OPENEXR_BUILTIN_TABLES
-     printf("const unsigned short dwaCompressorNoOp[] = \n");
-     printf("{");
- #endif // OPENEXR_BUILTIN_TABLES
-     for (int value = 0; value < 65536; ++value) {
-         // Xdr::write advances the pointer it is given, so hand it a
-         // scratch cursor that points at the destination entry
-         unsigned short encoded;
-         char *cursor = (char *)(&encoded);
-         const unsigned short native = (unsigned short)value;
-         Xdr::write <CharPtrIO> (cursor, native);
- #ifdef OPENEXR_BUILTIN_TABLES
-         dwaCompressorNoOp[value] = encoded;
- #else
-         if (value % 8 == 0) {
-             printf("\n ");
-         }
-         printf("0x%04x, ", encoded);
- #endif // OPENEXR_BUILTIN_TABLES
-     }
- #ifndef OPENEXR_BUILTIN_TABLES
-     printf("\n};\n");
- #endif // OPENEXR_BUILTIN_TABLES
- }
- //
- // Nonlinearly encode luminance. For values below 1.0, we want
- // to use a gamma 2.2 function to match what is fairly common
- // for storing output referred. However, > 1, gamma functions blow up,
- // and log functions are much better behaved. We could use a log
- // function everywhere, but it tends to over-sample dark
- // regions and undersample the brighter regions, when
- // compared to the way real devices reproduce values.
- //
- // So, above 1, use a log function which is a smooth blend
- // into the gamma function.
- //
- // Nonlinear(linear) =
- //
- //     linear^(1./2.2)              if linear <= 1.0
- //     ln(linear)/ln(e^2.2) + 1     otherwise
- //
- //
- // toNonlinear[] needs to take in XDR format half float values,
- // and output NATIVE format half float values.
- //
- // toLinear[] does the opposite - takes in NATIVE half and
- // outputs XDR half values.
- //
- static void
- generateToLinear()
- {
-     // Builds the nonlinear -> linear table. The index is a NATIVE half
-     // bit pattern; the entry is the converted half, written through
-     // Xdr::write. Index 0 and every pattern with all exponent bits set
-     // (NaN and inf) map to 0. With OPENEXR_BUILTIN_TABLES the result
-     // lands in dwaCompressorToLinear[]; otherwise it is built in a
-     // local array and printed to stdout as a C array definition.
- #ifndef OPENEXR_BUILTIN_TABLES
-     unsigned short toLinear[65536];
- #else
-     unsigned short* toLinear = dwaCompressorToLinear;
- #endif // OPENEXR_BUILTIN_TABLES
-     // Base of the exponential used above 1.0; it does not depend on
-     // the table index, so it is computed once.
-     const float logBase = pow(2.7182818, 2.2);
-     toLinear[0] = 0;
-     for (int i=1; i<65536; ++i) {
-         // map NaN and inf to 0
-         if ((i & 0x7c00) == 0x7c00) {
-             toLinear[i] = 0;
-             continue;
-         }
-         //
-         // _toLinear - assume i is NATIVE, but our output needs
-         // to get flipped to XDR
-         //
-         half h;
-         h.setBits(i);
-         // The curve is applied to the magnitude; the sign is restored
-         // afterwards
-         float sign = 1;
-         if ((float)h < 0) {
-             sign = -1;
-         }
-         if ( fabs( (float)h) <= 1.0 ) {
-             // power function with exponent 2.2 at or below 1.0
-             h = (half)(sign * pow((float)fabs((float)h), 2.2f));
-         } else {
-             // exponential with base logBase above 1.0
-             h = (half)(sign * pow(logBase, (float)(fabs((float)h) - 1.0)));
-         }
-         {
-             char *tmp = (char *)(&toLinear[i]);
-             Xdr::write <CharPtrIO> ( tmp, h.bits());
-         }
-     }
- #ifndef OPENEXR_BUILTIN_TABLES
-     printf("const unsigned short dwaCompressorToLinear[] = \n");
-     printf("{");
-     for (int i=0; i<65536; ++i) {
-         if (i % 8 == 0) {
-             printf("\n ");
-         }
-         printf("0x%04x, ", toLinear[i]);
-     }
-     printf("\n};\n");
- #endif // OPENEXR_BUILTIN_TABLES
- }
- //
- // Builds the linear -> nonlinear table. The index is an XDR half bit
- // pattern, which is first read back into native order with Xdr::read;
- // the entry is the NATIVE bit pattern of the converted half. Index 0
- // and every NaN / inf input map to 0. With OPENEXR_BUILTIN_TABLES the
- // result lands in dwaCompressorToNonlinear[]; otherwise it is built in
- // a local array and printed to stdout as a C array definition.
- //
- static void
- generateToNonlinear()
- {
- #ifndef OPENEXR_BUILTIN_TABLES
-     unsigned short toNonlinear[65536];
- #else
-     unsigned short* toNonlinear = dwaCompressorToNonlinear;
- #endif // OPENEXR_BUILTIN_TABLES
-     // Base of the logarithm used above 1.0; it does not depend on the
-     // table index, so it is computed once.
-     const float logBase = pow(2.7182818, 2.2);
-     toNonlinear[0] = 0;
-     for (int i=1; i<65536; ++i) {
-         unsigned short usNative;
-         unsigned short usXdr = i;
-         {
-             const char *tmp = (char *)(&usXdr);
-             Xdr::read<CharPtrIO>(tmp, usNative);
-         }
-         // map NaN and inf to 0
-         if ((usNative & 0x7c00) == 0x7c00) {
-             toNonlinear[i] = 0;
-             continue;
-         }
-         //
-         // toNonlinear - assume i is XDR
-         //
-         half h;
-         h.setBits(usNative);
-         // The curve is applied to the magnitude; the sign is restored
-         // afterwards
-         float sign = 1;
-         if ((float)h < 0) {
-             sign = -1;
-         }
-         if ( fabs( (float)h ) <= 1.0) {
-             // power function with exponent 1/2.2 at or below 1.0
-             h = (half)(sign * pow(fabs((float)h), 1.f/2.2f));
-         } else {
-             // logarithm to base logBase, shifted up by 1, above 1.0
-             h = (half)(sign * ( log(fabs((float)h)) / log(logBase) + 1.0) );
-         }
-         toNonlinear[i] = h.bits();
-     }
- #ifndef OPENEXR_BUILTIN_TABLES
-     printf("const unsigned short dwaCompressorToNonlinear[] = \n");
-     printf("{");
-     for (int i=0; i<65536; ++i) {
-         if (i % 8 == 0) {
-             printf("\n ");
-         }
-         printf("0x%04x, ", toNonlinear[i]);
-     }
-     printf("\n};\n");
- #endif // OPENEXR_BUILTIN_TABLES
- }
- #ifndef OPENEXR_BUILTIN_TABLES
- //
- // Attempt to get available CPUs in a somewhat portable way.
- //
- int
- cpuCount()
- {
-     // Returns the detected processor count, never less than 1.
-     // Stays at 1 when ILMTHREAD reports no thread support or when
-     // neither detection branch below is compiled in.
-     int detected = 1;
-     if (ILMTHREAD_NAMESPACE::supportsThreads()) {
- #if defined (OPENEXR_IMF_HAVE_SYSCONF_NPROCESSORS_ONLN)
-         detected = sysconf(_SC_NPROCESSORS_ONLN);
- #elif defined (_WIN32)
-         SYSTEM_INFO sysinfo;
-         GetSystemInfo( &sysinfo );
-         detected = sysinfo.dwNumberOfProcessors;
- #endif
-     }
-     // Guard against a zero or negative answer from the query
-     return (detected < 1) ? 1 : detected;
- }
- //
- // Generate acceleration luts for the quantization.
- //
- // For each possible input value, we want to find the closest numbers
- // which have one fewer bits set than before.
- //
- // This gives us num_bits(input)-1 values per input. If we alloc
- // space for everything, that's like a 2MB table. We can do better
- // by compressing all the values to be contiguous and using offset
- // pointers.
- //
- // After we've found the candidates with fewer bits set, sort them
- // based on increasing numbers of bits set. This way, on quantize(),
- // we can scan through the list and halt once we find the first
- // candidate within the error range. For small values that can
- // be quantized to 0, 0 is the first value tested and the search
- // can exit fairly quickly.
- //
- void
- generateLutHeader()
- {
-     // Split the 65536 possible inputs into one contiguous slice per
-     // CPU; the last worker also takes whatever the integer division
-     // leaves over.
-     std::vector<LutHeaderWorker*> workers;
-     size_t numWorkers = cpuCount();
-     size_t workerInterval = 65536 / numWorkers;
-     for (size_t i=0; i<numWorkers; ++i) {
-         if (i != numWorkers-1) {
-             workers.push_back( new LutHeaderWorker( i *workerInterval,
-                                                     (i+1)*workerInterval) );
-         } else {
-             workers.push_back( new LutHeaderWorker(i*workerInterval, 65536) );
-         }
-     }
-     // Run the workers, one thread each when threads are available and
-     // sequentially otherwise. Only the first worker reports progress.
-     if (ILMTHREAD_NAMESPACE::supportsThreads()) {
-         std::vector<LutHeaderWorker::Runner*> runners;
-         for (size_t i=0; i<workers.size(); ++i) {
-             runners.push_back( new LutHeaderWorker::Runner(*workers[i], (i==0)) );
-         }
-         // All runners are created before the first one is deleted
-         for (size_t i=0; i<workers.size(); ++i) {
-             delete runners[i];
-         }
-     } else {
-         for (size_t i=0; i<workers.size(); ++i) {
-             workers[i]->run(i == 0);
-         }
-     }
-     // Emit the offsets, eight per line. Each worker's offsets are
-     // local to its own element buffer, so they are shifted by the
-     // number of elements produced by all preceding workers.
-     printf("static unsigned int closestDataOffset[] = {\n");
-     int offsetIdx = 0;
-     size_t offsetPrev = 0;
-     for (size_t i=0; i<workers.size(); ++i) {
-         for (size_t value=0; value<workers[i]->numValues(); ++value) {
-             if (offsetIdx % 8 == 0) {
-                 printf(" ");
-             }
-             // %lu expects unsigned long; size_t is not that type on
-             // every platform, hence the explicit conversion
-             printf("%6lu, ",
-                    (unsigned long)(workers[i]->offset()[value] + offsetPrev));
-             if (offsetIdx % 8 == 7) {
-                 printf("\n");
-             }
-             offsetIdx++;
-         }
-         offsetPrev += workers[i]->offset()[workers[i]->numValues()-1] +
-                       workers[i]->lastCandidateCount();
-     }
-     printf("};\n\n\n");
-     // Emit the candidates of all workers back to back, eight per line
-     printf("static unsigned short closestData[] = {\n");
-     int elementIdx = 0;
-     for (size_t i=0; i<workers.size(); ++i) {
-         for (size_t element=0; element<workers[i]->numElements(); ++element) {
-             if (elementIdx % 8 == 0) {
-                 printf(" ");
-             }
-             printf("%5d, ", workers[i]->elements()[element]);
-             if (elementIdx % 8 == 7) {
-                 printf("\n");
-             }
-             elementIdx++;
-         }
-     }
-     printf("};\n\n\n");
-     for (size_t i=0; i<workers.size(); ++i) {
-         delete workers[i];
-     }
- }
- //
- // Generator entry point: prints an #include line to stdout, then
- // runs the four table generators in order, each preceded by three
- // newlines. The command-line arguments are not used.
- //
- int
- main(int argc, char **argv)
- {
-     typedef void (*TableGenerator)();
-     static const TableGenerator generators[] = {
-         generateNoop,
-         generateToLinear,
-         generateToNonlinear,
-         generateLutHeader
-     };
-     const size_t numGenerators = sizeof(generators) / sizeof(generators[0]);
-     printf("#include <cstddef>\n");
-     for (size_t g = 0; g < numGenerators; ++g) {
-         printf("\n\n\n");
-         generators[g]();
-     }
-     return 0;
- }
- #else // OPENEXR_BUILTIN_TABLES
- #include "dwaLookups.h"
- OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_ENTER
- //
- // Fills the three built-in lookup tables by running their generators.
- //
- static void init_dwa_()
- {
-     generateNoop();        // dwaCompressorNoOp
-     generateToLinear();    // dwaCompressorToLinear
-     generateToNonlinear(); // dwaCompressorToNonlinear
-     // generateLutHeader() is not available in this configuration,
-     // so it is not called here.
- }
- //
- // Runs init_dwa_() on the first call and does nothing on later calls.
- // NOTE(review): the flag is a plain function-local static bool with no
- // locking visible here - confirm how concurrent first calls are handled.
- //
- static inline void init_dwa()
- {
-     static bool initialized = false;
-     if (initialized)
-     {
-         return;
-     }
-     init_dwa_();
-     initialized = true;
- }
- //
- // Returns the no-op table, after making sure the tables are initialized.
- //
- const unsigned short* get_dwaCompressorNoOp()
- {
-     init_dwa();
-     const unsigned short* table = dwaCompressorNoOp;
-     return table;
- }
- //
- // Returns the to-linear table, after making sure the tables are
- // initialized.
- //
- const unsigned short* get_dwaCompressorToLinear()
- {
-     init_dwa();
-     const unsigned short* table = dwaCompressorToLinear;
-     return table;
- }
- //
- // Returns the to-nonlinear table, after making sure the tables are
- // initialized.
- //
- const unsigned short* get_dwaCompressorToNonlinear()
- {
-     init_dwa();
-     const unsigned short* table = dwaCompressorToNonlinear;
-     return table;
- }
- OPENEXR_IMF_INTERNAL_NAMESPACE_SOURCE_EXIT
- #endif // OPENEXR_BUILTIN_TABLES
|