// Grade.C
// Copyright (c) 2009 The Foundry Visionmongers Ltd. All Rights Reserved.
// Help text for the node; GradeIop::node_help() returns this pointer.
// The text is built from adjacent string literals, so every piece must be a
// complete, terminated literal. Paragraph and line structure is expressed with
// simple <p>/<br> markup rather than raw line breaks inside a literal.
const char* const HELP =
  "<p>Applies a linear ramp followed by a gamma function to each color channel.</p>"
  "<p>  A = multiply * (gain-lift)/(whitepoint-blackpoint)<br>"
  "  B = offset + lift - A*blackpoint<br>"
  "  output = pow(A*input + B, 1/gamma)</p>"
  "The reverse option is also provided so that you can copy-paste this node to "
  "invert the grade. This will do the opposite gamma correction followed by the "
  "opposite linear ramp.";
#include "DDImage/PixelIop.h"
#include "DDImage/Row.h"
#include "DDImage/DDMath.h"
#include "DDImage/NukeWrapper.h"
#include <cmath>
#include <cstring>
#include <sstream>
#include <string>
// The DDImage types used below (PixelIop, Row, Knob_Callback, Hash, ...) are
// referred to without the DD::Image:: qualifier for the rest of this file.
using namespace DD::Image;
// Node class name: returned by GradeIop::Class() and passed to the
// Iop::Description registration at the bottom of the file.
static const char* const CLASS = "Grade";
/// Grade operator: a per-channel linear ramp followed by a gamma curve, with an
/// optional reverse mode and optional clamping of the result. Provides both a
/// CPU implementation (pixel_engine) and a GPU shader implementation
/// (the gpuEngine_* overrides).
class GradeIop : public PixelIop
{
  // Knob storage. The arrays hold one value per colour index (0..3); the
  // defaults below describe an identity grade.
  float blackpoint[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
  float whitepoint[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
  float black[4]      = { 0.0f, 0.0f, 0.0f, 0.0f };   // shown to the user as "lift"
  float white[4]      = { 1.0f, 1.0f, 1.0f, 1.0f };   // shown to the user as "gain"
  float add[4]        = { 0.0f, 0.0f, 0.0f, 0.0f };   // shown to the user as "offset"
  float multiply[4]   = { 1.0f, 1.0f, 1.0f, 1.0f };
  float gamma[4]      = { 1.0f, 1.0f, 1.0f, 1.0f };
  bool reverse        = false;
  bool black_clamp    = true;
  bool white_clamp    = false;

  // Backing store for the fragment shader source returned by gpuEngine_body().
  // It is mutable because gpuEngine_body() is const yet has to keep the
  // generated text alive after it returns a pointer into it.
  mutable std::string _shaderBodyText;

public:
  GradeIop(Node* node) : PixelIop(node)
  {
    // All members are set up by their default member initializers.
  }

  // Leaving the requested set untouched indicates that each output channel
  // depends only on the same input channel.
  void in_channels(int, ChannelSet& /*channels*/) const override { }

  // CPU path.
  void pixel_engine(const Row &in, int y, int x, int r, ChannelMask, Row &) override;
  void knobs(Knob_Callback) override;

  const char* Class() const override { return CLASS; }
  const char* node_help() const override { return HELP; }
  static const Iop::Description d;

  // GPU path.
  const char* gpuEngine_decl() const override;
  const char* gpuEngine_body() const override;
  Hash gpuEngine_shader_hash_at(double time) override;
  void gpuEngine_GL_begin(DD::Image::GPUContext* context) override;
  void gpuEngine_GL_end(DD::Image::GPUContext* context) override;

  void _validate(bool for_real) override;
  bool pass_transform() const override { return true; }

  /// Returns true when the gamma knob is not animated and every one of its
  /// components is exactly 1. In that case the GPU shader can drop the gamma
  /// stage altogether, and with it an expensive pow() call. In every other case
  /// the GPU path uploads gamma through a uniform, animated or not: baking a
  /// constant into the shader buys little because the pow() cannot be removed
  /// unless all components are 1.
  ///
  /// Baking non-animated gamma values into the shader might let the compiler
  /// skip the calculations and selection logic that handle a gamma of the
  /// opposite sign (in practice probably only when all components share a
  /// sign). That saving should be small next to the cost of the pow(), and
  /// baking has caused real problems: on Linux RHEL 6 a baked-in value of 2.0
  /// (though not 1.99 or 2.01!) made the GLSL linker crash (Bug 46269 -
  /// NukeStudio - Grade Soft-effect - Entering '2.0' in the gamma knob and
  /// hitting Enter crashes Studio).
  ///
  /// (Note: gamma may be flagged as animated while all of its keys have the
  /// value 1. We would then pay for the pow() calls needlessly, but that is
  /// unlikely and something we should encourage users to avoid.)
  bool isGammaConstantOne() const;
};
// Works out whether the current knob values change any pixels at all, and
// whether a zero input can come out non-zero, then reports both to the base
// class / image info.
void GradeIop::_validate(bool for_real)
{
  // Either clamp on its own is enough to count as modifying the image.
  bool modifiesPixels = black_clamp || white_clamp;
  // Becomes true when the linear ramp has a non-zero intercept for any channel.
  bool zeroIsMoved = false;

  for (int c = 0; c < 4; ++c) {
    // Same slope/intercept derivation as pixel_engine(); a zero input range
    // (whitepoint == blackpoint) falls back to a fixed large slope instead of
    // dividing by zero.
    const float inputRange = whitepoint[c] - blackpoint[c];
    float slope = inputRange ? (white[c] - black[c]) / inputRange : 10000.0f;
    slope *= multiply[c];
    const float intercept = add[c] + black[c] - blackpoint[c] * slope;

    const bool isIdentity = (slope == 1 && !intercept && gamma[c] == 1.0f);
    if (isIdentity) {
      continue;
    }

    modifiesPixels = true;
    if (intercept) {
      zeroIsMoved = true;
    }
  }

  if (modifiesPixels) {
    set_out_channels(Mask_All);
  }
  else {
    set_out_channels(Mask_None);
  }

  PixelIop::_validate(for_real);

  // Must come after the base-class validate, which fills in info_.
  if (zeroIsMoved) {
    info_.black_outside(false);
  }
}
// CPU implementation of the grade for the pixels x..r-1 of one row.
//
// For every requested channel the linear coefficients A (slope) and B
// (intercept) are derived from the knob values, then one of two pipelines runs:
//   forward: linear ramp -> clamp -> gamma
//   reverse: inverse gamma -> inverse linear ramp -> clamp
// Each stage is skipped when it would be a no-op. The first stage that runs
// reads from the input row and writes to the output row; after that `inptr` is
// re-pointed at the output buffer so later stages work in place. If no stage
// ran at all, the input is copied to the output unchanged.
//
// The `y` parameter is not used by this function.
void GradeIop::pixel_engine(const Row& in, int y, int x, int r,
ChannelMask channels, Row& out)
{
foreach (n, channels) {
// z indexes the per-channel knob arrays; anything above 3 has no knob
// values, so the channel is passed through untouched.
unsigned z = colourIndex(n);
if (z > 3) {
out.copy(in, n, x, r);
continue;
}
// Slope of the linear ramp. A zero input range (whitepoint == blackpoint)
// would divide by zero, so a fixed large slope is used instead.
float A = whitepoint[z] - blackpoint[z];
A = A ? (white[z] - black[z]) / A : 10000.0f;
A *= multiply[z];
// Intercept of the linear ramp.
float B = add[z] + black[z] - blackpoint[z] * A;
// With a zero intercept, a channel that in.is_zero() reports as zero is
// simply erased in the output instead of being processed pixel by pixel.
if (!B && in.is_zero(n)) {
out.erase(n);
continue;
}
float G = gamma[z];
// patch for linux alphas because the pow function behaves badly
// for very large or very small exponent values.
#ifdef __alpha
if (G < 0.008f)
G = 0.0f;
if (G > 125.0f)
G = 125.0f;
#endif
// inptr starts at the input row; OUTBUF..END is the output span for x..r-1.
const float* inptr = in[n] + x;
float* OUTBUF = out.writable(n) + x;
float* END = OUTBUF + (r - x);
if (!reverse) {
// do the linear interpolation:
if (A != 1 || B) {
for (float* outptr = OUTBUF; outptr < END;)
*outptr++ = *inptr++ *A + B;
// Later stages now read the values just written (in-place processing).
inptr = OUTBUF;
}
// clamp
if (white_clamp || black_clamp) {
for (float* outptr = OUTBUF; outptr < END;) {
float a = *inptr++;
if (a < 0.0f && black_clamp)
a = 0.0f;
else if (a > 1.0f && white_clamp)
a = 1.0f;
*outptr++ = a;
}
inptr = OUTBUF;
}
// do the gamma:
// Zero or negative gamma: values below 1 become 0, values above 1 become
// infinity, and exactly 1 is left alone.
if (G <= 0) {
for (float* outptr = OUTBUF; outptr < END;) {
float V = *inptr++;
if (V < 1.0f)
V = 0.0f;
else if (V > 1.0f)
V = INFINITY;
*outptr++ = V;
}
}
else if (G != 1.0f) {
// Forward gamma uses the exponent 1/gamma.
G = 1.0f / G;
for (float* outptr = OUTBUF; outptr < END;) {
float V = *inptr++;
// Zero and negative values pass through the gamma stage unchanged.
if (V <= 0.0f)
; //V = 0.0f;
#ifdef __alpha
else if (V <= 1e-6f && G > 1.0f)
V = 0.0f;
#endif
// Below 1 the power curve is applied; at and above 1 the curve is
// replaced by a straight line through (1,1) with slope G.
else if (V < 1)
V = powf(V, G);
else
V = 1.0f + (V - 1.0f) * G;
*outptr++ = V;
}
}
// No stage wrote to the output buffer (inptr still points at the input
// row), so copy the input across unchanged.
else if (inptr != OUTBUF) {
memcpy(OUTBUF, inptr, (END - OUTBUF) * sizeof(*OUTBUF));
}
}
else {
// Reverse gamma:
// Zero or negative gamma: positive values become 1, everything else 0.
if (G <= 0) {
for (float* outptr = OUTBUF; outptr < END;)
*outptr++ = *inptr++ > 0.0f ? 1.0f : 0.0f;
inptr = OUTBUF;
}
else if (G != 1.0f) {
// The reverse direction uses gamma itself as the exponent/slope, where
// the forward direction uses 1/gamma.
for (float* outptr = OUTBUF; outptr < END;) {
float V = *inptr++;
// Zero and negative values pass through the gamma stage unchanged.
if (V <= 0.0f)
; //V = 0.0f;
#ifdef __alpha
else if (V <= 1e-6f && G > 1.0f)
V = 0.0f;
#endif
else if (V < 1.0f)
V = powf(V, G);
else
V = 1.0f + (V - 1.0f) * G;
*outptr++ = V;
}
inptr = OUTBUF;
}
// Reverse the linear part:
if (A != 1.0f || B) {
// Invert out = in*A + B into in = out*(1/A) - B/A. A zero slope cannot be
// inverted, so a slope of 1 is used in that case.
if (A)
A = 1 / A;
else
A = 1.0f;
B = -B * A;
for (float* outptr = OUTBUF; outptr < END;)
*outptr++ = *inptr++ *A + B;
inptr = OUTBUF;
}
// clamp
if (white_clamp || black_clamp) {
for (float* outptr = OUTBUF; outptr < END;) {
float a = *inptr++;
if (a < 0.0f && black_clamp)
a = 0.0f;
else if (a > 1.0f && white_clamp)
a = 1.0f;
*outptr++ = a;
}
inptr = OUTBUF;
}
// No stage wrote to the output buffer, so copy the input across unchanged.
else if (inptr != OUTBUF) {
memcpy(OUTBUF, inptr, (END - OUTBUF) * sizeof(*OUTBUF));
}
}
}
}
#include "DDImage/Knobs.h"
// Declares the node's knobs and ties each one to its member variable.
//
// Every Tooltip()/SetFlags() call directly follows the knob it belongs to, so
// keep each knob and its follow-up calls together when editing (presumably
// they apply to the most recently declared knob - confirm against the DDImage
// Knobs documentation).
// Where two strings are passed, the first is the knob name and the second is
// the label; the labels (lift, gain, offset) are the terms used in the HELP
// formula. The knob names are also looked up by string elsewhere in this file
// ("gamma", "reverse", "black_clamp", "white_clamp").
void GradeIop::knobs(Knob_Callback f)
{
// Input range: blackpoint/whitepoint.
AColor_knob(f, blackpoint, IRange(-1, 1), "blackpoint");
Tooltip(f, "This color is turned into black");
AColor_knob(f, whitepoint, IRange(0, 4), "whitepoint");
Tooltip(f, "This color is turned into white");
// Output range: lift/gain.
AColor_knob(f, black, IRange(-1, 1), "black", "lift");
Tooltip(f, "Black is turned into this color");
AColor_knob(f, white, IRange(0, 4), "white", "gain");
Tooltip(f, "White is turned into this color");
// Extra scale and offset folded into the linear ramp.
AColor_knob(f, multiply, IRange(0, 4), "multiply");
Tooltip(f, "Constant to multiply result by");
AColor_knob(f, add, IRange(-1, 1), "add", "offset");
Tooltip(f, "Constant to add to result (raises both black & white, unlike lift)");
AColor_knob(f, gamma, IRange(0.2, 5), "gamma");
Tooltip(f, "Gamma correction applied to final result");
// Boolean options.
Bool_knob(f, &reverse, "reverse");
SetFlags(f, Knob::STARTLINE);
Tooltip(f, "Invert the math to undo the correction");
Bool_knob(f, &black_clamp, "black_clamp", "black clamp");
Tooltip(f, "Output that is less than zero is changed to zero");
Bool_knob(f, &white_clamp, "white_clamp", "white clamp");
Tooltip(f, "Output that is greater than 1 is changed to 1");
}
// Returns the GLSL uniform declarations needed by the shader body, or nullptr
// when the node is not being used in the timeline context.
const char* GradeIop::gpuEngine_decl() const
{
  if (nodeContext() != eTimeline) {
    return nullptr;
  }

  // A and B (the linear ramp coefficients) are always uniforms.
  const char* const declWithoutGamma =
    "uniform vec4 $$A; \n"
    "uniform vec4 $$B; \n"
    ;

  // Unless every gamma component is guaranteed to be 1, gamma is uploaded
  // through a uniform too. Baking non-animated values into the shader would
  // gain little, as the pow() has to be paid for either way.
  const char* const declWithGamma =
    "uniform vec4 $$A; \n"
    "uniform vec4 $$B; \n"
    "uniform vec4 $$gamma; \n"
    ;

  return isGammaConstantOne() ? declWithoutGamma : declWithGamma;
}
bool GradeIop::isGammaConstantOne() const
{
DD::Image::Knob* gamma = knob("gamma");
// we can use pow optimisation if gamma isn't animated and all the 4 components of the knob are 1
if (gamma->is_animated()) {
return false;
}
const int kNumComponents = 4;
for (size_t i = 0; i < kNumComponents; ++i) {
if (gamma->get_value(i) != 1.0) {
return false;
}
}
return true;
}
// Builds and returns the GLSL source for the body of the grade shader, or
// nullptr when the node is not being used in the timeline context.
//
// The text is assembled in a stringstream, stored in the mutable member
// _shaderBodyText, and the returned pointer refers to that member's buffer.
// The generated code follows the same stage order as pixel_engine():
//   forward: linear ramp -> clamp -> gamma
//   reverse: inverse gamma -> inverse linear ramp -> clamp
// The gamma stage is only emitted when isGammaConstantOne() is false.
const char* GradeIop::gpuEngine_body() const
{
if ( nodeContext() != eTimeline)
return nullptr;
// We're using uniforms for the linear interpolation terms A and B, which contain the dependency on the
// whitepoint, blackpoint, white, black, multiply and add knobs. Uniforms are used to allow animation of
// the knobs. The usage of A and B in the shader is very simple and I've discerned no performance advantage
// to baking the values into the shader source when they're not animated.
// Note that the values of A and B are assumed to be set appropriately according to whether we're applying
// a forward or reverse grade, i.e. the shader does not manipulate them before applying a reverse grade.
//
// The following knobs always have their values baked into the source:
// reverse, black_clamp and white_clamp
// This is because they are only bools so even if they're animated there will only be a small number of variations
// of the source code and baking the values into the source allows the compiler to do some noticeable optimisation,
// especially with reverse baked in as that completely eliminates half the code.
// Note that the knobs that affect the source need to be taken into account by gpuEngine_shader_hash_at.
// NOTE: GLSL 1.2, which is our current minimum spec due to Snow Leopard being GL 2.1, means we don't have
// mix(T, T, bvec), hence the casting of bvec to vec4 in various places.
std::stringstream shaderText;
if (reverse == false) {
//
// The forward case
//
shaderText <<
" { \n"
// Save the input alpha value so we can simply restore it at the end rather than trying to make sure that every
// part of the processing leaves it untouched.
" float inputAlpha = OUT.a; \n"
//
// Linear interpolation
//
" vec4 fwd_out = OUT * $$A + $$B;\n"
//
// Apply clamping
//
" fwd_out = mix( fwd_out, vec4(0.0), (vec4(1.0) - clamp(sign(fwd_out), vec4(0.0), vec4(1.0))) * float($black_clamp$) );\n"
" fwd_out = mix( fwd_out, vec4(1.0), clamp(sign(fwd_out - vec4(1.0)), vec4(0.0), vec4(1.0)) * float($white_clamp$) );\n"
;
//
// Apply gamma.
//
if (!isGammaConstantOne()) {
// At least one of the gamma components is not 1 or is animated so we can't exclude the gamma code.
shaderText <<
// If the (clamped) input colour is zero or negative then the pow() call will yield undefined results, I get
// NaNs on my machine. The code further down will select fwd_out in that case but since it's using a mix() the
// linear interpolation would involve the undefined result of the pow() and so itself be undefined.
// So, if the input is <= 0 we just pass 1 into the pow() call so we get a valid result, which we can
// subsequently safely 'mix out' and ignore.
" vec4 inputToPow = mix(fwd_out, vec4(1.0), vec4(lessThanEqual(fwd_out, vec4(0.0)))); \n"
// TODO
// The CPU calculation uses 1/gamma as the exponent in the pow() and also as a scale if not using the pow().
// If gamma is exactly zero this results in infinity, which we want to avoid - see my comment for kPlusInfinity.
// Since, if gamma is zero, we're not going to select the results calculated assuming positive gamma anyway,
// the value we use for 1/gamma doesn't matter so long as it doesn't result in infinity or NaN creeping
// into the final answer.
" vec4 gammaForDenom = mix($$gamma, vec4(1.0), vec4(equal($$gamma, vec4(0.0)))); \n"
" vec4 oneOverGamma = 1.0 / gammaForDenom; \n"
// Calculate the value assuming gamma is strictly positive.
//
// First calculate the values depending on if the value is < or > 1, select the appropriate value, then select
// either the original value or the gamma-applied one depending on whether the value was negative.
// Result if value < 1
" vec4 fwd_ltone_with_gamma = pow(inputToPow, oneOverGamma);\n"
// Result if value > 1
" vec4 fwd_gtone_with_gamma = vec4(1.0) + (fwd_out - vec4(1.0)) * oneOverGamma;\n"
// Select from the above based on the value being < > 1
" vec4 withPositiveGamma = mix(fwd_ltone_with_gamma, fwd_gtone_with_gamma, clamp(sign(fwd_out - vec4(1.0)), vec4(0.0), vec4(1.0)));\n"
// Select from the above based on the value being < > 0
" withPositiveGamma = mix(fwd_out, withPositiveGamma, clamp(sign(fwd_out), vec4(0.0), vec4(1.0)));\n"
// TODO
// The CPU path uses infinity but we're not going to do that here for a few reasons, at least for now:
// - Not all GPUs may be sufficiently IEEE 754 compliant for this to be well defined.
// - We still don't see '+inf' in the colour picker but something around 65500, presumably because of issues
// related to the half float render targets Hiero uses, or maybe what we're doing with the read-back?
// - As described above, subsequent calculations seem to result in NaNs.
// So, for now just use a pretty large number. Note that I'm deliberately using something well within half
// float representation.
// NOTE: This means the final results can differ between the GPU and CPU paths. I've not seen obvious
// discrepancies in the rendered image (even with zero gamma and super whites) but the colour sampler
// will report different values, e.g. 'inf' for the CPU path but ~10,000 for the GPU path.
" const vec4 kPlusInfinity = vec4(9999.0); \n"
// Now do the calculations needed if any of the gamma components are negative or zero.
//
// If the pre-gamma value input is < 1 then set it to 0, if it's > 1 set it to +inf, otherwise leave it with its value of 1.
" vec4 withNegativeGamma = step(vec4(1.0), fwd_out); \n" // Sets 0 and 1 for input < 1 and >= 1, respectively.
" bvec4 inputGreaterThan1 = greaterThan(fwd_out, vec4(1.0)); \n"
" withNegativeGamma = mix(withNegativeGamma, kPlusInfinity, vec4(inputGreaterThan1)); \n" // Sets +inf for input > 1.
// Select the appropriate set of values according to the gamma sign, on a component-wise basis.
" fwd_out = mix(withPositiveGamma, withNegativeGamma, vec4(lessThanEqual($$gamma, vec4(0.0)))); \n"
;
}
shaderText <<
// Ensure we pass on the original alpha value.
" OUT = vec4(fwd_out.rgb, inputAlpha); \n"
" } \n"
;
}
else {
//
// The reverse case
//
shaderText <<
" { \n"
// Save the input alpha value so we can simply restore it at the end rather than trying to make sure that every
// part of the processing leaves it untouched.
" float inputAlpha = OUT.a; \n"
" vec4 rev_out = OUT; \n"
;
//
// Apply gamma
//
if (!isGammaConstantOne()) {
// At least one of the gamma components is not 1 or is animated so we can't exclude the gamma code.
shaderText <<
// If the input colour is zero or negative then the pow() call will yield undefined results, I get
// NaNs on my machine. The code further down will select rev_out in that case but since it's using a mix() the
// linear interpolation would involve the undefined result of the pow() and so itself be undefined.
// So, if the input is <= 0 we just pass 1 into the pow() call so we get a valid result, which we can
// subsequently safely 'mix out' and ignore.
" vec4 inputToPow = mix(rev_out, vec4(1.0), vec4(lessThanEqual(rev_out, vec4(0.0)))); \n"
// Calculate the value assuming gamma is strictly positive.
// (The reverse direction uses gamma itself as exponent and slope, where the forward direction uses 1/gamma.)
//
// Result if value < 1
" vec4 rev_ltone_with_gamma = pow(inputToPow, $$gamma);\n"
// Result if value > 1
" vec4 rev_gtone_with_gamma = vec4(1.0) + (rev_out - 1.0) * $$gamma;\n"
// Select from the above based on the value being < > 1
" vec4 rev_with_gamma = mix(rev_ltone_with_gamma, rev_gtone_with_gamma, clamp(sign(rev_out - vec4(1.0)), vec4(0.0), vec4(1.0)));\n"
// Select from the above based on the value being < > 0
" vec4 withPositiveGamma = mix(rev_out, rev_with_gamma, clamp(sign(rev_out), vec4(0.0), vec4(1.0)));\n"
// Now do the calculations needed if any of the gamma components are negative or zero.
//
// If the pre-gamma value input is > 0 then we set the output value to 1, otherwise we set it to 0.
" vec4 withNegativeGamma = mix(vec4(0.0), vec4(1.0), vec4(greaterThan(rev_out, vec4(0.0)))); \n"
// Select the appropriate set of values according to the gamma sign, on a component-wise basis.
" rev_out = mix(withPositiveGamma, withNegativeGamma, vec4(lessThanEqual($$gamma, vec4(0.0)))); \n"
;
}
shaderText <<
//
// Reverse linear
// ($$A and $$B are expected to hold the already-inverted coefficients here.)
//
" rev_out = rev_out * $$A + $$B;\n"
//
// Apply clamping
//
" rev_out = mix( rev_out, vec4(0.0), (vec4(1.0) - clamp(sign(rev_out), vec4(0.0), vec4(1.0))) * float($black_clamp$) );\n"
" rev_out = mix( rev_out, vec4(1.0), clamp(sign(rev_out - vec4(1.0)), vec4(0.0), vec4(1.0)) * float($white_clamp$) );\n"
// Ensure we pass on the original alpha value.
" OUT = vec4(rev_out.rgb, inputAlpha); \n"
" } \n"
;
}
// Keep the generated text in a member so the returned pointer stays valid
// after this function returns.
_shaderBodyText = shaderText.str();
return _shaderBodyText.c_str();
}
// Hashes everything that changes the generated shader *source* (as opposed to
// values that are merely uploaded as uniforms).
Hash GradeIop::gpuEngine_shader_hash_at(double time)
{
  // Knobs whose values are baked into the source, sampled at the requested time.
  const char* const bakedKnobNames[] = { "reverse", "black_clamp", "white_clamp" };

  Hash shaderHash;
  for (const char* knobName : bakedKnobNames) {
    shaderHash.append(knob(knobName)->get_value_at(time));
  }

  // Whether the gamma stage is emitted at all also alters the source.
  shaderHash.append(isGammaConstantOne());
  return shaderHash;
}
// TODO - This gets (almost) duplicated in Text2.cpp and possibly elsewhere - centralise somewhere.
//
// Uploads the RGB values of one of `iop`'s colour knobs, plus a caller-chosen
// alpha, to a vec4 shader uniform.
//
//   iop                 - node that owns the knob.
//   gpuContext          - context the uniform is bound on.
//   knobNameWithDollars - uniform name; must be the knob name prefixed with two
//                         characters (e.g. "$$gamma"), since the knob is looked
//                         up by skipping the first two characters. The knob is
//                         expected to exist; it is dereferenced without a check.
//   alpha               - value uploaded as the fourth component in place of the
//                         knob's own alpha.
static void BindKnobVec4(Iop* iop, GPUContext* gpuContext, const char* knobNameWithDollars, float alpha)
{
  const int kNumComponents = 4;
  const int kNumColourComponents = 3;
  const int kNumVectors = 1;

  // Strip the two-character "$$" prefix to get the knob name.
  const char* knobName = knobNameWithDollars + 2;
  DD::Image::Knob* knob = iop->knob(knobName);

  // The alpha values of the knobs are ignored and the fourth components are instead hard-coded to
  // values that will cause the alpha to be passed straight through unaffected, as
  // soft effects currently only affect RGB.
  // This hackette was being done directly in the shader code but now we're using uniforms we handle
  // it cpu-side (whether we use 0 or 1 depends on the knob, hence the need for an argument here).
  // TODO: add a channels knob to the Grade soft effect and update this accordingly.
  float vec4[kNumComponents] = { 0.0f, 0.0f, 0.0f, alpha };

  // Only the colour components are read from the knob; get_value() is narrowed
  // explicitly to the float type of the uniform data.
  for (int i = 0; i < kNumColourComponents; ++i)
    vec4[i] = static_cast<float>(knob->get_value(i));

  bool result = gpuContext->bind(knobNameWithDollars, kNumComponents, kNumVectors, vec4);
  mFnAssert(result);
}
// Uploads the shader uniforms before drawing: the linear ramp coefficients
// $$A and $$B and, when the gamma stage is present in the shader, $$gamma.
// Does nothing outside the timeline context.
void GradeIop::gpuEngine_GL_begin(GPUContext* context)
{
  if (nodeContext() != eTimeline) {
    return;
  }

  // TODO
  // For now rgba data is assumed, so the alpha components are fixed at slope 1
  // and intercept 0, which leaves the alpha part of the image unaffected. In
  // the long run the GPU path needs some kind of channel selection.
  float slope[4]     = { 0.0f, 0.0f, 0.0f, 1.0f };
  float intercept[4] = { 0.0f, 0.0f, 0.0f, 0.0f };

  // RGB coefficients are derived exactly as in pixel_engine().
  for (int c = 0; c < 3; ++c) {
    const float inputRange = whitepoint[c] - blackpoint[c];
    float m = inputRange ? (white[c] - black[c]) / inputRange : 10000.0f;
    m *= multiply[c];
    float k = add[c] + black[c] - blackpoint[c] * m;

    // The reverse shader applies $$A and $$B directly, so the inversion of the
    // ramp has to happen here. A zero slope cannot be inverted and is replaced
    // by 1.
    if (reverse && (m != 1.0f || k)) {
      m = m ? 1.0f / m : 1.0f;
      k = -k * m;
    }

    slope[c] = m;
    intercept[c] = k;
  }

  context->bind("$$A", 4, 1, slope);
  context->bind("$$B", 4, 1, intercept);

  // The gamma uniform only exists in the shader when the gamma stage does;
  // its alpha component is uploaded as 1.
  if (!isGammaConstantOne()) {
    BindKnobVec4(this, context, "$$gamma", 1.0f);
  }
}
// Counterpart of gpuEngine_GL_begin(). It performs the same context check but
// currently has no work to do in either case; `context` is unused.
void GradeIop::gpuEngine_GL_end(GPUContext* context)
{
  const bool onTimeline = (nodeContext() == eTimeline);
  if (!onTimeline) {
    return;
  }
}
// Factory used by the node description: creates a GradeIop for `node`, wraps
// it in a NukeWrapper and enables the wrapper's RGB-with-optional-alpha and
// mix-luminance options on it.
static Iop* build(Node* node)
{
  GradeIop* const grade = new GradeIop(node);
  NukeWrapper* const wrapper = new NukeWrapper(grade);
  return wrapper->channelsRGBoptionalAlpha()->mixLuminance();
}
// Node description: ties the class name to the build() factory. The middle
// string is presumably the node's menu location - confirm against the NDK docs.
const Iop::Description GradeIop::d(CLASS, "Color/Correct/Grade", build);