diff options
Diffstat (limited to 'src/windows/native/sun/java2d/d3d/D3DShaderGen.c')
-rw-r--r-- | src/windows/native/sun/java2d/d3d/D3DShaderGen.c | 985 |
1 files changed, 985 insertions, 0 deletions
diff --git a/src/windows/native/sun/java2d/d3d/D3DShaderGen.c b/src/windows/native/sun/java2d/d3d/D3DShaderGen.c new file mode 100644 index 000000000..69cb29893 --- /dev/null +++ b/src/windows/native/sun/java2d/d3d/D3DShaderGen.c @@ -0,0 +1,985 @@ +/* + * Copyright 2007-2008 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. Sun designates this + * particular file as subject to the "Classpath" exception as provided + * by Sun in the LICENSE file that accompanied this code. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ + +/** + * This file contains a standalone program that is used to generate the + * D3DShaders.h file. The program invokes the fxc (D3D Shader Compiler) + * utility, which is part of the DirectX 9/10 SDK. Since most JDK + * developers (other than some Java 2D engineers) do not have the full DXSDK + * installed, and since we do not want to make the JDK build process + * dependent on the full DXSDK installation, we have chosen not to make + * this shader compilation step part of the build process. Instead, it is + * only necessary to compile and run this program when changes need to be + * made to the shader code contained within. Typically, this only happens + * on an as-needed basis by someone familiar with the D3D pipeline. Running + * this program is fairly straightforward: + * + * % rm D3DShaders.h + * % cl D3DShaderGen.c + * % D3DShaderGen.exe + * + * (And don't forget to putback the updated D3DShaders.h file!) + */ + +#include <stdio.h> +#include <process.h> +#include <Windows.h> + +static FILE *fpHeader = NULL; +static char *strHeaderFile = "D3DShaders.h"; + +/** Evaluates to true if the given bit is set on the local flags variable. */ +#define IS_SET(flagbit) \ + (((flags) & (flagbit)) != 0) + +// REMIND +//#define J2dTraceLn(a, b) fprintf(stderr, "%s\n", b); +//#define J2dTraceLn1(a, b, c) fprintf(stderr, b, c); +#define J2dTraceLn(a, b) +#define J2dTraceLn1(a, b, c) + +/************************* General shader support ***************************/ + +static void +D3DShaderGen_WriteShader(char *source, char *target, char *name, int flags) +{ + FILE *fpTmp; + char varname[50]; + char *args[8]; + int val; + + // write source to tmp.hlsl + fpTmp = fopen("tmp.hlsl", "w"); + fprintf(fpTmp, "%s\n", source); + fclose(fpTmp); + + { + PROCESS_INFORMATION pi; + STARTUPINFO si; + char pargs[300]; + sprintf(pargs, + "c:\\progra~1\\mi5889~1\\utilit~1\\bin\\x86\\fxc.exe " + "/T %s /Vn %s%d /Fh tmp.h tmp.hlsl", + // uncomment the following line to generate debug + // info in the shader header file (may be useful + // for testing/debuggging purposes, but it nearly + // doubles the size of the header file and compiled + // shader programs - off for production builds) + //"/Zi /T %s /Vn %s%d /Fh tmp.h tmp.hlsl", + target, name, flags); + fprintf(stderr, "%s\n", pargs); + memset(&si, 0, sizeof(si)); + si.cb = sizeof(si); + si.dwFlags = STARTF_USESTDHANDLES; + //si.hStdOutput = GetStdHandle(STD_OUTPUT_HANDLE); + //fprintf(stderr, "%s\n", pargs); + val = CreateProcess(0, pargs, 0, 0, TRUE, + CREATE_NO_WINDOW, NULL, NULL, &si, &pi); + + { + DWORD code; + do { + GetExitCodeProcess(pi.hProcess, &code); + //fprintf(stderr, "waiting..."); + Sleep(100); + } while (code == STILL_ACTIVE); + + if (code != 0) { + fprintf(stderr, "fxc failed for %s%d\n", name, flags); + } + } + + CloseHandle(pi.hThread); + CloseHandle(pi.hProcess); + } + + // append tmp.h to D3DShaders.h + { + int ch; + fpTmp = fopen("tmp.h", "r"); + while ((ch = fgetc(fpTmp)) != EOF) { + fputc(ch, fpHeader); + } + fclose(fpTmp); + } +} + +static void +D3DShaderGen_WritePixelShader(char *source, char *name, int flags) +{ + D3DShaderGen_WriteShader(source, "ps_2_0", name, flags); +} + +#define MULTI_GRAD_CYCLE_METHOD (3 << 0) +/** Extracts the CycleMethod enum value from the given flags variable. */ +#define EXTRACT_CYCLE_METHOD(flags) \ + ((flags) & MULTI_GRAD_CYCLE_METHOD) + +static void +D3DShaderGen_WriteShaderArray(char *name, int num) +{ + char array[5000]; + char elem[30]; + int i; + + sprintf(array, "const DWORD *%sShaders[] =\n{\n", name); + for (i = 0; i < num; i++) { + if (num == 32 && EXTRACT_CYCLE_METHOD(i) == 3) { + // REMIND: what a hack! + sprintf(elem, " NULL,\n"); + } else { + sprintf(elem, " %s%d,\n", name, i); + } + strcat(array, elem); + } + strcat(array, "};\n"); + + // append to D3DShaders.h + fprintf(fpHeader, "%s\n", array); +} + +/**************************** ConvolveOp support ****************************/ + +static const char *convolveShaderSource = + // image to be convolved + "sampler2D baseImage : register(s0);" + // image edge limits: + // imgEdge.xy = imgMin.xy (anything < will be treated as edge case) + // imgEdge.zw = imgMax.xy (anything > will be treated as edge case) + "float4 imgEdge : register(c0);" + // value for each location in the convolution kernel: + // kernelVals[i].x = offsetX[i] + // kernelVals[i].y = offsetY[i] + // kernelVals[i].z = kernel[i] + "float3 kernelVals[%d] : register(c1);" + "" + "void main(in float2 tc : TEXCOORD0," + " inout float4 color : COLOR0)" + "{" + " float4 sum = imgEdge - tc.xyxy;" + "" + " if (sum.x > 0 || sum.y > 0 || sum.z < 0 || sum.w < 0) {" + // (placeholder for edge condition code) + " color = %s;" + " } else {" + " int i;" + " sum = float4(0, 0, 0, 0);" + " for (i = 0; i < %d; i++) {" + " sum +=" + " kernelVals[i].z *" + " tex2D(baseImage, tc + kernelVals[i].xy);" + " }" + // modulate with current color in order to apply extra alpha + " color *= sum;" + " }" + "" + "}"; + +/** + * Flags that can be bitwise-or'ed together to control how the shader + * source code is generated. + */ +#define CONVOLVE_EDGE_ZERO_FILL (1 << 0) +#define CONVOLVE_5X5 (1 << 1) +#define MAX_CONVOLVE (1 << 2) + +static void +D3DShaderGen_GenerateConvolveShader(int flags) +{ + int kernelMax = IS_SET(CONVOLVE_5X5) ? 25 : 9; + char *edge; + char finalSource[2000]; + + J2dTraceLn1(J2D_TRACE_INFO, + "D3DShaderGen_GenerateConvolveShader: flags=%d", + flags); + + if (IS_SET(CONVOLVE_EDGE_ZERO_FILL)) { + // EDGE_ZERO_FILL: fill in zero at the edges + edge = "float4(0, 0, 0, 0)"; + } else { + // EDGE_NO_OP: use the source pixel color at the edges + edge = "tex2D(baseImage, tc)"; + } + + // compose the final source code string from the various pieces + sprintf(finalSource, convolveShaderSource, + kernelMax, edge, kernelMax); + + D3DShaderGen_WritePixelShader(finalSource, "convolve", flags); +} + +/**************************** RescaleOp support *****************************/ + +static const char *rescaleShaderSource = + // image to be rescaled + "sampler2D baseImage : register(s0);" + // vector containing scale factors + "float4 scaleFactors : register(c0);" + // vector containing offsets + "float4 offsets : register(c1);" + "" + "void main(in float2 tc : TEXCOORD0," + " inout float4 color : COLOR0)" + "{" + " float4 srcColor = tex2D(baseImage, tc);" + "" + // (placeholder for un-premult code) + " %s" + "" + // rescale source value + " float4 result = (srcColor * scaleFactors) + offsets;" + "" + // (placeholder for re-premult code) + " %s" + "" + // modulate with current color in order to apply extra alpha + " color *= result;" + "}"; + +/** + * Flags that can be bitwise-or'ed together to control how the shader + * source code is generated. + */ +#define RESCALE_NON_PREMULT (1 << 0) +#define MAX_RESCALE (1 << 1) + +static void +D3DShaderGen_GenerateRescaleShader(int flags) +{ + char *preRescale = ""; + char *postRescale = ""; + char finalSource[2000]; + + J2dTraceLn1(J2D_TRACE_INFO, + "D3DShaderGen_GenerateRescaleShader: flags=%d", + flags); + + if (IS_SET(RESCALE_NON_PREMULT)) { + preRescale = "srcColor.rgb /= srcColor.a;"; + postRescale = "result.rgb *= result.a;"; + } + + // compose the final source code string from the various pieces + sprintf(finalSource, rescaleShaderSource, + preRescale, postRescale); + + D3DShaderGen_WritePixelShader(finalSource, "rescale", flags); +} + +/**************************** LookupOp support ******************************/ + +static const char *lookupShaderSource = + // source image (bound to texture unit 0) + "sampler2D baseImage : register(s0);" + // lookup table (bound to texture unit 1) + "sampler2D lookupTable : register(s1);" + // offset subtracted from source index prior to lookup step + "float4 offset : register(c0);" + "" + "void main(in float2 tc : TEXCOORD0," + " inout float4 color : COLOR0)" + "{" + " float4 srcColor = tex2D(baseImage, tc);" + // (placeholder for un-premult code) + " %s" + // subtract offset from original index + " float4 srcIndex = srcColor - offset;" + // use source value as input to lookup table (note that + // "v" texcoords are hardcoded to hit texel centers of + // each row/band in texture) + " float4 result;" + " result.r = tex2D(lookupTable, float2(srcIndex.r, 0.125)).r;" + " result.g = tex2D(lookupTable, float2(srcIndex.g, 0.375)).r;" + " result.b = tex2D(lookupTable, float2(srcIndex.b, 0.625)).r;" + // (placeholder for alpha store code) + " %s" + // (placeholder for re-premult code) + " %s" + // modulate with current color in order to apply extra alpha + " color *= result;" + "}"; + +/** + * Flags that can be bitwise-or'ed together to control how the shader + * source code is generated. + */ +#define LOOKUP_USE_SRC_ALPHA (1 << 0) +#define LOOKUP_NON_PREMULT (1 << 1) +#define MAX_LOOKUP (1 << 2) + +static void +D3DShaderGen_GenerateLookupShader(int flags) +{ + char *alpha; + char *preLookup = ""; + char *postLookup = ""; + char finalSource[2000]; + + J2dTraceLn1(J2D_TRACE_INFO, + "D3DShaderGen_GenerateLookupShader: flags=%d", + flags); + + if (IS_SET(LOOKUP_USE_SRC_ALPHA)) { + // when numComps is 1 or 3, the alpha is not looked up in the table; + // just keep the alpha from the source fragment + alpha = "result.a = srcColor.a;"; + } else { + // when numComps is 4, the alpha is looked up in the table, just + // like the other color components from the source fragment + alpha = "result.a = tex2D(lookupTable, float2(srcIndex.a, 0.875)).r;"; + } + if (IS_SET(LOOKUP_NON_PREMULT)) { + preLookup = "srcColor.rgb /= srcColor.a;"; + postLookup = "result.rgb *= result.a;"; + } + + // compose the final source code string from the various pieces + sprintf(finalSource, lookupShaderSource, + preLookup, alpha, postLookup); + + D3DShaderGen_WritePixelShader(finalSource, "lookup", flags); +} + +/************************* GradientPaint support ****************************/ + +/* + * To simplify the code and to make it easier to upload a number of + * uniform values at once, we pack a bunch of scalar (float) values + * into a single float3 below. Here's how the values are related: + * + * params.x = p0 + * params.y = p1 + * params.z = p3 + */ +static const char *basicGradientShaderSource = + "float3 params : register (c0);" + "float4 color1 : register (c1);" + "float4 color2 : register (c2);" + // (placeholder for mask variable) + "%s" + "" + // (placeholder for mask texcoord input) + "void main(%s" + " in float4 winCoord : TEXCOORD%d," + " inout float4 color : COLOR0)" + "{" + " float3 fragCoord = float3(winCoord.x, winCoord.y, 1.0);" + " float dist = dot(params.xyz, fragCoord);" + "" + // the setup code for p0/p1/p3 translates/scales to hit texel + // centers (at 0.25 and 0.75) because it is needed for the + // original/fast texture-based implementation, but it is not + // desirable for this shader-based implementation, so we + // re-transform the value here... + " dist = (dist - 0.25) * 2.0;" + "" + " float fraction;" + // (placeholder for cycle code) + " %s" + "" + " float4 result = lerp(color1, color2, fraction);" + "" + // (placeholder for mask modulation code) + " %s" + "" + // modulate with current color in order to apply extra alpha + " color *= result;" + "}"; + +/** + * Flags that can be bitwise-or'ed together to control how the shader + * source code is generated. + */ +#define BASIC_GRAD_IS_CYCLIC (1 << 0) +#define BASIC_GRAD_USE_MASK (1 << 1) +#define MAX_BASIC_GRAD (1 << 2) + +static void +D3DShaderGen_GenerateBasicGradShader(int flags) +{ + int colorSampler = IS_SET(BASIC_GRAD_USE_MASK) ? 1 : 0; + char *cycleCode; + char *maskVars = ""; + char *maskInput = ""; + char *maskCode = ""; + char finalSource[3000]; + + J2dTraceLn1(J2D_TRACE_INFO, + "D3DShaderGen_GenerateBasicGradShader", + flags); + + if (IS_SET(BASIC_GRAD_IS_CYCLIC)) { + cycleCode = + "fraction = 1.0 - (abs(frac(dist * 0.5) - 0.5) * 2.0);"; + } else { + cycleCode = + "fraction = clamp(dist, 0.0, 1.0);"; + } + + if (IS_SET(BASIC_GRAD_USE_MASK)) { + /* + * This code modulates the calculated result color with the + * corresponding alpha value from the alpha mask texture active + * on texture unit 0. Only needed when useMask is true (i.e., only + * for MaskFill operations). + */ + maskVars = "sampler2D mask : register(s0);"; + maskInput = "in float4 maskCoord : TEXCOORD0,"; + maskCode = "result *= tex2D(mask, maskCoord.xy).a;"; + } + + // compose the final source code string from the various pieces + sprintf(finalSource, basicGradientShaderSource, + maskVars, maskInput, colorSampler, cycleCode, maskCode); + + D3DShaderGen_WritePixelShader(finalSource, "grad", flags); +} + +/****************** Shared MultipleGradientPaint support ********************/ + +/** + * These constants are identical to those defined in the + * MultipleGradientPaint.CycleMethod enum; they are copied here for + * convenience (ideally we would pull them directly from the Java level, + * but that entails more hassle than it is worth). + */ +#define CYCLE_NONE 0 +#define CYCLE_REFLECT 1 +#define CYCLE_REPEAT 2 + +/** + * The following constants are flags that can be bitwise-or'ed together + * to control how the MultipleGradientPaint shader source code is generated: + * + * MULTI_GRAD_CYCLE_METHOD + * Placeholder for the CycleMethod enum constant. + * + * MULTI_GRAD_LARGE + * If set, use the (slower) shader that supports a larger number of + * gradient colors; otherwise, use the optimized codepath. See + * the MAX_FRACTIONS_SMALL/LARGE constants below for more details. + * + * MULTI_GRAD_USE_MASK + * If set, apply the alpha mask value from texture unit 1 to the + * final color result (only used in the MaskFill case). + * + * MULTI_GRAD_LINEAR_RGB + * If set, convert the linear RGB result back into the sRGB color space. + */ +//#define MULTI_GRAD_CYCLE_METHOD (3 << 0) +#define MULTI_GRAD_LARGE (1 << 2) +#define MULTI_GRAD_USE_MASK (1 << 3) +#define MULTI_GRAD_LINEAR_RGB (1 << 4) + +// REMIND +#define MAX_MULTI_GRAD (1 << 5) + +/** Extracts the CycleMethod enum value from the given flags variable. */ +//#define EXTRACT_CYCLE_METHOD(flags) \ +// ((flags) & MULTI_GRAD_CYCLE_METHOD) + +/** + * The maximum number of gradient "stops" supported by the fragment shader + * and related code. When the MULTI_GRAD_LARGE flag is set, we will use + * MAX_FRACTIONS_LARGE; otherwise, we use MAX_FRACTIONS_SMALL. By having + * two separate values, we can have one highly optimized shader (SMALL) that + * supports only a few fractions/colors, and then another, less optimal + * shader that supports more stops. + */ +#define MAX_FRACTIONS 8 +#define MAX_FRACTIONS_LARGE MAX_FRACTIONS +#define MAX_FRACTIONS_SMALL 4 + +/** + * The maximum number of gradient colors supported by all of the gradient + * fragment shaders. Note that this value must be a power of two, as it + * determines the size of the 1D texture created below. It also must be + * greater than or equal to MAX_FRACTIONS (there is no strict requirement + * that the two values be equal). + */ +#define MAX_COLORS 16 + +static const char *multiGradientShaderSource = + // gradient texture size (in texels) + "#define TEXTURE_SIZE %d\n" + // maximum number of fractions/colors supported by this shader + "#define MAX_FRACTIONS %d\n" + // size of a single texel + "#define FULL_TEXEL (1.0 / float(TEXTURE_SIZE))\n" + // size of half of a single texel + "#define HALF_TEXEL (FULL_TEXEL / 2.0)\n" + // texture containing the gradient colors + "sampler2D colors : register (s%d);" + // array of gradient stops/fractions and corresponding scale factors + // fractions[i].x = gradientStop[i] + // fractions[i].y = scaleFactor[i] + "float2 fractions[MAX_FRACTIONS] : register (c0);" + // (placeholder for mask variable) + "%s" + // (placeholder for Linear/RadialGP-specific variables) + "%s" + "" + // (placeholder for mask texcoord input) + "void main(%s" + " in float4 winCoord : TEXCOORD%d," + " inout float4 color : COLOR0)" + "{" + " float dist;" + // (placeholder for Linear/RadialGradientPaint-specific code) + " %s" + "" + " float4 result;" + // (placeholder for CycleMethod-specific code) + " %s" + "" + // (placeholder for ColorSpace conversion code) + " %s" + "" + // (placeholder for mask modulation code) + " %s" + "" + // modulate with current color in order to apply extra alpha + " color *= result;" + "}"; + +/* + * Note: An earlier version of this code would simply calculate a single + * texcoord: + * "tc = HALF_TEXEL + (FULL_TEXEL * relFraction);" + * and then use that value to do a single texture lookup, taking advantage + * of the LINEAR texture filtering mode which in theory will do the + * appropriate linear interpolation between adjacent texels, like this: + * "float4 result = tex2D(colors, float2(tc, 0.5));" + * + * The problem with that approach is that on certain hardware (from ATI, + * notably) the LINEAR texture fetch unit has low precision, and would + * for instance only produce 64 distinct grayscales between white and black, + * instead of the expected 256. The visual banding caused by this issue + * is severe enough to likely cause complaints from developers, so we have + * devised a new approach below that instead manually fetches the two + * relevant neighboring texels and then performs the linear interpolation + * using the lerp() instruction (which does not suffer from the precision + * issues of the fixed-function texture filtering unit). This new approach + * requires a few more instructions and is therefore slightly slower than + * the old approach (not more than 10% or so). + */ +static const char *texCoordCalcCode = + "int i;" + "float relFraction = 0.0;" + "for (i = 0; i < MAX_FRACTIONS-1; i++) {" + " relFraction +=" + " clamp((dist - fractions[i].x) * fractions[i].y, 0.0, 1.0);" + "}" + // we offset by half a texel so that we find the linearly interpolated + // color between the two texel centers of interest + "float intPart = floor(relFraction);" + "float tc1 = HALF_TEXEL + (FULL_TEXEL * intPart);" + "float tc2 = HALF_TEXEL + (FULL_TEXEL * (intPart + 1.0));" + "float4 clr1 = tex2D(colors, float2(tc1, 0.5));" + "float4 clr2 = tex2D(colors, float2(tc2, 0.5));" + "result = lerp(clr1, clr2, frac(relFraction));"; + +/** Code for NO_CYCLE that gets plugged into the CycleMethod placeholder. */ +static const char *noCycleCode = + "if (dist <= 0.0) {" + " result = tex2D(colors, float2(0.0, 0.5));" + "} else if (dist >= 1.0) {" + " result = tex2D(colors, float2(1.0, 0.5));" + "} else {" + // (placeholder for texcoord calculation) + " %s" + "}"; + +/** Code for REFLECT that gets plugged into the CycleMethod placeholder. */ +static const char *reflectCode = + "dist = 1.0 - (abs(frac(dist * 0.5) - 0.5) * 2.0);" + // (placeholder for texcoord calculation) + "%s"; + +/** Code for REPEAT that gets plugged into the CycleMethod placeholder. */ +static const char *repeatCode = + "dist = frac(dist);" + // (placeholder for texcoord calculation) + "%s"; + +static void +D3DShaderGen_GenerateMultiGradShader(int flags, char *name, + char *paintVars, char *distCode) +{ + char *maskVars = ""; + char *maskInput = ""; + char *maskCode = ""; + char *colorSpaceCode = ""; + char cycleCode[1500]; + char finalSource[3000]; + int colorSampler = IS_SET(MULTI_GRAD_USE_MASK) ? 1 : 0; + int cycleMethod = EXTRACT_CYCLE_METHOD(flags); + int maxFractions = IS_SET(MULTI_GRAD_LARGE) ? + MAX_FRACTIONS_LARGE : MAX_FRACTIONS_SMALL; + + J2dTraceLn(J2D_TRACE_INFO, "OGLPaints_CreateMultiGradProgram"); + + if (IS_SET(MULTI_GRAD_USE_MASK)) { + /* + * This code modulates the calculated result color with the + * corresponding alpha value from the alpha mask texture active + * on texture unit 0. Only needed when useMask is true (i.e., only + * for MaskFill operations). + */ + maskVars = "sampler2D mask : register(s0);"; + maskInput = "in float4 maskCoord : TEXCOORD0,"; + maskCode = "result *= tex2D(mask, maskCoord.xy).a;"; + } + + if (IS_SET(MULTI_GRAD_LINEAR_RGB)) { + /* + * This code converts a single pixel in linear RGB space back + * into sRGB (note: this code was adapted from the + * MultipleGradientPaintContext.convertLinearRGBtoSRGB() method). + */ + colorSpaceCode = + "result.rgb = 1.055 * pow(result.rgb, 0.416667) - 0.055;"; + } + + if (cycleMethod == CYCLE_NONE) { + sprintf(cycleCode, noCycleCode, texCoordCalcCode); + } else if (cycleMethod == CYCLE_REFLECT) { + sprintf(cycleCode, reflectCode, texCoordCalcCode); + } else { // (cycleMethod == CYCLE_REPEAT) + sprintf(cycleCode, repeatCode, texCoordCalcCode); + } + + // compose the final source code string from the various pieces + sprintf(finalSource, multiGradientShaderSource, + MAX_COLORS, maxFractions, colorSampler, + maskVars, paintVars, maskInput, colorSampler, + distCode, cycleCode, colorSpaceCode, maskCode); + + D3DShaderGen_WritePixelShader(finalSource, name, flags); +} + +/********************** LinearGradientPaint support *************************/ + +static void +D3DShaderGen_GenerateLinearGradShader(int flags) +{ + char *paintVars; + char *distCode; + + J2dTraceLn1(J2D_TRACE_INFO, + "D3DShaderGen_GenerateLinearGradShader", + flags); + + /* + * To simplify the code and to make it easier to upload a number of + * uniform values at once, we pack a bunch of scalar (float) values + * into a single float3 below. Here's how the values are related: + * + * params.x = p0 + * params.y = p1 + * params.z = p3 + */ + paintVars = + "float3 params : register(c16);"; + distCode = + "float3 fragCoord = float3(winCoord.x, winCoord.y, 1.0);" + "dist = dot(params.xyz, fragCoord);"; + + D3DShaderGen_GenerateMultiGradShader(flags, "linear", + paintVars, distCode); +} + +/********************** RadialGradientPaint support *************************/ + +static void +D3DShaderGen_GenerateRadialGradShader(int flags) +{ + char *paintVars; + char *distCode; + + J2dTraceLn1(J2D_TRACE_INFO, + "D3DShaderGen_GenerateRadialGradShader", + flags); + + /* + * To simplify the code and to make it easier to upload a number of + * uniform values at once, we pack a bunch of scalar (float) values + * into float3 values below. Here's how the values are related: + * + * m0.x = m00 + * m0.y = m01 + * m0.z = m02 + * + * m1.x = m10 + * m1.y = m11 + * m1.z = m12 + * + * precalc.x = focusX + * precalc.y = 1.0 - (focusX * focusX) + * precalc.z = 1.0 / precalc.z + */ + paintVars = + "float3 m0 : register(c16);" + "float3 m1 : register(c17);" + "float3 precalc : register(c18);"; + + /* + * The following code is derived from Daniel Rice's whitepaper on + * radial gradient performance (attached to the bug report for 6521533). + * Refer to that document as well as the setup code in the Java-level + * BufferedPaints.setRadialGradientPaint() method for more details. + */ + distCode = + "float3 fragCoord = float3(winCoord.x, winCoord.y, 1.0);" + "float x = dot(fragCoord, m0);" + "float y = dot(fragCoord, m1);" + "float xfx = x - precalc.x;" + "dist = (precalc.x*xfx + sqrt(xfx*xfx + y*y*precalc.y))*precalc.z;"; + + D3DShaderGen_GenerateMultiGradShader(flags, "radial", + paintVars, distCode); +} + +/*************************** LCD text support *******************************/ + +// REMIND: Shader uses texture addressing operations in a dependency chain +// that is too complex for the target shader model (ps_2_0) to handle +// (ugh, I guess we can either require ps_3_0 or just use +// the slower pow intrinsic) +#define POW_LUT 0 + +static const char *lcdTextShaderSource = + "float3 srcAdj : register(c0);" + "sampler2D glyphTex : register(s0);" + "sampler2D dstTex : register(s1);" +#if POW_LUT + "sampler3D invgammaTex : register(s2);" + "sampler3D gammaTex : register(s3);" +#else + "float3 invgamma : register(c1);" + "float3 gamma : register(c2);" +#endif + "" + "void main(in float2 tc0 : TEXCOORD0," + " in float2 tc1 : TEXCOORD1," + " inout float4 color : COLOR0)" + "{" + // load the RGB value from the glyph image at the current texcoord + " float3 glyphClr = tex2D(glyphTex, tc0).rgb;" + " if (!any(glyphClr)) {" + // zero coverage, so skip this fragment + " discard;" + " }" + // load the RGB value from the corresponding destination pixel + " float3 dstClr = tex2D(dstTex, tc1).rgb;" + // gamma adjust the dest color using the invgamma LUT +#if POW_LUT + " float3 dstAdj = tex3D(invgammaTex, dstClr).rgb;" +#else + " float3 dstAdj = pow(dstClr, invgamma);" +#endif + // linearly interpolate the three color values + " float3 result = lerp(dstAdj, srcAdj, glyphClr);" + // gamma re-adjust the resulting color (alpha is always set to 1.0) +#if POW_LUT + " color = float4(tex3D(gammaTex, result).rgb, 1.0);" +#else + " color = float4(pow(result, gamma), 1.0);" +#endif + "}"; + +static void +D3DShaderGen_GenerateLCDTextShader() +{ + J2dTraceLn(J2D_TRACE_INFO, "D3DShaderGen_GenerateLCDTextShader"); + + D3DShaderGen_WritePixelShader((char *)lcdTextShaderSource, "lcdtext", 0); +} + +/*************************** AA support *******************************/ + +/* + * This shader fills the space between an outer and inner parallelogram. + * It can be used to draw an outline by specifying both inner and outer + * values. It fills pixels by estimating what portion falls inside the + * outer shape, and subtracting an estimate of what portion falls inside + * the inner shape. Specifying both inner and outer values produces a + * standard "wide outline". Specifying an inner shape that falls far + * outside the outer shape allows the same shader to fill the outer + * shape entirely since pixels that fall within the outer shape are never + * inside the inner shape and so they are filled based solely on their + * coverage of the outer shape. + * + * The setup code renders this shader over the bounds of the outer + * shape (or the only shape in the case of a fill operation) and + * sets the texture 0 coordinates so that 0,0=>0,1=>1,1=>1,0 in those + * texture coordinates map to the four corners of the parallelogram. + * Similarly the texture 1 coordinates map the inner shape to the + * unit square as well, but in a different coordinate system. + * + * When viewed in the texture coordinate systems the parallelograms + * we are filling are unit squares, but the pixels have then become + * tiny parallelograms themselves. Both of the texture coordinate + * systems are affine transforms so the rate of change in X and Y + * of the texture coordinates are essentially constants and happen + * to correspond to the size and direction of the slanted sides of + * the distorted pixels relative to the "square mapped" boundary + * of the parallelograms. + * + * The shader uses the ddx() and ddy() functions to measure the "rate + * of change" of these texture coordinates and thus gets an accurate + * measure of the size and shape of a pixel relative to the two + * parallelograms. It then uses the bounds of the size and shape + * of a pixel to intersect with the unit square to estimate the + * coverage of the pixel. Unfortunately, without a lot more work + * to calculate the exact area of intersection between a unit + * square (the original parallelogram) and a parallelogram (the + * distorted pixel), this shader only approximates the pixel + * coverage, but emperically the estimate is very useful and + * produces visually pleasing results, if not theoretically accurate. + */ +static const char *aaShaderSource = + "void main(in float2 tco : TEXCOORD0," + " in float2 tci : TEXCOORD1," + " inout float4 color : COLOR0)" + "{" + // Calculate the vectors for the "legs" of the pixel parallelogram + // for the outer parallelogram. + " float2 oleg1 = ddx(tco);" + " float2 oleg2 = ddy(tco);" + // Calculate the bounds of the distorted pixel parallelogram. + " float2 omin = min(tco, tco+oleg1);" + " omin = min(omin, tco+oleg2);" + " omin = min(omin, tco+oleg1+oleg2);" + " float2 omax = max(tco, tco+oleg1);" + " omax = max(omax, tco+oleg2);" + " omax = max(omax, tco+oleg1+oleg2);" + // Calculate the vectors for the "legs" of the pixel parallelogram + // for the inner parallelogram. + " float2 ileg1 = ddx(tci);" + " float2 ileg2 = ddy(tci);" + // Calculate the bounds of the distorted pixel parallelogram. + " float2 imin = min(tci, tci+ileg1);" + " imin = min(imin, tci+ileg2);" + " imin = min(imin, tci+ileg1+ileg2);" + " float2 imax = max(tci, tci+ileg1);" + " imax = max(imax, tci+ileg2);" + " imax = max(imax, tci+ileg1+ileg2);" + // Clamp the bounds of the parallelograms to the unit square to + // estimate the intersection of the pixel parallelogram with + // the unit square. The ratio of the 2 rectangle areas is a + // reasonable estimate of the proportion of coverage. + " float2 o1 = clamp(omin, 0.0, 1.0);" + " float2 o2 = clamp(omax, 0.0, 1.0);" + " float oint = (o2.y-o1.y)*(o2.x-o1.x);" + " float oarea = (omax.y-omin.y)*(omax.x-omin.x);" + " float2 i1 = clamp(imin, 0.0, 1.0);" + " float2 i2 = clamp(imax, 0.0, 1.0);" + " float iint = (i2.y-i1.y)*(i2.x-i1.x);" + " float iarea = (imax.y-imin.y)*(imax.x-imin.x);" + // Proportion of pixel in outer shape minus the proportion + // of pixel in the inner shape == the coverage of the pixel + // in the area between the two. + " float coverage = oint/oarea - iint / iarea;" + " color *= coverage;" + "}"; + +static void +D3DShaderGen_GenerateAAParallelogramShader() +{ + J2dTraceLn(J2D_TRACE_INFO, "D3DShaderGen_GenerateAAParallelogramShader"); + + D3DShaderGen_WriteShader((char *)aaShaderSource, "ps_2_a", "aapgram", 0); +} + +/**************************** Main entrypoint *******************************/ + +static void +D3DShaderGen_GenerateAllShaders() +{ + int i; + +#if 1 + // Generate BufferedImageOp shaders + for (i = 0; i < MAX_RESCALE; i++) { + D3DShaderGen_GenerateRescaleShader(i); + } + D3DShaderGen_WriteShaderArray("rescale", MAX_RESCALE); + for (i = 0; i < MAX_CONVOLVE; i++) { + D3DShaderGen_GenerateConvolveShader(i); + } + D3DShaderGen_WriteShaderArray("convolve", MAX_CONVOLVE); + for (i = 0; i < MAX_LOOKUP; i++) { + D3DShaderGen_GenerateLookupShader(i); + } + D3DShaderGen_WriteShaderArray("lookup", MAX_LOOKUP); + + // Generate Paint shaders + for (i = 0; i < MAX_BASIC_GRAD; i++) { + D3DShaderGen_GenerateBasicGradShader(i); + } + D3DShaderGen_WriteShaderArray("grad", MAX_BASIC_GRAD); + for (i = 0; i < MAX_MULTI_GRAD; i++) { + if (EXTRACT_CYCLE_METHOD(i) == 3) continue; // REMIND + D3DShaderGen_GenerateLinearGradShader(i); + } + D3DShaderGen_WriteShaderArray("linear", MAX_MULTI_GRAD); + for (i = 0; i < MAX_MULTI_GRAD; i++) { + if (EXTRACT_CYCLE_METHOD(i) == 3) continue; // REMIND + D3DShaderGen_GenerateRadialGradShader(i); + } + D3DShaderGen_WriteShaderArray("radial", MAX_MULTI_GRAD); + + // Generate LCD text shader + D3DShaderGen_GenerateLCDTextShader(); + + // Genereate Shader to fill Antialiased parallelograms + D3DShaderGen_GenerateAAParallelogramShader(); +#else + /* + for (i = 0; i < MAX_RESCALE; i++) { + D3DShaderGen_GenerateRescaleShader(i); + } + D3DShaderGen_WriteShaderArray("rescale", MAX_RESCALE); + */ + //D3DShaderGen_GenerateConvolveShader(2); + //D3DShaderGen_GenerateLCDTextShader(); + //D3DShaderGen_GenerateLinearGradShader(16); + D3DShaderGen_GenerateBasicGradShader(0); +#endif +} + +int +main(int argc, char **argv) +{ + fpHeader = fopen(strHeaderFile, "a"); + + D3DShaderGen_GenerateAllShaders(); + + fclose(fpHeader); + + return 0; +} |