diff --git a/doc/contributions.txt b/doc/contributions.txt --- a/doc/contributions.txt +++ b/doc/contributions.txt @@ -1190,6 +1190,8 @@ CTS-411 STORM-546 VWR-24509 + STORM-1684 + VWR-27714 SH-2477 STORM-1684 STORM-1819 diff --git a/indra/llrender/llshadermgr.h b/indra/llrender/llshadermgr.h --- a/indra/llrender/llshadermgr.h +++ b/indra/llrender/llshadermgr.h @@ -120,7 +120,7 @@ DEFERRED_SSAO_MAX_RADIUS, DEFERRED_SSAO_FACTOR, DEFERRED_SSAO_FACTOR_INV, - DEFERRED_SSAO_EFFECT_MAT, + DEFERRED_SSAO_EFFECT, DEFERRED_SCREEN_RES, DEFERRED_NEAR_CLIP, DEFERRED_SHADOW_OFFSET, diff --git a/indra/llrender/llshadermgr.cpp b/indra/llrender/llshadermgr.cpp --- a/indra/llrender/llshadermgr.cpp +++ b/indra/llrender/llshadermgr.cpp @@ -1066,7 +1066,7 @@ mReservedUniforms.push_back("ssao_max_radius"); mReservedUniforms.push_back("ssao_factor"); mReservedUniforms.push_back("ssao_factor_inv"); - mReservedUniforms.push_back("ssao_effect_mat"); + mReservedUniforms.push_back("ssao_effect"); mReservedUniforms.push_back("screen_res"); mReservedUniforms.push_back("near_clip"); mReservedUniforms.push_back("shadow_offset"); diff --git a/indra/newview/app_settings/settings.xml b/indra/newview/app_settings/settings.xml --- a/indra/newview/app_settings/settings.xml +++ b/indra/newview/app_settings/settings.xml @@ -7793,7 +7793,7 @@ Type F32 Value - 500.0 + 1500.0 RenderSSAOMaxScale @@ -7804,7 +7804,7 @@ Type U32 Value - 200 + 400 RenderSSAOFactor @@ -7815,19 +7815,19 @@ Type F32 Value - 0.30 + 5.0 RenderSSAOEffect Comment - Multiplier for (1) value and (2) saturation (HSV definition), for areas which are totally occluded. Blends with original color for partly-occluded areas. (Third component is unused.) + Multiplier for (1) value for areas which are totally occluded. Blends with original color for partly-occluded areas. (Only first component is used.) Persist 1 Type Vector3 Value - 0.80 + 0.50 1.00 0.00 @@ -8068,7 +8068,7 @@ Type F32 Value - -0.008 + -0.003 RenderShadowOffset @@ -8161,7 +8161,7 @@ Type F32 Value - -0.001 + 0.0 RenderSpotShadowOffset @@ -8172,7 +8172,7 @@ Type F32 Value - 0.04 + 0.15 RenderShadowResolutionScale @@ -8426,7 +8426,7 @@ Vector3 Value - 3.0 + 1.0 2.0 0.0 @@ -8441,7 +8441,7 @@ Type F32 Value - 1.4 + 3.0 RenderShadowBlurSamples diff --git a/indra/newview/app_settings/shaders/class1/deferred/blurLightF.glsl b/indra/newview/app_settings/shaders/class1/deferred/blurLightF.glsl --- a/indra/newview/app_settings/shaders/class1/deferred/blurLightF.glsl +++ b/indra/newview/app_settings/shaders/class1/deferred/blurLightF.glsl @@ -72,24 +72,23 @@ vec3 pos = getPosition(tc).xyz; vec4 ccol = texture2DRect(lightMap, tc).rgba; - vec2 dlt = kern_scale * delta / (1.0+norm.xy*norm.xy); + vec2 dlt = kern_scale * delta / (vec2(1.0)+norm.xy*norm.xy); dlt /= max(-pos.z*dist_factor, 1.0); vec2 defined_weight = getKern(0).xy; // special case the first (centre) sample's weight in the blur; we have to sample it anyway so we get it for 'free' vec4 col = defined_weight.xyxx * ccol; // relax tolerance according to distance to avoid speckling artifacts, as angles and distances are a lot more abrupt within a small screen area at larger distances - float pointplanedist_tolerance_pow2 = pos.z*pos.z*0.00005; + float pointplanedist_tolerance_pow2 = pos.z*-0.001; // perturb sampling origin slightly in screen-space to hide edge-ghosting artifacts where smoothing radius is quite large - float tc_mod = 0.5*(tc.x + tc.y); // mod(tc.x+tc.y,2) - tc_mod -= floor(tc_mod); - tc_mod *= 2.0; + vec2 tc_v = fract(0.5 * tc.xy); // we now have floor(mod(tc,2.0))*0.5 + float tc_mod = 2.0 * abs(tc_v.x - tc_v.y); // diff of x,y makes checkerboard tc += ( (tc_mod - 0.5) * getKern(1).z * dlt * 0.5 ); for (int i = 1; i < 4; i++) { - vec2 samptc = tc + getKern(i).z*dlt; + vec2 samptc = (tc + getKern(i).z * dlt); vec3 samppos = getPosition(samptc).xyz; float d = dot(norm.xyz, samppos.xyz-pos.xyz);// dist from plane if (d*d <= pointplanedist_tolerance_pow2) @@ -100,7 +99,7 @@ } for (int i = 1; i < 4; i++) { - vec2 samptc = tc - getKern(i).z*dlt; + vec2 samptc = (tc - getKern(i).z * dlt); vec3 samppos = getPosition(samptc).xyz; float d = dot(norm.xyz, samppos.xyz-pos.xyz);// dist from plane if (d*d <= pointplanedist_tolerance_pow2) @@ -111,7 +110,7 @@ } col /= defined_weight.xyxx; - col.y *= col.y; + col.y *= col.y; // delinearize SSAO effect post-blur frag_color = col; } diff --git a/indra/newview/app_settings/shaders/class1/deferred/softenLightF.glsl b/indra/newview/app_settings/shaders/class1/deferred/softenLightF.glsl --- a/indra/newview/app_settings/shaders/class1/deferred/softenLightF.glsl +++ b/indra/newview/app_settings/shaders/class1/deferred/softenLightF.glsl @@ -62,7 +62,7 @@ uniform vec4 glow; uniform float scene_light_strength; uniform mat3 env_mat; -uniform mat3 ssao_effect_mat; +uniform float ssao_effect; uniform vec3 sun_dir; VARYING vec2 vary_fragcoord; @@ -202,22 +202,15 @@ //increase ambient when there are more clouds vec4 tmpAmbient = ambient + (vec4(1.) - ambient) * cloud_shadow * 0.5; - - /* decrease value and saturation (that in HSV, not HSL) for occluded areas - * // for HSV color/geometry used here, see http://gimp-savvy.com/BOOK/index.html?node52.html - * // The following line of code performs the equivalent of: - * float ambAlpha = tmpAmbient.a; - * float ambValue = dot(vec3(tmpAmbient), vec3(0.577)); // projection onto <1/rt(3), 1/rt(3), 1/rt(3)>, the neutral white-black axis - * vec3 ambHueSat = vec3(tmpAmbient) - vec3(ambValue); - * tmpAmbient = vec4(RenderSSAOEffect.valueFactor * vec3(ambValue) + RenderSSAOEffect.saturationFactor *(1.0 - ambFactor) * ambHueSat, ambAlpha); - */ - tmpAmbient = vec4(mix(ssao_effect_mat * tmpAmbient.rgb, tmpAmbient.rgb, ambFactor), tmpAmbient.a); //haze color setAdditiveColor( vec3(blue_horizon * blue_weight * (sunlight*(1.-cloud_shadow) + tmpAmbient) + (haze_horizon * haze_weight) * (sunlight*(1.-cloud_shadow) * temp2.x + tmpAmbient))); + + // decrease value for occluded areas + tmpAmbient = vec4(mix(ssao_effect * tmpAmbient.rgb, tmpAmbient.rgb, ambFactor), tmpAmbient.a); //brightness of surface both sunlight and ambient setSunlitColor(vec3(sunlight * .5)); diff --git a/indra/newview/app_settings/shaders/class1/deferred/sunLightSSAOF.glsl b/indra/newview/app_settings/shaders/class1/deferred/sunLightSSAOF.glsl --- a/indra/newview/app_settings/shaders/class1/deferred/sunLightSSAOF.glsl +++ b/indra/newview/app_settings/shaders/class1/deferred/sunLightSSAOF.glsl @@ -62,11 +62,8 @@ return pos; } -//calculate decreases in ambient lighting when crowded out (SSAO) -float calcAmbientOcclusion(vec4 pos, vec3 norm) +vec2 getKern(int i) { - float ret = 1.0; - vec2 kern[8]; // exponentially (^2) distant occlusion samples spread around origin kern[0] = vec2(-1.0, 0.0) * 0.125*0.125; @@ -78,40 +75,53 @@ kern[6] = vec2(-0.7071, 0.7071) * 0.875*0.875; kern[7] = vec2(0.7071, -0.7071) * 1.000*1.000; + return kern[i]; +} + +//calculate decreases in ambient lighting when crowded out (SSAO) +float calcAmbientOcclusion(vec4 pos, vec3 norm) +{ vec2 pos_screen = vary_fragcoord.xy; - vec3 pos_world = pos.xyz; vec2 noise_reflect = texture2D(noiseMap, vary_fragcoord.xy/128.0).xy; - float angle_hidden = 0.0; - int points = 0; + // We treat the first sample as the origin, which definitely doesn't obscure itself thanks to being visible for sampling in the first place. + float points = 1.0; + float angle_hidden = 0.0; - float scale = min(ssao_radius / -pos_world.z, ssao_max_radius); + // use a kernel scale that diminishes with distance. + // a scale of less than 32 is just wasting good samples, though. + float scale = max(32.0, min(ssao_radius / -pos.z, ssao_max_radius)); + + // it was found that keeping # of samples a constant was the fastest, probably due to compiler optimizations (unrolling?) + for (int i = 0; i < 8; i++) + { + vec2 samppos_screen = pos_screen + scale * reflect(getKern(i), noise_reflect); + + // if sample is out-of-screen then give it no weight by continuing + if (any(lessThan(samppos_screen.xy, vec2(0.0, 0.0))) || + any(greaterThan(samppos_screen.xy, vec2(screen_res.xy)))) continue; + + vec3 samppos_world = getPosition(samppos_screen).xyz; + + vec3 diff = samppos_world - pos.xyz; + + if (diff.z < ssao_factor && diff.z != 0.0) + { + float dist = length(diff); + float angrel = max(0.0, dot(norm.xyz, diff/dist)); + float distrel = 1.0/(1.0+dist*dist); + float samplehidden = min(angrel, distrel); + + angle_hidden += (samplehidden); + points += 1.0; + } + } + + angle_hidden /= points; - // it was found that keeping # of samples a constant was the fastest, probably due to compiler optimizations unrolling?) - for (int i = 0; i < 8; i++) - { - vec2 samppos_screen = pos_screen + scale * reflect(kern[i], noise_reflect); - vec3 samppos_world = getPosition(samppos_screen).xyz; - - vec3 diff = pos_world - samppos_world; - float dist2 = dot(diff, diff); - - // assume each sample corresponds to an occluding sphere with constant radius, constant x-sectional area - // --> solid angle shrinking by the square of distance - //radius is somewhat arbitrary, can approx with just some constant k * 1 / dist^2 - //(k should vary inversely with # of samples, but this is taken care of later) - - angle_hidden = angle_hidden + float(dot((samppos_world - 0.05*norm - pos_world), norm) > 0.0) * min(1.0/dist2, ssao_factor_inv); - - // 'blocked' samples (significantly closer to camera relative to pos_world) are "no data", not "no occlusion" - points = points + int(diff.z > -1.0); - } - - angle_hidden = min(ssao_factor*angle_hidden/float(points), 1.0); - - ret = (1.0 - (float(points != 0) * angle_hidden)); - - return min(ret, 1.0); + float rtn = (1.0 - angle_hidden); + + return (rtn * rtn); } void main() diff --git a/indra/newview/app_settings/shaders/class2/deferred/softenLightF.glsl b/indra/newview/app_settings/shaders/class2/deferred/softenLightF.glsl --- a/indra/newview/app_settings/shaders/class2/deferred/softenLightF.glsl +++ b/indra/newview/app_settings/shaders/class2/deferred/softenLightF.glsl @@ -63,7 +63,7 @@ uniform float scene_light_strength; uniform mat3 env_mat; uniform vec4 shadow_clip; -uniform mat3 ssao_effect_mat; +uniform float ssao_effect; uniform mat4 inv_proj; uniform vec2 screen_res; @@ -205,22 +205,15 @@ //increase ambient when there are more clouds vec4 tmpAmbient = ambient + (vec4(1.) - ambient) * cloud_shadow * 0.5; - /* decrease value and saturation (that in HSV, not HSL) for occluded areas - * // for HSV color/geometry used here, see http://gimp-savvy.com/BOOK/index.html?node52.html - * // The following line of code performs the equivalent of: - * float ambAlpha = tmpAmbient.a; - * float ambValue = dot(vec3(tmpAmbient), vec3(0.577)); // projection onto <1/rt(3), 1/rt(3), 1/rt(3)>, the neutral white-black axis - * vec3 ambHueSat = vec3(tmpAmbient) - vec3(ambValue); - * tmpAmbient = vec4(RenderSSAOEffect.valueFactor * vec3(ambValue) + RenderSSAOEffect.saturationFactor *(1.0 - ambFactor) * ambHueSat, ambAlpha); - */ - tmpAmbient = vec4(mix(ssao_effect_mat * tmpAmbient.rgb, tmpAmbient.rgb, ambFactor), tmpAmbient.a); - //haze color setAdditiveColor( vec3(blue_horizon * blue_weight * (sunlight*(1.-cloud_shadow) + tmpAmbient) + (haze_horizon * haze_weight) * (sunlight*(1.-cloud_shadow) * temp2.x + tmpAmbient))); + // decrease ambient value for occluded areas + tmpAmbient *= mix(ssao_effect, 1.0, ambFactor); + //brightness of surface both sunlight and ambient setSunlitColor(vec3(sunlight * .5)); setAmblitColor(vec3(tmpAmbient * .25)); diff --git a/indra/newview/app_settings/shaders/class2/deferred/sunLightSSAOF.glsl b/indra/newview/app_settings/shaders/class2/deferred/sunLightSSAOF.glsl --- a/indra/newview/app_settings/shaders/class2/deferred/sunLightSSAOF.glsl +++ b/indra/newview/app_settings/shaders/class2/deferred/sunLightSSAOF.glsl @@ -81,79 +81,85 @@ vec2 getKern(int i) { - vec2 kern[8]; - // exponentially (^2) distant occlusion samples spread around origin - kern[0] = vec2(-1.0, 0.0) * 0.125*0.125; - kern[1] = vec2(1.0, 0.0) * 0.250*0.250; - kern[2] = vec2(0.0, 1.0) * 0.375*0.375; - kern[3] = vec2(0.0, -1.0) * 0.500*0.500; - kern[4] = vec2(0.7071, 0.7071) * 0.625*0.625; - kern[5] = vec2(-0.7071, -0.7071) * 0.750*0.750; - kern[6] = vec2(-0.7071, 0.7071) * 0.875*0.875; - kern[7] = vec2(0.7071, -0.7071) * 1.000*1.000; - - return kern[i]; + vec2 kern[8]; + // exponentially (^2) distant occlusion samples spread around origin + kern[0] = vec2(-1.0, 0.0) * 0.125*0.125; + kern[1] = vec2(1.0, 0.0) * 0.250*0.250; + kern[2] = vec2(0.0, 1.0) * 0.375*0.375; + kern[3] = vec2(0.0, -1.0) * 0.500*0.500; + kern[4] = vec2(0.7071, 0.7071) * 0.625*0.625; + kern[5] = vec2(-0.7071, -0.7071) * 0.750*0.750; + kern[6] = vec2(-0.7071, 0.7071) * 0.875*0.875; + kern[7] = vec2(0.7071, -0.7071) * 1.000*1.000; + + return kern[i]; } //calculate decreases in ambient lighting when crowded out (SSAO) float calcAmbientOcclusion(vec4 pos, vec3 norm) { - float ret = 1.0; + vec2 pos_screen = vary_fragcoord.xy; + vec2 noise_reflect = texture2D(noiseMap, vary_fragcoord.xy/128.0).xy; - vec2 pos_screen = vary_fragcoord.xy; - vec3 pos_world = pos.xyz; - vec2 noise_reflect = texture2D(noiseMap, vary_fragcoord.xy/128.0).xy; + // We treat the first sample as the origin, which definitely doesn't obscure itself thanks to being visible for sampling in the first place. + float points = 1.0; + float angle_hidden = 0.0; - float angle_hidden = 0.0; - float points = 0; + // use a kernel scale that diminishes with distance. + // a scale of less than 32 is just wasting good samples, though. + float scale = max(32.0, min(ssao_radius / -pos.z, ssao_max_radius)); + + // it was found that keeping # of samples a constant was the fastest, probably due to compiler optimizations (unrolling?) + for (int i = 0; i < 8; i++) + { + vec2 samppos_screen = pos_screen + scale * reflect(getKern(i), noise_reflect); + + // if sample is out-of-screen then give it no weight by continuing + if (any(lessThan(samppos_screen.xy, vec2(0.0, 0.0))) || + any(greaterThan(samppos_screen.xy, vec2(screen_res.xy)))) continue; + + vec3 samppos_world = getPosition(samppos_screen).xyz; + + vec3 diff = samppos_world - pos.xyz; + + if (diff.z < ssao_factor // only use sample if it's near enough + && diff.z != 0.0 // Z is very quantized at distance, this lessens noise and eliminates dist==0 + ) + { + float dist = length(diff); + float angrel = max(0.0, dot(norm.xyz, diff/dist)); // how much the origin faces the sample + float distrel = 1.0/(1.0+dist*dist); // 'closeness' of origin to sample + + // origin is obscured by this sample according to how directly the origin is facing the sample and how close the sample is. It has to score high on both to be a good occluder. (a*d) seems the most intuitive way to score, but min(a,d) gives a less localized effect... + float samplehidden = min(angrel, distrel); + + angle_hidden += (samplehidden); + points += 1.0; + } + } + + angle_hidden = angle_hidden / points; - float scale = min(ssao_radius / -pos_world.z, ssao_max_radius); - - // it was found that keeping # of samples a constant was the fastest, probably due to compiler optimizations (unrolling?) - for (int i = 0; i < 8; i++) - { - vec2 samppos_screen = pos_screen + scale * reflect(getKern(i), noise_reflect); - vec3 samppos_world = getPosition(samppos_screen).xyz; - - vec3 diff = pos_world - samppos_world; - float dist2 = dot(diff, diff); - - // assume each sample corresponds to an occluding sphere with constant radius, constant x-sectional area - // --> solid angle shrinking by the square of distance - //radius is somewhat arbitrary, can approx with just some constant k * 1 / dist^2 - //(k should vary inversely with # of samples, but this is taken care of later) - - float funky_val = (dot((samppos_world - 0.05*norm - pos_world), norm) > 0.0) ? 1.0 : 0.0; - angle_hidden = angle_hidden + funky_val * min(1.0/dist2, ssao_factor_inv); - - // 'blocked' samples (significantly closer to camera relative to pos_world) are "no data", not "no occlusion" - float diffz_val = (diff.z > -1.0) ? 1.0 : 0.0; - points = points + diffz_val; - } - - angle_hidden = min(ssao_factor*angle_hidden/points, 1.0); - - float points_val = (points > 0.0) ? 1.0 : 0.0; - ret = (1.0 - (points_val * angle_hidden)); + float rtn = (1.0 - angle_hidden); - ret = max(ret, 0.0); - return min(ret, 1.0); + return (rtn * rtn); } float pcfShadow(sampler2DShadow shadowMap, vec4 stc, float scl, vec2 pos_screen) { + vec2 recip_shadow_res = 1.0 / shadow_res.xy; stc.xyz /= stc.w; stc.z += shadow_bias; - stc.x = floor(stc.x*shadow_res.x + fract(pos_screen.y*0.666666666))/shadow_res.x; + stc.x = floor(stc.x*shadow_res.x + fract(pos_screen.y*0.666666666)) * recip_shadow_res.x; float cs = shadow2D(shadowMap, stc.xyz).x; float shadow = cs; - shadow += shadow2D(shadowMap, stc.xyz+vec3(2.0/shadow_res.x, 1.5/shadow_res.y, 0.0)).x; - shadow += shadow2D(shadowMap, stc.xyz+vec3(1.0/shadow_res.x, -1.5/shadow_res.y, 0.0)).x; - shadow += shadow2D(shadowMap, stc.xyz+vec3(-1.0/shadow_res.x, 1.5/shadow_res.y, 0.0)).x; - shadow += shadow2D(shadowMap, stc.xyz+vec3(-2.0/shadow_res.x, -1.5/shadow_res.y, 0.0)).x; + shadow += shadow2D(shadowMap, stc.xyz+vec3(2.0*recip_shadow_res.x, 1.5*recip_shadow_res.y, 0.0)).x; + shadow += shadow2D(shadowMap, stc.xyz+vec3(1.0*recip_shadow_res.x, -1.5*recip_shadow_res.y, 0.0)).x; + shadow += shadow2D(shadowMap, stc.xyz+vec3(-1.0*recip_shadow_res.x, 1.5*recip_shadow_res.y, 0.0)).x; + shadow += shadow2D(shadowMap, stc.xyz+vec3(-2.0*recip_shadow_res.x, -1.5*recip_shadow_res.y, 0.0)).x; return shadow*0.2; } @@ -167,8 +173,7 @@ float cs = shadow2D(shadowMap, stc.xyz).x; float shadow = cs; - vec2 off = 1.0/proj_shadow_res; - off.y *= 1.5; + vec2 off = vec2(1.0, 1.5) / proj_shadow_res.xy; shadow += shadow2D(shadowMap, stc.xyz+vec3(off.x*2.0, off.y, 0.0)).x; shadow += shadow2D(shadowMap, stc.xyz+vec3(off.x, -off.y, 0.0)).x; diff --git a/indra/newview/pipeline.cpp b/indra/newview/pipeline.cpp --- a/indra/newview/pipeline.cpp +++ b/indra/newview/pipeline.cpp @@ -7772,14 +7772,7 @@ shader.uniform1f(LLShaderMgr::DEFERRED_SSAO_FACTOR_INV, 1.0/ssao_factor); LLVector3 ssao_effect = RenderSSAOEffect; - F32 matrix_diag = (ssao_effect[0] + 2.0*ssao_effect[1])/3.0; - F32 matrix_nondiag = (ssao_effect[0] - ssao_effect[1])/3.0; - // This matrix scales (proj of color onto <1/rt(3),1/rt(3),1/rt(3)>) by - // value factor, and scales remainder by saturation factor - F32 ssao_effect_mat[] = { matrix_diag, matrix_nondiag, matrix_nondiag, - matrix_nondiag, matrix_diag, matrix_nondiag, - matrix_nondiag, matrix_nondiag, matrix_diag}; - shader.uniformMatrix3fv(LLShaderMgr::DEFERRED_SSAO_EFFECT_MAT, 1, GL_FALSE, ssao_effect_mat); + shader.uniform1f(LLShaderMgr::DEFERRED_SSAO_EFFECT, ssao_effect[0]); //F32 shadow_offset_error = 1.f + RenderShadowOffsetError * fabsf(LLViewerCamera::getInstance()->getOrigin().mV[2]); F32 shadow_bias_error = RenderShadowBiasError * fabsf(LLViewerCamera::getInstance()->getOrigin().mV[2])/3000.f;