JurassicParkTrespasser/jp2_pc/Source/Lib/Renderer/Primitives/P6/DrawSubTriangleEx.inl

3450 lines
85 KiB
C++

/***********************************************************************************************
*
* Copyright © DreamWorks Interactive, 1997.
*
* Contents:
* Specialized (non-template) versions of the DrawSubtriangle function.
* Optimized for the Pentium Pro Processor
*
***********************************************************************************************
*
* $Log:: /JP2_PC/Source/Lib/Renderer/Primitives/P6/DrawSubTriangleEx.cpp $
*
* 14 97/12/03 2:54p Pkeet
* Fixed crash in copy primitive.
*
* 13 97/12/03 1:25p Pkeet
* New P6 optimized version created from P5 version.
*
* 12 97.12.03 1:15p Mmouni
*
* 11 97.11.05 7:52p Mmouni
* Added terrain fog mask.
*
* 10 10/01/97 2:44p Mmouni
* Added seperate terrain specific copy primitives.
*
* 9 9/01/97 8:05p Rwyatt
*
* 8 8/19/97 9:08p Bbell
* Added asserts.
*
* 7 8/17/97 10:39p Rwyatt
* Removed old unused labels
*
* 6 8/15/97 8:43p Bbell
* Added fogging for terrain texturing.
*
* 5 8/15/97 12:44a Rwyatt
* Big optimizations, all 16 bit primitives are now optimized and perspective is in line.
* Clamping in the perspective code is controlled by VER_CLAMP_UV_16BIT this is independant of
* clamping in the general perspective code.
*
* 4 8/01/97 6:55p Rwyatt
* Linear, No Clut, Non-transparent has had its scan lines optimized for Pentium Pro. Y loop
* still needs doing.
*
* 3 97/07/18 3:29p Pkeet
* Removed use of the 'b_right' parameter from DrawSubTriangle.
*
* 2 97/07/16 15:49 Speter
* Commented out redundant static delcarations now that all DrawSubTriangles are included into
* one file (now just declared in DrawSubTriangleEx.cpp.)
*
* 1 7/08/97 1:15p Rwyatt
* Initial Implementation of Pentium Pro rendering specific optimizations
*
***********************************************************************************************
***********************************************************************************************
* 22 6/25/97 3:28p Rwyatt
* Re-aranged code
*
* 21 6/17/97 4:10p Rwyatt
* Optimized perspective call
*
* 20 97/06/16 10:16p Pkeet
* Moved linear primitives into non-template code.
*
* 19 97/06/16 8:47p Pkeet
* Replaced template calls with explicit C++ code for perspective routines.
*
* 18 6/15/97 1:31a Bbell
* Removed unnecessary include.
*
* 17 97/06/12 5:56p Pkeet
* Added capability to render only every other scanline and to double the number of scanlines.
*
* 16 97/06/11 4:58p Pkeet
* Added opaque versions of the optimized copy primitives.
*
* 15 6/11/97 4:11p Rwyatt
* Removed 3 AGIs and 2 PRO stalls from the perspective primitive. These stalls were costing
* about 40 clocks and the net result is 50% speed increase!!!!!!!!
*
* 14 6/11/97 2:45p Rwyatt
* Removed 2 AGI stalls from the linear copy
*
* 13 6/11/97 2:04a Rwyatt
* Slight;y optimized, need to run through V-Tune so we can spot any stalls
*
* 12 6/10/97 10:02p Rwyatt
* Blazing linear transparent copy
*
* 11 6/10/97 4:34p Rwyatt
* Slightly optimized perspective copy primitive
*
* 10 97/06/10 2:27p Pkeet
* Moved the bumpmap routines into a seperate module.
*
* 9 97/06/10 1:20p Pkeet
* Added initial assembly optimizations for bumpmap linear routines.
*
* 8 97/06/10 12:53p Pkeet
* Fixed bug in bumpmap assembly.
*
* 7 97/06/10 10:51a Pkeet
* Added initial assembly optimization passes for perspective corrected bumpmapping for both
* transparent and non-transparent versions.
*
* 6 97/06/09 6:38p Pkeet
* Algorithmic optimizations.
*
* 5 97/06/08 4:16p Pkeet
* Added perspective correction bump map primitives.
*
* 4 97/06/06 8:51p Pkeet
* Added linear.
*
* 3 97/06/06 6:44p Pkeet
* Moved the inner loop to self-modifying code.
*
* 2 97/06/06 4:36p Pkeet
* It works again.
*
* 1 97/06/04 6:46p Pkeet
* Initial specialized version of DrawSubTriangle.
*
**********************************************************************************************/
#include "Common.hpp"
#include "AsmSupport.hpp"
#include "Lib/View/Raster.hpp"
#include "Lib/Renderer/Primitives/DrawTriangle.hpp"
//
// Turn off the global optimized because it was re-arranging the assembly code
// without account for the self-modification offsets.
//
#pragma optimize("g", off)
//*****************************************************************************************
//
// Perspective, No Clut, Transparent.
//
void DrawSubtriangle(TCopyPerspTrans* pscan, CDrawPolygon<TCopyPerspTrans>* pdtri)
{
fixed fx_inc;
fixed fx_diff;
float f_inc_uinvz;
float f_inc_vinvz;
float f_inc_invz;
TCopyPerspTrans* plinc = &pdtri->pedgeBase->lineIncrement;
__asm
{
//
// Local copies of edge stepping values.
//
// fx_inc = pdtri->pedgeBase->lineIncrement.fxX;
// fx_diff = pdtri->pedgeBase->lineIncrement.fxXDifference;
// f_inc_uinvz = pdtri->pedgeBase->lineIncrement.indCoord.fUInvZ;
// f_inc_vinvz = pdtri->pedgeBase->lineIncrement.indCoord.fVInvZ;
// f_inc_invz = pdtri->pedgeBase->lineIncrement.indCoord.fInvZ;
//
mov eax,[plinc]
mov esi,[pdtri] // Pointer to polygon object.
mov ecx,[pscan] // Pointer to scanline object.
mov ebx,[eax]TCopyPerspTrans.fxX
mov edx,[eax]TCopyPerspTrans.fxXDifference
mov fx_inc,ebx
mov fx_diff,edx
mov ebx,[eax]TCopyPerspTrans.indCoord.fUInvZ
mov edx,[eax]TCopyPerspTrans.indCoord.fVInvZ
mov f_inc_uinvz,ebx
mov ebx,[eax]TCopyPerspTrans.indCoord.fInvZ
mov f_inc_vinvz,edx
mov f_inc_invz,ebx
//
// Iterate through the scanlines that intersect the subtriangle.
//
Y_LOOP:
push ebp // Save ebp.
// Check to see if we sould skip this scanline.
mov eax,[esi]CDrawPolygonBase.iY
mov ebx,[bEvenScanlinesOnly]
and eax,ebx
jnz END_OF_SCANLINE
mov ebx,[ecx]TCopyPerspTrans.fxX.i4Fx
mov eax,[esi]CDrawPolygonBase.fxLineLength.i4Fx
add ebx,eax
mov eax,[ecx]TCopyPerspTrans.fxX.i4Fx
sar ebx,16
mov edx,[esi]CDrawPolygonBase.iLineStartIndex
sar eax,16
add edx,ebx
sub eax,ebx // i_pixel
jge END_OF_SCANLINE
mov [i_screen_index],edx
mov [i_pixel],eax
//---------------------------------------------------------------------------
// Caclulate values for the first correction span, and start the divide for
// the next span.
//
// Copy global texture values.
mov ebx,[ecx]TCopyPerspTrans.indCoord.fUInvZ
mov esi,[ecx]TCopyPerspTrans.indCoord.fVInvZ
mov edi,[ecx]TCopyPerspTrans.indCoord.fInvZ
mov fGUInvZ,ebx
mov fGVInvZ,esi
mov fGInvZ,edi
mov ebx,fDUInvZEdge
mov esi,fDVInvZEdge
mov edi,fDInvZEdge
mov fDUInvZScanline,ebx
mov fDVInvZScanline,esi
mov fDInvZScanline,edi
mov ebx,[iSubdivideLen]
// scan line is +ve
add eax,ebx
jle short DONE_DIVIDE_PIXEL
// calc the new +ve ratio
fild [i_pixel]
fld fInvSubdivideLen
fchs
fmulp st(1),st(0) // st(0) = (-)fInvSubdivideLen * i_pixel;
fld fDUInvZScanline // U C
xor eax,eax
fmul st(0),st(1) // U*C C
fxch st(1) // C U*C
fld fDVInvZScanline // V C U*C
fxch st(1) // C V U*C
fmul st(1),st(0) // C V*C U*C
// stall(1)
fmul fDInvZScanline // Z*C V*C U*C
fxch st(2) // U*C V*C Z*C
fstp fDUInvZScanline // V*C Z*C
fstp fDVInvZScanline // Z*C
fstp fDInvZScanline
DONE_DIVIDE_PIXEL:
// Get current u, v and z values.
mov [iNextSubdivide],eax
mov ebx,dword ptr[fGInvZ] // f_z = fInverse(fGInvZ);
mov eax,iFI_SIGN_EXPONENT_SUB
sub eax,ebx
and ebx,iFI_MASK_MANTISSA
sar ebx,iSHIFT_MANTISSA
and eax,iFI_MASK_SIGN_EXPONENT
fld [fGUInvZ] // U/Z
add eax,dword ptr[i4InverseMantissa + ebx*4]
mov ebx,[bClampUV]
mov dword ptr[f_z],eax
#if (VER_CLAMP_UV_16BIT==TRUE)
//---------------------------------------------------------------------------
// Set current texture coordinates (clamped).
fmul [f_z]
fld [fGVInvZ]
fmul [f_z]
fxch st(1)
// stall(1)
fstp [f_u]
// Clamp f_u
mov eax,[f_u]
mov ebx,fTexEdgeTolerance
mov ecx,fTexWidth
cmp eax,ebx
cmovl (_eax,_ebx)
cmp eax,ecx
cmovg (_eax,_ecx)
mov ecx,fTexHeight
mov [f_u],eax
fld [f_u]
fadd [dFastFixed16Conversion]
fxch st(1)
// Clamp f_v
fstp [f_v]
mov eax,[f_v]
cmp eax,ebx
cmovl (_eax,_ebx)
cmp eax,ecx
cmovg (_eax,_ecx)
mov [f_v],eax
fld [f_v]
fadd [dFastFixed16Conversion]
fxch st(1)
fstp [d_temp_a]
fld [fGUInvZ]
fxch st(1)
fstp [d_temp_b]
// Increment u, v and z values.
fadd [fDUInvZScanline]
fld [fGVInvZ]
fadd [fDVInvZScanline]
fxch st(1)
fld [fGInvZ]
fadd [fDInvZScanline]
// Setup esi=uFrac, ecx=vFrac, edx=UVInt for Abrash texture loop.
mov edx,dword ptr[d_temp_a] // U (16.16)
mov eax,dword ptr[d_temp_b] // V (16.16)
sar eax,16 // Integral V
mov ecx,[iTexWidth] // Texture width.
imul eax,ecx // iv*twidth
sar edx,16 // Integral U
mov esi,dword ptr[d_temp_a] // Copy of U
shl esi,16 // UFrac
mov ecx,dword ptr[d_temp_b] // Copy of V
shl ecx,16 // VFrac
mov ebx,[pvTextureBitmap] // Texture base pointer.
shr ebx,1 // Into pixel offset.
add edx,eax // edx = iu + iv*twidth
add edx,ebx // Add texture base to edx.
fstp [fGInvZ]
// Get next u, v and z values.
mov ebx,dword ptr[fGInvZ] // f_next_z = fInverse(fGInvZ);
mov eax,iFI_SIGN_EXPONENT_SUB
sub eax,ebx
and ebx,iFI_MASK_MANTISSA
sar ebx,iSHIFT_MANTISSA
and eax,iFI_MASK_SIGN_EXPONENT
fst [fGUInvZ]
mov ebx,dword ptr[i4InverseMantissa + ebx*4]
mov edi,[i_pixel]
add eax,ebx
mov ebx,[iNextSubdivide]
mov dword ptr[f_next_z],eax
sub edi,ebx
// Set new texture coordinate increments.
fmul [f_next_z]
fxch st(1)
fst [fGVInvZ]
fmul [f_next_z]
fxch st(1)
fstp [fU] // V
// Clamp fU
mov eax,[fU]
mov ebx,fTexEdgeTolerance
mov ebp,fTexWidth
cmp eax,ebx
cmovl (_eax,_ebx)
cmp eax,ebp
cmovg (_eax,_ebp)
mov ebp,fTexHeight
mov [fU],eax
fld [fU] // U,V
fsub [f_u] // U-fu,V
fxch st(1) // V,U-fu
fstp [fV] // U-fu
// Clamp fV
mov eax,[fV]
cmp eax,ebx
cmovl (_eax,_ebx)
cmp eax,ebp
cmovg (_eax,_ebp)
mov [fV],eax
fld [fV] // V U-fu
#else // (VER_CLAMP_UV_16BIT==TRUE)
//---------------------------------------------------------------------------
// Set current texture coordinates (not clamped).
fmul [f_z] // U
fld [fGVInvZ] // V/Z U
fmul [f_z] // V U
fxch st(1) // U V
// stall(1)
fst [f_u]
fadd [dFastFixed16Conversion] // iU V
fxch st(1) // V iU
fst [f_v]
fadd [dFastFixed16Conversion] // iV iU
fxch st(1) // iU iV
fstp [d_temp_a] // iV
fld [fGUInvZ] // U/Z iV
fxch st(1) // iV U/Z
fstp [d_temp_b] // U/Z
// Increment u, v and z values.
fadd [fDUInvZScanline] // U2/Z
fld [fGVInvZ]
fadd [fDVInvZScanline] // V2/Z U2/Z
fxch st(1) // U2/Z V2/Z
fld [fGInvZ] // 1/Z U2/Z V2/Z
fadd [fDInvZScanline] // 1/Z2 U2/Z V2/Z
// Setup esi=uFrac, ecx=vFrac, edx=UVInt for Abrash texture loop.
mov edx,dword ptr[d_temp_a] // U (16.16)
mov eax,dword ptr[d_temp_b] // V (16.16)
sar eax,16 // Integral V
mov ecx,[iTexWidth] // Texture width.
imul eax,ecx // iv*twidth
sar edx,16 // Integral U
mov esi,dword ptr[d_temp_a] // Copy of U
shl esi,16 // UFrac
mov ecx,dword ptr[d_temp_b] // Copy of V
shl ecx,16 // VFrac
mov ebx,[pvTextureBitmap] // Texture base pointer.
shr ebx,1 // Into pixel offset.
add edx,eax // iu + iv*twidth
add edx,ebx // Add to edx.
fstp [fGInvZ]
// Get next u, v and z values.
mov ebx,dword ptr[fGInvZ] // f_next_z = fInverse(fGInvZ);
mov eax,iFI_SIGN_EXPONENT_SUB
sub eax,ebx
and ebx,iFI_MASK_MANTISSA
sar ebx,iSHIFT_MANTISSA
and eax,iFI_MASK_SIGN_EXPONENT
fst [fGUInvZ]
mov ebx,dword ptr[i4InverseMantissa + ebx*4]
mov edi,[i_pixel]
add eax,ebx
mov ebx,[iNextSubdivide]
mov dword ptr[f_next_z],eax
sub edi,ebx
// Set new texture coordinate increments.
fmul [f_next_z] // U,V/Z
fxch st(1) // V/Z,U
fst [fGVInvZ]
fmul [f_next_z] // V,U
fxch st(1) // U,V
fst [fU] // U,V
fsub [f_u] // U-fu,V
fxch st(1) // V,U-fu
fst [fV]
#endif // (VER_CLAMP_UV_16BIT==TRUE)
// ---------------------------------------------------------------------------------
// Both clmaped and non-clamped primitives end up here..
fsub [f_v] // V-fv,U-fu
fxch st(1) // U,V
fmul float ptr[fInverseIntTable+edi*4] // (C*U),V
fxch st(1) // V,(C*U)
// stall(1)
fmul float ptr[fInverseIntTable+edi*4] // (C*V),(C*U)
fxch st(1) // (C*U),(C*V)
fadd [dFastFixed16Conversion]
fxch st(1)
// stall(1)
fadd [dFastFixed16Conversion]
fxch st(1)
// stall(1)
fstp [d_temp_a]
fstp [d_temp_b]
mov edi,dword ptr[d_temp_a] // uslope
mov eax,dword ptr[d_temp_b] // vslope
sar edi,16 // integer part of uslope
mov ebp,dword ptr[d_temp_a] // uslope again
shl ebp,16 // fractional part of uslope
mov ebx,eax // vslope again
sar eax,16 // integer part of vslope
mov [w2dDeltaTex.uUFrac],ebp // store UFrac
shl ebx,16 // fractional part of vslope
mov ebp,[iTexWidth] // Load texture width.
imul eax,ebp // ivslope*twidth
add edi,eax // ivslope*twidth + iuslope
mov [w2dDeltaTex.uVFrac],ebx // store VFrac
mov [w2dDeltaTex.iUVInt+4],edi // store integer stride
add edi,ebp // add twidth to integer stride
mov [w2dDeltaTex.iUVInt],edi // store integer stride + twidth
mov eax,[iNextSubdivide]
test eax,eax // Next subdivision is zero length?
jz SUBDIVISION_LOOP
//iCacheNextSubdivide = iSetNextDividePixel(iNextSubdivide);
mov edi,[iSubdivideLen]
// scan line is +ve
add eax,edi
jle short DONE_DIVIDE_PIXEL_CACHE
// calc the new +ve ratio
fild iNextSubdivide
fld fInvSubdivideLen
fchs
fmulp st(1),st(0) // st(0) = (-)fInvSubdivideLen * i_pixel;
fld fDUInvZScanline // U C
xor eax,eax
fmul st(0),st(1) // U*C C
fxch st(1) // C U*C
fld fDVInvZScanline // V C U*C
fxch st(1) // C V U*C
fmul st(1),st(0) // C V*C U*C
mov [iCacheNextSubdivide],eax
fmul fDInvZScanline // Z*C V*C U*C
fxch st(2) // U*C V*C Z*C
fst fDUInvZScanline
fadd fGUInvZ
fxch st(1)
fst fDVInvZScanline
fadd fGVInvZ
fxch st(2)
fst fDInvZScanline
fadd fGInvZ
fxch st(2)
fstp fGVInvZ
fstp fGUInvZ
fst fGInvZ
fdivr fOne // Start the next division.
jmp short SUBDIVISION_LOOP
DONE_DIVIDE_PIXEL_CACHE:
fld fDUInvZScanline // U
fadd fGUInvZ
fld fDVInvZScanline // V U
fadd fGVInvZ
fld fDInvZScanline // Z V U
fadd fGInvZ
fxch st(2) // U V Z
fstp fGUInvZ
fstp fGVInvZ
fst fGInvZ
fdivr fOne // Start the next division.
mov [iCacheNextSubdivide],eax
SUBDIVISION_LOOP:
//---------------------------------------------------------------------------
// Start the next subdivision.
// ecx,edx,esi = texture values
//
mov edi,[i_pixel]
mov eax,[iNextSubdivide] // eax = i_pixel_inner
sub edi,eax // edi = inner loop count
mov ebp,gsGlobals.pvScreen // Pointer the screen.
mov ebx,[i_screen_index] // Load scanline offset.
add ebx,eax // Add scanline offset to i_pixel
mov [i_pixel],eax // Save i_pixel.
lea ebp,[ebp + ebx*2] // Base of span in ebp.
mov [pvBaseOfLine],ebp // Save pointer to base of line.
jmp short INNER_LOOP
ALIGN 16
//
// Draw Pixels
//
INNER_LOOP:
add ecx,[w2dDeltaTex]CWalk2D.uVFrac // Step V fraction.
movzx eax,word ptr[edx*2] // Read texture value (low).
mov ebx,[w2dDeltaTex]CWalk2D.uUFrac // Load U fraction step.
sbb ebp,ebp // Get borrow from V fraction step.
add esi,ebx // Step U fraction.
mov ebx,[pvBaseOfLine] // Load pointer to base of line.
adc edx,[w2dDeltaTex + ebp*4 + 4]CWalk2D.iUVInt // Integer step + u carry.
test eax,eax
jz short SKIP_TRANSPARENT
mov [ebx + edi*2],ax // Store pixel.
SKIP_TRANSPARENT:
inc edi
jnz short INNER_LOOP
cmp [i_pixel],0
je END_OF_SCANLINE
//----------------------------------------------------------------------------------
// Implement perspective correction.
// stack top contains the result of the divide
// Preserve: ecx,edx,esi
//
mov edi,[iNextSubdivide]
mov eax,[iCacheNextSubdivide]
sub edi,eax
jz short EXIT_BEGIN_NEXT_QUICK_END
fld [fGUInvZ] // U/Z,Z
fxch st(1) // Z,U/Z
fmul st(1),st(0) // Z,U
mov ebx,[bClampUV]
fmul [fGVInvZ] // V,U
#if (VER_CLAMP_UV_16BIT==TRUE)
//----------------------------------------------------------------------------------
// Clamp U and V
fxch st(1)
fstp [f_u]
fstp [f_v]
mov ebp,[f_u]
mov ebx,[fTexEdgeTolerance]
mov eax,[f_v]
cmp eax,ebx
cmovl (_eax,_ebx)
cmp ebp,ebx
cmovl (_ebp,_ebx)
mov ebx,[fTexWidth]
cmp ebp,ebx
cmovg (_ebp,_ebx)
mov ebx,[fTexHeight]
cmp eax,ebx
cmovg (_eax,_ebx)
mov [f_u],ebp
mov [f_v],eax
fld [f_u]
fld [f_v]
#endif // (VER_CLAMP_UV_16BIT==TRUE)
//----------------------------------------------------------------------------------
// Initialize walking values
fld st(1) // U,V,U
fsub [fU] // U-fU,V,U
fld st(1) // V,U-fU,V,U
fsub [fV] // V-fV,U-fU,V,U
fxch st(1) // U-fU,V-fV,V,U
fmul float ptr[fInverseIntTable+edi*4] // (U-fU)*C,V-fV,V,U
fxch st(2) // V,V-fV,(U-fU)*C,U
fstp [fV] // V-fV,(U-fU)*C,U
fmul float ptr[fInverseIntTable+edi*4] // (V-fV)*C,(U-fU)*C,U
fxch st(1) // (U-fU)*C,(V-fV)*C,U
fadd [dFastFixed16Conversion] // f(U-fU)*C,(V-fV)*C,U
fxch st(1) // (V-fV)*C,f(U-fU)*C,U
// stall(1)
fadd [dFastFixed16Conversion] // f(V-fV)*C,f(U-fU)*C,U
fxch st(2) // U,f(U-fU)*C,f(V-fV)*C
fstp [fU] // f(U-fU)*C,f(V-fV)*C
fstp [d_temp_a] // f(V-fV)*C
fstp [d_temp_b]
mov edi,dword ptr[d_temp_a] // uslope
mov eax,dword ptr[d_temp_b] // vslope
sar edi,16 // integer part of uslope
mov ebp,dword ptr[d_temp_a] // uslope again
shl ebp,16 // fractional part of uslope
mov ebx,eax // vslope again
sar eax,16 // integer part of vslope
mov [w2dDeltaTex.uUFrac],ebp // store UFrac
shl ebx,16 // fractional part of vslope
mov ebp,[iTexWidth] // Load texture width.
imul eax,ebp // ivslope*twidth
add edi,eax // ivslope*twidth + iuslope
mov [w2dDeltaTex.uVFrac],ebx // store VFrac
mov [w2dDeltaTex.iUVInt+4],edi // store integer stride
add edi,ebp // add twidth to integer stride
mov [w2dDeltaTex.iUVInt],edi // store integer stride + twidth
mov eax,[iCacheNextSubdivide]
//----------------------------------------------------------------------------------
// Begin Next Subdivision
mov [iNextSubdivide],eax // eax == iNextSubdivide
test eax,eax // Next subdivision is zero length?
jz SUBDIVISION_LOOP
// scan line is +ve
add eax,[iSubdivideLen]
jle short DONE_DIVIDE_PIXEL_CACHE_END
// calc the new +ve ratio
fild iNextSubdivide
fld fInvSubdivideLen
fchs
fmulp st(1),st(0) // st(0) = (-)fInvSubdivideLen * i_pixel;
fld fDUInvZScanline // U C
xor eax,eax
fmul st(0),st(1) // U*C C
fxch st(1) // C U*C
fld fDVInvZScanline // V C U*C
fxch st(1) // C V U*C
fmul st(1),st(0) // C V*C U*C
mov [iCacheNextSubdivide],eax
fmul fDInvZScanline // Z*C V*C U*C
fxch st(2) // U*C V*C Z*C
fst fDUInvZScanline // U V Z
fadd fGUInvZ
fxch st(1) // V U Z
fst fDVInvZScanline
fadd fGVInvZ
fxch st(2) // Z U V
fst fDInvZScanline
fadd fGInvZ
fxch st(2) // V U Z
fstp fGVInvZ
fstp fGUInvZ
fst fGInvZ
fdivr fOne // Start the next division.
jmp SUBDIVISION_LOOP
DONE_DIVIDE_PIXEL_CACHE_END:
fld fDUInvZScanline // U
fadd fGUInvZ
fld fDVInvZScanline // V U
fadd fGVInvZ
fld fDInvZScanline // Z V U
fadd fGInvZ
fxch st(2) // U V Z
fstp fGUInvZ
fstp fGVInvZ
fst fGInvZ
fdivr fOne // Start the next division.
mov [iCacheNextSubdivide],eax
jmp SUBDIVISION_LOOP
// When the sub divide equals the cached sub-divide we end up here but
// there is an element left on the fp stack.
EXIT_BEGIN_NEXT_QUICK_END:
// Dump value on stack
ffree st(0)
fincstp
jmp SUBDIVISION_LOOP
END_OF_SCANLINE:
pop ebp // Restore ebp.
//
// Increment the base edge.
//
mov esi,[pdtri] // Pointer to polygon object.
mov ecx,[pscan] // Pointer to scanline object.
// Step length, line starting address.
mov eax,[esi]CDrawPolygonBase.fxLineLength.i4Fx
mov ebx,[esi]CDrawPolygonBase.iLineStartIndex
add eax,[esi]CDrawPolygonBase.fxDeltaLineLength.i4Fx
add ebx,[gsGlobals.u4LinePixels]
mov [esi]CDrawPolygonBase.fxLineLength.i4Fx,eax
mov [esi]CDrawPolygonBase.iLineStartIndex,ebx
// Step edge values.
mov eax,[ecx]TCopyPerspTrans.fxX.i4Fx
mov ebx,[ecx]TCopyPerspTrans.fxXDifference.i4Fx
fld [ecx]TCopyPerspTrans.indCoord.fUInvZ
fadd [f_inc_uinvz]
fld [ecx]TCopyPerspTrans.indCoord.fVInvZ
fadd [f_inc_vinvz]
fld [ecx]TCopyPerspTrans.indCoord.fInvZ
fadd [f_inc_invz]
add eax,[fx_inc]
add ebx,[fx_diff]
jge short NO_UNDERFLOW
// Underflow.
add ebx,0x00010000
jmp short BORROW_OR_CARRY
NO_UNDERFLOW:
cmp ebx,0x00010000
jle short NO_OVERFLOW
// Overflow.
sub ebx,0x00010000
jmp short BORROW_OR_CARRY
NO_OVERFLOW:
fstp [ecx]TCopyPerspTrans.indCoord.fInvZ
fstp [ecx]TCopyPerspTrans.indCoord.fVInvZ
fstp [ecx]TCopyPerspTrans.indCoord.fUInvZ
jmp short FINISH_LOOPING
BORROW_OR_CARRY:
fadd [fNegInvZ] // Z V U
fxch st(1) // V Z U
fadd [fNegVInvZ] // V Z U
fxch st(2) // U Z V
fadd [fNegUInvZ] // U Z V
fxch st(1) // Z U V
fstp [ecx]TCopyPerspTrans.indCoord.fInvZ
fstp [ecx]TCopyPerspTrans.indCoord.fUInvZ
fstp [ecx]TCopyPerspTrans.indCoord.fVInvZ
FINISH_LOOPING:
// Y Loop control.
mov [ecx]TCopyPerspTrans.fxX.i4Fx,eax
mov edx,[esi]CDrawPolygonBase.iY
mov [ecx]TCopyPerspTrans.fxXDifference.i4Fx,ebx
inc edx
mov ebx,[esi]CDrawPolygonBase.iYTo
mov [esi]CDrawPolygonBase.iY,edx
cmp edx,ebx
jl Y_LOOP
}
}
//*****************************************************************************************
//
// Perspective, no Clut, non-transparent.
//
void DrawSubtriangle(TCopyPersp* pscan, CDrawPolygon<TCopyPersp>* pdtri)
{
fixed fx_inc;
fixed fx_diff;
float f_inc_uinvz;
float f_inc_vinvz;
float f_inc_invz;
TCopyPersp* plinc = &pdtri->pedgeBase->lineIncrement;
__asm
{
//
// Local copies of edge stepping values.
//
// fixed fx_inc = pdtri->pedgeBase->lineIncrement.fxX;
// fixed fx_diff = pdtri->pedgeBase->lineIncrement.fxXDifference;
// float f_inc_uinvz = pdtri->pedgeBase->lineIncrement.indCoord.fUInvZ;
// float f_inc_vinvz = pdtri->pedgeBase->lineIncrement.indCoord.fVInvZ;
// float f_inc_invz = pdtri->pedgeBase->lineIncrement.indCoord.fInvZ;
//
mov eax,[plinc]
mov esi,[pdtri] // Pointer to polygon object.
mov ecx,[pscan] // Pointer to scanline object.
mov ebx,[eax]TCopyPersp.fxX
mov edx,[eax]TCopyPersp.fxXDifference
mov fx_inc,ebx
mov fx_diff,edx
mov ebx,[eax]TCopyPersp.indCoord.fUInvZ
mov edx,[eax]TCopyPersp.indCoord.fVInvZ
mov f_inc_uinvz,ebx
mov ebx,[eax]TCopyPersp.indCoord.fInvZ
mov f_inc_vinvz,edx
mov f_inc_invz,ebx
//
// Iterate through the scanlines that intersect the subtriangle.
//
Y_LOOP:
push ebp // Save ebp.
// Check to see if we sould skip this scanline.
mov eax,[esi]CDrawPolygonBase.iY
mov ebx,[bEvenScanlinesOnly]
and eax,ebx
jnz END_OF_SCANLINE
mov ebx,[ecx]TCopyPersp.fxX.i4Fx
mov eax,[esi]CDrawPolygonBase.fxLineLength.i4Fx
add ebx,eax
mov eax,[ecx]TCopyPersp.fxX.i4Fx
sar ebx,16
mov edx,[esi]CDrawPolygonBase.iLineStartIndex
sar eax,16
add edx,ebx
sub eax,ebx // i_pixel
jge END_OF_SCANLINE
mov [i_screen_index],edx
mov [i_pixel],eax
//---------------------------------------------------------------------------
// Caclulate values for the first correction span, and start the divide for
// the next span.
//
// Copy global texture values.
mov ebx,[ecx]TCopyPersp.indCoord.fUInvZ
mov esi,[ecx]TCopyPersp.indCoord.fVInvZ
mov edi,[ecx]TCopyPersp.indCoord.fInvZ
mov [fGUInvZ],ebx
mov [fGVInvZ],esi
mov [fGInvZ],edi
mov ebx,[fDUInvZEdge]
mov esi,[fDVInvZEdge]
mov edi,[fDInvZEdge]
mov [fDUInvZScanline],ebx
mov [fDVInvZScanline],esi
mov [fDInvZScanline],edi
mov ebx,[iSubdivideLen]
// scan line is +ve
add eax,ebx
jg short PARTIAL_SUBDIVIDE_POS
// Check alignment.
add edx,eax // i_screen_index + i_pixel
add edx,edx // (i_screen_index + i_pixel) * 2
and edx,3
jz short DONE_DIVIDE_PIXEL
// We are using subdivide length - 1 for alignment.
mov ebx,[fDUInvZEdgeMinusOne]
mov esi,[fDVInvZEdgeMinusOne]
mov edi,[fDInvZEdgeMinusOne]
mov [fDUInvZScanline],ebx
mov [fDVInvZScanline],esi
mov [fDInvZScanline],edi
dec eax
jmp DONE_DIVIDE_PIXEL
PARTIAL_SUBDIVIDE_POS:
// calc the new +ve ratio
fild [i_pixel]
fld fInvSubdivideLen
fchs
xor eax,eax
fmulp st(1),st(0) // st(0) = (-)fInvSubdivideLen * i_pixel;
fld fDUInvZScanline // U C
// stall(1)
fmul st(0),st(1) // U*C C
fxch st(1) // C U*C
fld fDVInvZScanline // V C U*C
fxch st(1) // C V U*C
fmul st(1),st(0) // C V*C U*C
// stall(1)
fmul fDInvZScanline // Z*C V*C U*C
fxch st(2) // U*C V*C Z*C
fstp fDUInvZScanline // V*C Z*C
fstp fDVInvZScanline // Z*C
fstp fDInvZScanline
DONE_DIVIDE_PIXEL:
// Get current u, v and z values.
mov [iNextSubdivide],eax
mov ebx,dword ptr[fGInvZ] // f_z = fInverse(fGInvZ);
mov eax,iFI_SIGN_EXPONENT_SUB
sub eax,ebx
and ebx,iFI_MASK_MANTISSA
sar ebx,iSHIFT_MANTISSA
and eax,iFI_MASK_SIGN_EXPONENT
fld [fGUInvZ] // U/Z
add eax,dword ptr[i4InverseMantissa + ebx*4]
mov ebx,[bClampUV]
mov dword ptr[f_z],eax
#if (VER_CLAMP_UV_16BIT==TRUE)
//---------------------------------------------------------------------------
// Set current texture coordinates (clamped).
fmul [f_z]
fld [fGVInvZ]
fmul [f_z]
fxch st(1)
// stall(1)
fstp [f_u]
// Clamp f_u
mov eax,[f_u]
mov ebx,fTexEdgeTolerance
mov ecx,fTexWidth
cmp eax,ebx
cmovl (_eax,_ebx)
cmp eax,ecx
cmovg (_eax,_ecx)
mov ecx,fTexHeight
mov [f_u],eax
fld [f_u]
fadd [dFastFixed16Conversion]
fxch st(1)
// Clamp f_v
fstp [f_v]
mov eax,[f_v]
cmp eax,ebx
cmovl (_eax,_ebx)
cmp eax,ecx
cmovg (_eax,_ecx)
mov [f_v],eax
fld [f_v]
fadd [dFastFixed16Conversion]
fxch st(1)
fstp [d_temp_a]
fld [fGUInvZ]
fxch st(1)
fstp [d_temp_b]
// Increment u, v and z values.
fadd [fDUInvZScanline]
fld [fGVInvZ]
fadd [fDVInvZScanline]
fxch st(1)
fld [fGInvZ]
fadd [fDInvZScanline]
// Setup esi=uFrac, ecx=vFrac, edx=UVInt for Abrash texture loop.
mov edx,dword ptr[d_temp_a] // U (16.16)
mov eax,dword ptr[d_temp_b] // V (16.16)
sar eax,16 // Integral V
mov ecx,[iTexWidth] // Texture width.
imul eax,ecx // iv*twidth
sar edx,16 // Integral U
mov esi,dword ptr[d_temp_a] // Copy of U
shl esi,16 // UFrac
mov ecx,dword ptr[d_temp_b] // Copy of V
shl ecx,16 // VFrac
add edx,eax // iu + iv*twidth
mov ebx,[pvTextureBitmap] // Texture base pointer.
shr ebx,1 // Into pixel offset.
add edx,ebx // Add texture pointer in.
fstp [fGInvZ]
// Get next u, v and z values.
mov ebx,dword ptr[fGInvZ] // f_next_z = fInverse(fGInvZ);
mov eax,iFI_SIGN_EXPONENT_SUB
sub eax,ebx
and ebx,iFI_MASK_MANTISSA
sar ebx,iSHIFT_MANTISSA
and eax,iFI_MASK_SIGN_EXPONENT
fst [fGUInvZ]
mov ebx,dword ptr[i4InverseMantissa + ebx*4]
mov edi,[i_pixel]
add eax,ebx
mov ebx,[iNextSubdivide]
mov dword ptr[f_next_z],eax
sub edi,ebx
// Set new texture coordinate increments.
fmul [f_next_z]
fxch st(1)
fst [fGVInvZ]
fmul [f_next_z]
fxch st(1)
fstp [fU] // V
// Clamp fU
mov eax,[fU]
mov ebx,fTexEdgeTolerance
mov ebp,fTexWidth
cmp eax,ebx
cmovl (_eax,_ebx)
cmp eax,ebp
cmovg (_eax,_ebp)
mov ebp,fTexHeight
mov [fU],eax
fld [fU] // U,V
fsub [f_u] // U-fu,V
fxch st(1) // V,U-fu
fstp [fV] // U-fu
// Clamp fV
mov eax,[fV]
cmp eax,ebx
cmovl (_eax,_ebx)
cmp eax,ebp
cmovg (_eax,_ebp)
mov [fV],eax
fld [fV] // V U-fu
#else // (VER_CLAMP_UV_16BIT==TRUE)
//---------------------------------------------------------------------------
// Set current texture coordinates (not clamped).
fmul [f_z] // U
fld [fGVInvZ] // V/Z U
fmul [f_z] // V U
fxch st(1) // U V
// stall(1)
fst [f_u]
fadd [dFastFixed16Conversion] // iU V
fxch st(1) // V iU
fst [f_v]
fadd [dFastFixed16Conversion] // iV iU
fxch st(1) // iU iV
fstp [d_temp_a] // iV
fld [fGUInvZ] // U/Z iV
fxch st(1) // iV U/Z
fstp [d_temp_b] // U/Z
// Increment u, v and z values.
fadd [fDUInvZScanline] // U2/Z
fld [fGVInvZ]
fadd [fDVInvZScanline] // V2/Z U2/Z
fxch st(1) // U2/Z V2/Z
fld [fGInvZ] // 1/Z U2/Z V2/Z
fadd [fDInvZScanline] // 1/Z2 U2/Z V2/Z
// Setup esi=uFrac, ecx=vFrac, edx=UVInt for Abrash texture loop.
mov edx,dword ptr[d_temp_a] // U (16.16)
mov eax,dword ptr[d_temp_b] // V (16.16)
sar eax,16 // Integral V
mov ecx,[iTexWidth] // Texture width.
imul eax,ecx // iv*twidth
sar edx,16 // Integral U
mov esi,dword ptr[d_temp_a] // Copy of U
shl esi,16 // UFrac
mov ecx,dword ptr[d_temp_b] // Copy of V
shl ecx,16 // VFrac
add edx,eax // iu + iv*twidth
mov ebx,[pvTextureBitmap] // Texture base pointer.
shr ebx,1 // Into pixel offset.
add edx,ebx // Add texture pointer in.
fstp [fGInvZ]
// Get next u, v and z values.
mov ebx,dword ptr[fGInvZ] // f_next_z = fInverse(fGInvZ);
mov eax,iFI_SIGN_EXPONENT_SUB
sub eax,ebx
and ebx,iFI_MASK_MANTISSA
sar ebx,iSHIFT_MANTISSA
and eax,iFI_MASK_SIGN_EXPONENT
fst [fGUInvZ]
mov ebx,dword ptr[i4InverseMantissa + ebx*4]
mov edi,[i_pixel]
add eax,ebx
mov ebx,[iNextSubdivide]
mov dword ptr[f_next_z],eax
sub edi,ebx
// Set new texture coordinate increments.
fmul [f_next_z] // U,V/Z
fxch st(1) // V/Z,U
fst [fGVInvZ]
fmul [f_next_z] // V,U
fxch st(1) // U,V
fst [fU] // U,V
fsub [f_u] // U-fu,V
fxch st(1) // V,U-fu
fst [fV]
#endif // (VER_CLAMP_UV_16BIT==TRUE)
// ---------------------------------------------------------------------------------
// Both clmaped and non-clamped primitives end up here..
fsub [f_v] // V-fv,U-fu
fxch st(1) // U,V
fmul float ptr[fInverseIntTable+edi*4] // (C*U),V
fxch st(1) // V,(C*U)
// stall(1)
fmul float ptr[fInverseIntTable+edi*4] // (C*V),(C*U)
fxch st(1) // (C*U),(C*V)
fadd [dFastFixed16Conversion]
fxch st(1)
// stall(1)
fadd [dFastFixed16Conversion]
fxch st(1)
// stall(1)
fstp [d_temp_a]
fstp [d_temp_b]
mov edi,dword ptr[d_temp_a] // uslope
mov eax,dword ptr[d_temp_b] // vslope
sar edi,16 // integer part of uslope
mov ebp,dword ptr[d_temp_a] // uslope again
shl ebp,16 // fractional part of uslope
mov ebx,eax // vslope again
sar eax,16 // integer part of vslope
mov [w2dDeltaTex.uUFrac],ebp // store UFrac
shl ebx,16 // fractional part of vslope
mov ebp,[iTexWidth] // Load texture width.
imul eax,ebp // ivslope*twidth
add edi,eax // ivslope*twidth + iuslope
mov [w2dDeltaTex.uVFrac],ebx // store VFrac
mov [w2dDeltaTex.iUVInt+4],edi // store integer stride
add edi,ebp // add twidth to integer stride
mov [w2dDeltaTex.iUVInt],edi // store integer stride + twidth
mov eax,[iNextSubdivide]
test eax,eax // Next subdivision is zero length?
jz SUBDIVISION_LOOP
//iCacheNextSubdivide = iSetNextDividePixel(iNextSubdivide);
mov edi,[iSubdivideLen]
// scan line is +ve
add eax,edi
jle short DONE_DIVIDE_PIXEL_CACHE
// calc the new +ve ratio
fild iNextSubdivide
fld fInvSubdivideLen
fchs
fmulp st(1),st(0) // st(0) = (-)fInvSubdivideLen * i_pixel;
fld fDUInvZEdge // U C
xor eax,eax
fmul st(0),st(1) // U*C C
fxch st(1) // C U*C
fld fDVInvZEdge // V C U*C
fxch st(1) // C V U*C
fmul st(1),st(0) // C V*C U*C
mov [iCacheNextSubdivide],eax
fmul fDInvZEdge // Z*C V*C U*C
fxch st(2) // U*C V*C Z*C
fst fDUInvZScanline // U*C V*C Z*C
fadd fGUInvZ // U*C+L V*C Z*C
fxch st(1) // V*C U*C+L Z*C
fst fDVInvZScanline // V*C U*C+L Z*C
fadd fGVInvZ // V*C+L U*C+L Z*C
fxch st(2) // Z*C U*C+L V*C+L
fst fDInvZScanline // Z*C U*C+L V*C+L
fadd fGInvZ // Z*C+L U*C+L V*C+L
fxch st(2)
fstp fGVInvZ
fstp fGUInvZ
fst fGInvZ
fdivr fOne // Start the next division.
jmp short SUBDIVISION_LOOP
DONE_DIVIDE_PIXEL_CACHE:
// Copy texture values.
mov ebp,[fDUInvZEdge]
mov ebx,[fDVInvZEdge]
mov edi,[fDInvZEdge]
mov [fDUInvZScanline],ebp
mov [fDVInvZScanline],ebx
mov [fDInvZScanline],edi
// Step texture values.
fld fDUInvZScanline // U
fadd fGUInvZ
fld fDVInvZScanline // V U
fadd fGVInvZ
fld fDInvZScanline // Z V U
fadd fGInvZ
fxch st(2) // U V Z
fstp fGUInvZ
fstp fGVInvZ
fst fGInvZ
fdivr fOne // Start the next division.
mov [iCacheNextSubdivide],eax
SUBDIVISION_LOOP:
//---------------------------------------------------------------------------
// Start the next subdivision.
// ecx,edx,esi = texture values
//
mov edi,[i_pixel]
mov eax,[iNextSubdivide]
sub edi,eax
//
// eax = i_pixel
// edi = inner loop count
// ecx,edx,esi = texture values
//
mov ebp,gsGlobals.pvScreen // Pointer the screen.
mov ebx,[i_screen_index] // Load scanline offset.
add ebx,eax // Add scanline offset to i_pixel
mov [i_pixel],eax // Save i_pixel.
lea ebp,[ebp+ebx*2] // Base of span in ebp.
xor eax,eax // Clear eax.
mov [pvBaseOfLine],ebp // Keep pointer to base of span.
jmp short INNER_LOOP
ALIGN 16
//
// Draw Pixels
//
INNER_LOOP:
add ecx,[w2dDeltaTex]CWalk2D.uVFrac // Step V fraction.
movzx eax,word ptr[edx*2] // Read texture value (low).
mov ebx,[w2dDeltaTex]CWalk2D.uUFrac // Load U fraction step.
sbb ebp,ebp // Get borrow from V fraction step.
add esi,ebx // Step U fraction.
adc edx,[w2dDeltaTex + ebp*4 + 4]CWalk2D.iUVInt // Integer step + u carry.
mov ebx,[pvBaseOfLine] // Load pointer to base of line.
mov [ebx + edi*2],ax // Store pixel.
inc edi
jnz short INNER_LOOP
cmp [i_pixel],0
je END_OF_SCANLINE
//----------------------------------------------------------------------------------
// Implement perspective correction.
// stack top contains the result of the divide
// Preserve: ecx,edx,esi
//
mov edi,[iNextSubdivide]
mov eax,[iCacheNextSubdivide]
sub edi,eax
jz short EXIT_BEGIN_NEXT_QUICK_END // This should never happen.
fld [fGUInvZ] // U/Z,Z
fxch st(1) // Z,U/Z
fmul st(1),st(0) // Z,U
mov ebx,[bClampUV]
fmul [fGVInvZ] // V,U
#if (VER_CLAMP_UV_16BIT==TRUE)
//----------------------------------------------------------------------------------
// Clamp U and V
fxch st(1)
fstp [f_u]
fstp [f_v]
mov ebp,[f_u]
mov ebx,[fTexEdgeTolerance]
mov eax,[f_v]
cmp eax,ebx
cmovl (_eax,_ebx)
cmp ebp,ebx
cmovl (_ebp,_ebx)
mov ebx,[fTexWidth]
cmp ebp,ebx
cmovg (_ebp,_ebx)
mov ebx,[fTexHeight]
cmp eax,ebx
cmovg (_eax,_ebx)
mov [f_u],ebp
mov [f_v],eax
fld [f_u]
fld [f_v]
#endif // (VER_CLAMP_UV_16BIT==TRUE)
//----------------------------------------------------------------------------------
// Initialize walking values
fld st(1) // U,V,U
fsub [fU] // U-fU,V,U
fld st(1) // V,U-fU,V,U
fsub [fV] // V-fV,U-fU,V,U
fxch st(1) // U-fU,V-fV,V,U
fmul float ptr[fInverseIntTable+edi*4] // (U-fU)*C,V-fV,V,U
fxch st(2) // V,V-fV,(U-fU)*C,U
fstp [fV] // V-fV,(U-fU)*C,U
fmul float ptr[fInverseIntTable+edi*4] // (V-fV)*C,(U-fU)*C,U
fxch st(1) // (U-fU)*C,(V-fV)*C,U
fadd [dFastFixed16Conversion] // f(U-fU)*C,(V-fV)*C,U
fxch st(1) // (V-fV)*C,f(U-fU)*C,U
// stall(1)
fadd [dFastFixed16Conversion] // f(V-fV)*C,f(U-fU)*C,U
fxch st(2) // U,f(U-fU)*C,f(V-fV)*C
fstp [fU] // f(U-fU)*C,f(V-fV)*C
fstp [d_temp_a] // f(V-fV)*C
fstp [d_temp_b]
mov edi,dword ptr[d_temp_a] // uslope
mov eax,dword ptr[d_temp_b] // vslope
sar edi,16 // integer part of uslope
mov ebp,dword ptr[d_temp_a] // uslope again
shl ebp,16 // fractional part of uslope
mov ebx,eax // vslope again
sar eax,16 // integer part of vslope
mov [w2dDeltaTex.uUFrac],ebp // store UFrac
shl ebx,16 // fractional part of vslope
mov ebp,[iTexWidth] // Load texture width.
imul eax,ebp // ivslope*twidth
add edi,eax // ivslope*twidth + iuslope
mov [w2dDeltaTex.uVFrac],ebx // store VFrac
mov [w2dDeltaTex.iUVInt+4],edi // store integer stride
add edi,ebp // add twidth to integer stride
mov [w2dDeltaTex.iUVInt],edi // store integer stride + twidth
mov eax,[iCacheNextSubdivide] // Load cached next subdivision.
mov [iNextSubdivide],eax // eax == iNextSubdivide
test eax,eax // Next subdivision is zero length?
jz SUBDIVISION_LOOP
//----------------------------------------------------------------------------------
// Begin Next Subdivision
// scan line is +ve
add eax,[iSubdivideLen]
jle short DONE_DIVIDE_PIXEL_CACHE_END
// calc the new +ve ratio
fild iNextSubdivide
fld fInvSubdivideLen
fchs
fmulp st(1),st(0) // st(0) = (-)fInvSubdivideLen * i_pixel;
fld fDUInvZScanline // U C
xor eax,eax
fmul st(0),st(1) // U*C C
fxch st(1) // C U*C
fld fDVInvZScanline // V C U*C
fxch st(1) // C V U*C
fmul st(1),st(0) // C V*C U*C
mov [iCacheNextSubdivide],eax
fmul fDInvZScanline // Z*C V*C U*C
fxch st(2) // U*C V*C Z*C
fst fDUInvZScanline // U V Z
fadd fGUInvZ
fxch st(1) // V U Z
fst fDVInvZScanline
fadd fGVInvZ
fxch st(2) // Z U V
fst fDInvZScanline
fadd fGInvZ
fxch st(2) // V U Z
fstp fGVInvZ
fstp fGUInvZ
fst fGInvZ
fdivr fOne // Start the next division.
jmp SUBDIVISION_LOOP
DONE_DIVIDE_PIXEL_CACHE_END:
fld fDUInvZScanline // U
fadd fGUInvZ
fld fDVInvZScanline // V U
fadd fGVInvZ
fld fDInvZScanline // Z V U
fadd fGInvZ
fxch st(2) // U V Z
fstp fGUInvZ
fstp fGVInvZ
fst fGInvZ
fdivr fOne // Start the next division.
mov [iCacheNextSubdivide],eax
jmp SUBDIVISION_LOOP
EXIT_BEGIN_NEXT_QUICK_END:
// When the sub divide equals the cached sub-divide we end up here but
// there is an element left on the fp stack.
// This is never called unless there is something wrong with the loop
// counter.
// Dump value on stack
fcomp st(0)
jmp SUBDIVISION_LOOP
END_OF_SCANLINE:
pop ebp // Restore ebp.
//
// Increment the base edge.
//
mov esi,[pdtri] // Pointer to polygon object.
mov ecx,[pscan] // Pointer to scanline object.
// Step length, line starting address.
mov eax,[esi]CDrawPolygonBase.fxLineLength.i4Fx
mov ebx,[esi]CDrawPolygonBase.iLineStartIndex
add eax,[esi]CDrawPolygonBase.fxDeltaLineLength.i4Fx
add ebx,[gsGlobals.u4LinePixels]
mov [esi]CDrawPolygonBase.fxLineLength.i4Fx,eax
mov [esi]CDrawPolygonBase.iLineStartIndex,ebx
// Step edge values.
mov eax,[ecx]TCopyPersp.fxX.i4Fx
mov ebx,[ecx]TCopyPersp.fxXDifference.i4Fx
fld [ecx]TCopyPersp.indCoord.fUInvZ
fadd [f_inc_uinvz]
fld [ecx]TCopyPersp.indCoord.fVInvZ
fadd [f_inc_vinvz]
fld [ecx]TCopyPersp.indCoord.fInvZ
fadd [f_inc_invz]
add eax,[fx_inc]
add ebx,[fx_diff]
jge short NO_UNDERFLOW
// Underflow.
add ebx,0x00010000
jmp short BORROW_OR_CARRY
NO_UNDERFLOW:
cmp ebx,0x00010000
jle short NO_OVERFLOW
// Overflow.
sub ebx,0x00010000
jmp short BORROW_OR_CARRY
NO_OVERFLOW:
fstp [ecx]TCopyPersp.indCoord.fInvZ
fstp [ecx]TCopyPersp.indCoord.fVInvZ
fstp [ecx]TCopyPersp.indCoord.fUInvZ
jmp short FINISH_LOOPING
BORROW_OR_CARRY:
fadd [fNegInvZ] // Z V U
fxch st(1) // V Z U
fadd [fNegVInvZ] // V Z U
fxch st(2) // U Z V
fadd [fNegUInvZ] // U Z V
fxch st(1) // Z U V
fstp [ecx]TCopyPersp.indCoord.fInvZ
fstp [ecx]TCopyPersp.indCoord.fUInvZ
fstp [ecx]TCopyPersp.indCoord.fVInvZ
FINISH_LOOPING:
// Y Loop control.
mov [ecx]TCopyPersp.fxX.i4Fx,eax
mov edx,[esi]CDrawPolygonBase.iY
mov [ecx]TCopyPersp.fxXDifference.i4Fx,ebx
inc edx
mov ebx,[esi]CDrawPolygonBase.iYTo
mov [esi]CDrawPolygonBase.iY,edx
cmp edx,ebx
jl Y_LOOP
}
}
//*****************************************************************************************
//
// Linear, no clut, non-transparent.
//
//**********************************
void DrawSubtriangle(TCopyLinear* pscan, CDrawPolygon<TCopyLinear>* pdtri)
{
TCopyLinear* plinc = &pdtri->pedgeBase->lineIncrement;
_asm
{
push ebp // Save ebp.
mov edi,[plinc] // Pointer to scanline increment.
mov eax,[pscan] // Pointer to scanline.
mov esi,[pdtri] // Pointer to polygon.
//
// Copies of edge stepping values.
//
// fx_inc = pdtri->pedgeBase->lineIncrement.fxX;
// fx_diff = pdtri->pedgeBase->lineIncrement.fxXDifference;
// bf_u_inc = pdtri->pedgeBase->lineIncrement.indCoord.bfU;
// w1d_v_inc = pdtri->pedgeBase->lineIncrement.indCoord.w1dV;
// pscanCopy = pscan;
// pdtriCopy = pdtri;
//
mov ebx,[edi]TCopyLinear.fxX
mov ecx,[edi]TCopyLinear.fxXDifference
mov fx_inc,ebx
mov fx_diff,ecx
mov ebx,[edi]TCopyLinear.indCoord.bfU.i4Int
mov ecx,[edi]TCopyLinear.indCoord.bfU.u4Frac
mov bf_u_inc.i4Int,ebx
mov bf_u_inc.u4Frac,ecx
mov ebx,[edi]TCopyLinear.indCoord.w1dV.bfxValue.i4Int
mov ecx,[edi]TCopyLinear.indCoord.w1dV.bfxValue.u4Frac
mov w1d_v_inc.bfxValue.i4Int,ebx
mov w1d_v_inc.bfxValue.u4Frac,ecx
mov pdtriCopy,esi
mov pscanCopy,eax
// -------------------------------------------------------------------------
//
// Iterate through the scanlines that intersect the subtriangle.
//
// -------------------------------------------------------------------------
LIN_NEXT_SCAN_LINE:
mov ebx,[bEvenScanlinesOnly]
mov ebp,[esi]CDrawPolygonBase.iY
// check for odd scan lines here and skip if even
and ebp,ebx
jnz short INC_BASE_EDGE
mov ebx,[eax]TCopyLinear.fxX.i4Fx
mov ecx,[esi]CDrawPolygonBase.fxLineLength.i4Fx
mov edi,ebx
add ebx,ecx
sar ebx,16
mov ecx,[esi]CDrawPolygonBase.iLineStartIndex
sar edi,16
mov ebp,gsGlobals.pvScreen
add ecx,ebx
sub edi,ebx
jge short INC_BASE_EDGE // no pixels to draw
mov esi,[eax]TCopyLinear.indCoord.bfU.u4Frac // UFrac
mov edx,[eax]TCopyLinear.indCoord.w1dV.bfxValue.i4Int
lea ebp,[ebp + ecx*2]
mov ebx,[eax]TCopyLinear.indCoord.bfU.i4Int
add edx,ebx // Add integer steps.
mov ebx,[pvTextureBitmap] // Texture base pointer.
shr ebx,1 // Into pixel offset.
add edx,ebx // Add texture pointer in.
mov ecx,[eax]TCopyLinear.indCoord.w1dV.bfxValue.u4Frac // VFrac
mov [pvBaseOfLine],ebp
jmp short INNER_LOOP
ALIGN 16
//
// Draw Pixels
//
INNER_LOOP:
add ecx,[w2dDeltaTex]CWalk2D.uVFrac // Step V fraction.
movzx eax,word ptr[edx*2] // Read texture value (low).
mov ebx,[w2dDeltaTex]CWalk2D.uUFrac // Load U fraction step.
sbb ebp,ebp // Get borrow from V fraction step.
add esi,ebx // Step U fraction.
adc edx,[w2dDeltaTex + ebp*4 + 4]CWalk2D.iUVInt // Integer step + u carry.
mov ebx,[pvBaseOfLine] // Load pointer to base of line.
mov [ebx + edi*2],ax // Store pixel.
inc edi
jnz short INNER_LOOP
// -------------------------------------------------------------------------
// Increment the base edge.
INC_BASE_EDGE:
mov eax,[pscanCopy]
mov ebp,bf_u_inc.i4Int
// AGI on eax.
mov ecx,[eax]TCopyLinear.indCoord.bfU.u4Frac
mov edx,[eax]TCopyLinear.indCoord.bfU.i4Int
mov edi,[eax]TCopyLinear.indCoord.w1dV.bfxValue.i4Int
mov esi,[eax]TCopyLinear.indCoord.w1dV.bfxValue.u4Frac
add edi,w1d_v_inc.bfxValue.i4Int
add ecx,bf_u_inc.u4Frac
adc edx,ebp
mov ebp,w1d_v_inc.bfxValue.u4Frac
add esi,ebp
jnc short NO_VFRAC_CARRY
add edi,[eax]TCopyLinear.indCoord.w1dV.iOffsetPerLine
NO_VFRAC_CARRY:
mov ebx,[eax]TCopyLinear.fxXDifference
mov ebp,[fx_diff]
add ebx,ebp
jge short NO_UNDERFLOW
// Undeflow of fxXDifference.
add ebx,0x00010000
mov ebp,bfNegDU.i4Int
add edi,w1dNegDV.bfxValue.i4Int
add ecx,bfNegDU.u4Frac
adc edx,ebp
mov ebp,w1dNegDV.bfxValue.u4Frac
add esi,ebp
jnc short EDGE_DONE
add edi,[eax]TCopyLinear.indCoord.w1dV.iOffsetPerLine
jmp short EDGE_DONE
NO_UNDERFLOW:
cmp ebx,0x00010000
jl short EDGE_DONE
// Overflow of fxXDifference.
sub ebx,0x00010000
mov ebp,bfNegDU.i4Int
add edi,w1dNegDV.bfxValue.i4Int
add ecx,bfNegDU.u4Frac
adc edx,ebp
mov ebp,w1dNegDV.bfxValue.u4Frac
add esi,ebp
jnc short EDGE_DONE
add edi,[eax]TCopyLinear.indCoord.w1dV.iOffsetPerLine
EDGE_DONE:
// Store modified variables and do looping.
mov [eax]TCopyLinear.indCoord.w1dV.bfxValue.u4Frac,esi
mov [eax]TCopyLinear.indCoord.w1dV.bfxValue.i4Int,edi
mov [eax]TCopyLinear.fxXDifference,ebx
mov esi,[pdtriCopy] // Pointer to polygon object.
mov [eax]TCopyLinear.indCoord.bfU.u4Frac,ecx
mov [eax]TCopyLinear.indCoord.bfU.i4Int,edx
// Step length, line starting address.
mov ebx,[esi]CDrawPolygonBase.fxLineLength.i4Fx
mov ecx,[esi]CDrawPolygonBase.iLineStartIndex
add ebx,[esi]CDrawPolygonBase.fxDeltaLineLength.i4Fx
add ecx,[gsGlobals.u4LinePixels]
mov [esi]CDrawPolygonBase.fxLineLength.i4Fx,ebx
mov [esi]CDrawPolygonBase.iLineStartIndex,ecx
mov ecx,[eax]TCopyLinear.fxX
mov ebp,[fx_inc]
add ecx,ebp
mov edx,[esi]CDrawPolygonBase.iY
mov [eax]TCopyLinear.fxX,ecx
inc edx
mov ebx,[esi]CDrawPolygonBase.iYTo
mov [esi]CDrawPolygonBase.iY,edx
cmp edx,ebx
jl LIN_NEXT_SCAN_LINE
pop ebp
}
}
//*****************************************************************************************
//
// Linear, No clut, Transparent.
//
//******************************
void DrawSubtriangle(TCopyLinearTrans* pscan, CDrawPolygon<TCopyLinearTrans>* pdtri)
{
TCopyLinearTrans* plinc = &pdtri->pedgeBase->lineIncrement;
_asm
{
push ebp // Save ebp.
mov edi,[plinc] // Pointer to scanline increment.
mov eax,[pscan] // Pointer to scanline.
mov esi,[pdtri] // Pointer to polygon.
//
// Copies of edge stepping values.
//
// fx_inc = pdtri->pedgeBase->lineIncrement.fxX;
// fx_diff = pdtri->pedgeBase->lineIncrement.fxXDifference;
// bf_u_inc = pdtri->pedgeBase->lineIncrement.indCoord.bfU;
// w1d_v_inc = pdtri->pedgeBase->lineIncrement.indCoord.w1dV;
// pscanCopy = pscan;
// pdtriCopy = pdtri;
//
mov ebx,[edi]TCopyLinearTrans.fxX
mov ecx,[edi]TCopyLinearTrans.fxXDifference
mov fx_inc,ebx
mov fx_diff,ecx
mov ebx,[edi]TCopyLinearTrans.indCoord.bfU.i4Int
mov ecx,[edi]TCopyLinearTrans.indCoord.bfU.u4Frac
mov bf_u_inc.i4Int,ebx
mov bf_u_inc.u4Frac,ecx
mov ebx,[edi]TCopyLinearTrans.indCoord.w1dV.bfxValue.i4Int
mov ecx,[edi]TCopyLinearTrans.indCoord.w1dV.bfxValue.u4Frac
mov w1d_v_inc.bfxValue.i4Int,ebx
mov w1d_v_inc.bfxValue.u4Frac,ecx
mov pdtriLin,esi
mov pscanLin,eax
LIN_NEXT_SCAN_LINE:
mov ebx,[bEvenScanlinesOnly]
mov edx,[esi]CDrawPolygonBase.iY
// check for odd scan lines here and skip if even
and edx,ebx
jnz short INC_BASE_EDGE
mov ebx,[eax]TCopyLinear.fxX.i4Fx
mov ecx,[esi]CDrawPolygonBase.fxLineLength.i4Fx
mov edi,ebx
add ebx,ecx
sar ebx,16
mov ecx,[esi]CDrawPolygonBase.iLineStartIndex
sar edi,16
mov ebp,gsGlobals.pvScreen
add ecx,ebx
sub edi,ebx
jge short INC_BASE_EDGE // no pixels to draw
mov esi,[eax]TCopyLinear.indCoord.bfU.u4Frac // UFrac
mov edx,[eax]TCopyLinear.indCoord.w1dV.bfxValue.i4Int
lea ebp,[ebp + ecx*2]
mov ebx,[eax]TCopyLinear.indCoord.bfU.i4Int
add edx,ebx // Add integer steps.
mov ebx,[pvTextureBitmap] // Texture base pointer.
shr ebx,1 // Into pixel offset.
mov ecx,[eax]TCopyLinear.indCoord.w1dV.bfxValue.u4Frac // VFrac
mov [pvBaseOfLine],ebp
add edx,ebx // Add to edx.
jmp short INNER_LOOP
ALIGN 16
//
// Draw Pixels
//
INNER_LOOP:
add ecx,[w2dDeltaTex]CWalk2D.uVFrac // Step V fraction.
movzx eax,word ptr[edx*2] // Read texture value (low).
mov ebx,[w2dDeltaTex]CWalk2D.uUFrac // Load U fraction step.
sbb ebp,ebp // Get borrow from V fraction step.
add esi,ebx // Step U fraction.
mov ebx,[pvBaseOfLine] // Load pointer to base of line.
adc edx,[w2dDeltaTex + ebp*4 + 4]CWalk2D.iUVInt // Integer step + u carry.
test eax,eax
jz short SKIP_TRANSPARENT
mov [ebx + edi*2],ax // Store pixel.
SKIP_TRANSPARENT:
inc edi
jnz short INNER_LOOP
// -------------------------------------------------------------------------
// Increment the base edge.
INC_BASE_EDGE:
mov eax,[pscanLin]
mov ebp,bf_u_inc.i4Int
// AGI on eax.
mov ecx,[eax]TCopyLinearTrans.indCoord.bfU.u4Frac
mov edx,[eax]TCopyLinearTrans.indCoord.bfU.i4Int
mov edi,[eax]TCopyLinearTrans.indCoord.w1dV.bfxValue.i4Int
mov esi,[eax]TCopyLinearTrans.indCoord.w1dV.bfxValue.u4Frac
add edi,w1d_v_inc.bfxValue.i4Int
add ecx,bf_u_inc.u4Frac
adc edx,ebp
mov ebp,w1d_v_inc.bfxValue.u4Frac
add esi,ebp
jnc short NO_VFRAC_CARRY
add edi,[eax]TCopyLinearTrans.indCoord.w1dV.iOffsetPerLine
NO_VFRAC_CARRY:
mov ebx,[eax]TCopyLinearTrans.fxXDifference
mov ebp,[fx_diff]
add ebx,ebp
jge short NO_UNDERFLOW
// Undeflow of fxXDifference.
add ebx,0x00010000
mov ebp,bfNegDU.i4Int
add edi,w1dNegDV.bfxValue.i4Int
add ecx,bfNegDU.u4Frac
adc edx,ebp
mov ebp,w1dNegDV.bfxValue.u4Frac
add esi,ebp
jnc short EDGE_DONE
add edi,[eax]TCopyLinearTrans.indCoord.w1dV.iOffsetPerLine
jmp short EDGE_DONE
NO_UNDERFLOW:
cmp ebx,0x00010000
jl short EDGE_DONE
// Overflow of fxXDifference.
sub ebx,0x00010000
mov ebp,bfNegDU.i4Int
add edi,w1dNegDV.bfxValue.i4Int
add ecx,bfNegDU.u4Frac
adc edx,ebp
mov ebp,w1dNegDV.bfxValue.u4Frac
add esi,ebp
jnc short EDGE_DONE
add edi,[eax]TCopyLinearTrans.indCoord.w1dV.iOffsetPerLine
EDGE_DONE:
// Store modified variables and do looping.
mov [eax]TCopyLinearTrans.indCoord.w1dV.bfxValue.u4Frac,esi
mov [eax]TCopyLinearTrans.indCoord.w1dV.bfxValue.i4Int,edi
mov [eax]TCopyLinearTrans.fxXDifference,ebx
mov esi,[pdtriLin] // Pointer to polygon object.
mov [eax]TCopyLinearTrans.indCoord.bfU.u4Frac,ecx
mov [eax]TCopyLinearTrans.indCoord.bfU.i4Int,edx
// Step length, line starting address.
mov ebx,[esi]CDrawPolygonBase.fxLineLength.i4Fx
mov ecx,[esi]CDrawPolygonBase.iLineStartIndex
add ebx,[esi]CDrawPolygonBase.fxDeltaLineLength.i4Fx
add ecx,[gsGlobals.u4LinePixels]
mov [esi]CDrawPolygonBase.fxLineLength.i4Fx,ebx
mov [esi]CDrawPolygonBase.iLineStartIndex,ecx
mov ecx,[eax]TCopyLinearTrans.fxX
mov ebp,[fx_inc]
add ecx,ebp
mov edx,[esi]CDrawPolygonBase.iY
mov [eax]TCopyLinearTrans.fxX,ecx
inc edx
mov ebx,[esi]CDrawPolygonBase.iYTo
mov [esi]CDrawPolygonBase.iY,edx
cmp edx,ebx
jl LIN_NEXT_SCAN_LINE
pop ebp
}
}
//*****************************************************************************************
//
// Perspective, Terrain Clut, non-transparent.
//
void DrawSubtriangle(TCopyTerrainPersp* pscan, CDrawPolygon<TCopyTerrainPersp>* pdtri)
{
static void* pvLastTexture = 0;
static void* pvLastFogClut = 0;
TCopyTerrainPersp* plinc = &pdtri->pedgeBase->lineIncrement;
Assert(pu2TerrainTextureFogClut);
__asm
{
//
// Do self-modifications here.
//
mov eax,[pvTextureBitmap] // Texture pointer
mov ebx,[pu2TerrainTextureFogClut] // Terrain clut pointer. Hack.
mov ecx,[pvLastTexture]
mov edx,[pvLastFogClut]
cmp eax,ecx
jne short DO_MODIFY
cmp ebx,edx
je short DONE_WITH_MODIFY
DO_MODIFY:
mov [pvLastTexture],eax
mov [pvLastFogClut],ebx
lea ecx,[eax+1]
lea edx,[ebx+1]
mov [MODIFY_FOR_TEXTUREPOINTER_E-4],eax
mov [MODIFY_FOR_TEXTUREPOINTER_P1_E-4],ecx
mov [MODIFY_FOR_CLUT_E-4],ebx
mov [MODIFY_FOR_CLUT_P1_E-4],edx
mov [MODIFY_FOR_TEXTUREPOINTER_F-4],eax
mov [MODIFY_FOR_TEXTUREPOINTER_P1_F-4],ecx
mov [MODIFY_FOR_CLUT_F-4],ebx
mov [MODIFY_FOR_CLUT_P1_F-4],edx
mov [MODIFY_FOR_TEXTUREPOINTER_G-4],eax
mov [MODIFY_FOR_TEXTUREPOINTER_P1_G-4],ecx
mov [MODIFY_FOR_CLUT_G-4],ebx
mov [MODIFY_FOR_CLUT_P1_G-4],edx
mov [MODIFY_FOR_TEXTUREPOINTER_H-4],eax
mov [MODIFY_FOR_TEXTUREPOINTER_P1_H-4],ecx
mov [MODIFY_FOR_CLUT_H-4],ebx
mov [MODIFY_FOR_CLUT_P1_H-4],edx
DONE_WITH_MODIFY:
//
// Local copies of edge stepping values.
//
// fixed fx_inc = pdtri->pedgeBase->lineIncrement.fxX;
// fixed fx_diff = pdtri->pedgeBase->lineIncrement.fxXDifference;
// float f_inc_uinvz = pdtri->pedgeBase->lineIncrement.indCoord.fUInvZ;
// float f_inc_vinvz = pdtri->pedgeBase->lineIncrement.indCoord.fVInvZ;
// float f_inc_invz = pdtri->pedgeBase->lineIncrement.indCoord.fInvZ;
//
mov eax,[plinc]
mov esi,[pdtri] // Pointer to polygon object.
mov ecx,[pscan] // Pointer to scanline object.
mov ebx,[eax]TCopyTerrainPersp.fxX
mov edx,[eax]TCopyTerrainPersp.fxXDifference
mov fx_inc,ebx
mov fx_diff,edx
mov ebx,[eax]TCopyTerrainPersp.indCoord.fUInvZ
mov edx,[eax]TCopyTerrainPersp.indCoord.fVInvZ
mov f_inc_uinvz,ebx
mov ebx,[eax]TCopyTerrainPersp.indCoord.fInvZ
mov f_inc_vinvz,edx
mov f_inc_invz,ebx
//
// Iterate through the scanlines that intersect the subtriangle.
//
Y_LOOP:
push ebp // Save ebp.
// Check to see if we sould skip this scanline.
mov eax,[esi]CDrawPolygonBase.iY
mov ebx,[bEvenScanlinesOnly]
and eax,ebx
jnz END_OF_SCANLINE
mov ebx,[ecx]TCopyTerrainPersp.fxX.i4Fx
mov eax,[esi]CDrawPolygonBase.fxLineLength.i4Fx
add ebx,eax
mov eax,[ecx]TCopyTerrainPersp.fxX.i4Fx
sar ebx,16
mov edx,[esi]CDrawPolygonBase.iLineStartIndex
sar eax,16
add edx,ebx
sub eax,ebx // i_pixel
jge END_OF_SCANLINE
mov [i_screen_index],edx
mov [i_pixel],eax
//---------------------------------------------------------------------------
// Caclulate values for the first correction span, and start the divide for
// the next span.
//
// Copy global texture values.
mov ebx,[ecx]TCopyTerrainPersp.indCoord.fUInvZ
mov esi,[ecx]TCopyTerrainPersp.indCoord.fVInvZ
mov edi,[ecx]TCopyTerrainPersp.indCoord.fInvZ
mov [fGUInvZ],ebx
mov [fGVInvZ],esi
mov [fGInvZ],edi
mov ebx,[fDUInvZEdge]
mov esi,[fDVInvZEdge]
mov edi,[fDInvZEdge]
mov [fDUInvZScanline],ebx
mov [fDVInvZScanline],esi
mov [fDInvZScanline],edi
mov ebx,[iSubdivideLen]
// scan line is +ve
add eax,ebx
jg short PARTIAL_SUBDIVIDE_POS
// Check alignment.
add edx,eax // i_screen_index + i_pixel
add edx,edx // (i_screen_index + i_pixel) * 2
and edx,3
jz short DONE_DIVIDE_PIXEL
// We are using subdivide length - 1 for alignment.
mov ebx,[fDUInvZEdgeMinusOne]
mov esi,[fDVInvZEdgeMinusOne]
mov edi,[fDInvZEdgeMinusOne]
mov [fDUInvZScanline],ebx
mov [fDVInvZScanline],esi
mov [fDInvZScanline],edi
dec eax
jmp DONE_DIVIDE_PIXEL
PARTIAL_SUBDIVIDE_POS:
// calc the new +ve ratio
fild [i_pixel]
fld fInvSubdivideLen
fchs
fmulp st(1),st(0) // st(0) = (-)fInvSubdivideLen * i_pixel;
fld fDUInvZScanline // U C
xor eax,eax
fmul st(0),st(1) // U*C C
fxch st(1) // C U*C
fld fDVInvZScanline // V C U*C
fxch st(1) // C V U*C
fmul st(1),st(0) // C V*C U*C
// stall(1)
fmul fDInvZScanline // Z*C V*C U*C
fxch st(2) // U*C V*C Z*C
fstp fDUInvZScanline // V*C Z*C
fstp fDVInvZScanline // Z*C
fstp fDInvZScanline
DONE_DIVIDE_PIXEL:
// Get current u, v and z values.
mov [iNextSubdivide],eax
mov ebx,dword ptr[fGInvZ] // f_z = fInverse(fGInvZ);
mov eax,iFI_SIGN_EXPONENT_SUB
sub eax,ebx
and ebx,iFI_MASK_MANTISSA
sar ebx,iSHIFT_MANTISSA
and eax,iFI_MASK_SIGN_EXPONENT
fld [fGUInvZ] // U/Z
add eax,dword ptr[i4InverseMantissa + ebx*4]
mov ebx,[bClampUV]
mov dword ptr[f_z],eax
#if (VER_CLAMP_UV_16BIT==TRUE)
//---------------------------------------------------------------------------
// Set current texture coordinates (clamped).
fmul [f_z]
fld [fGVInvZ]
fmul [f_z]
fxch st(1)
// stall(1)
fstp [f_u]
// Clamp f_u
mov eax,[f_u]
mov ebx,fTexEdgeTolerance
mov ecx,fTexWidth
cmp eax,ebx
cmovl (_eax,_ebx)
cmp eax,ecx
cmovg (_eax,_ecx)
mov ecx,fTexHeight
mov [f_u],eax
fld [f_u]
fadd [dFastFixed16Conversion]
fxch st(1)
// Clamp f_v
fstp [f_v]
mov eax,[f_v]
cmp eax,ebx
cmovl (_eax,_ebx)
cmp eax,ecx
cmovg (_eax,_ecx)
mov [f_v],eax
fld [f_v]
fadd [dFastFixed16Conversion]
fxch st(1)
fstp [d_temp_a]
fld [fGUInvZ]
fxch st(1)
fstp [d_temp_b]
// Increment u, v and z values.
fadd [fDUInvZScanline]
fld [fGVInvZ]
fadd [fDVInvZScanline]
fxch st(1)
fld [fGInvZ]
fadd [fDInvZScanline]
// Setup esi=uFrac, ecx=vFrac, edx=UVInt for Abrash texture loop.
mov edx,dword ptr[d_temp_a] // U (16.16)
mov eax,dword ptr[d_temp_b] // V (16.16)
sar eax,16 // Integral V
mov ecx,[iTexWidth] // Texture width.
imul eax,ecx // iv*twidth
sar edx,16 // Integral U
mov esi,dword ptr[d_temp_a] // Copy of U
shl esi,16 // UFrac
mov ecx,dword ptr[d_temp_b] // Copy of V
shl ecx,16 // VFrac
add edx,eax // iu + iv*twidth
fstp [fGInvZ]
// Get next u, v and z values.
mov ebx,dword ptr[fGInvZ] // f_next_z = fInverse(fGInvZ);
mov eax,iFI_SIGN_EXPONENT_SUB
sub eax,ebx
and ebx,iFI_MASK_MANTISSA
sar ebx,iSHIFT_MANTISSA
and eax,iFI_MASK_SIGN_EXPONENT
fst [fGUInvZ]
mov ebx,dword ptr[i4InverseMantissa + ebx*4]
mov edi,[i_pixel]
add eax,ebx
mov ebx,[iNextSubdivide]
mov dword ptr[f_next_z],eax
sub edi,ebx
// Set new texture coordinate increments.
fmul [f_next_z]
fxch st(1)
fst [fGVInvZ]
fmul [f_next_z]
fxch st(1)
fstp [fU] // V
// Clamp fU
mov eax,[fU]
mov ebx,fTexEdgeTolerance
mov ebp,fTexWidth
cmp eax,ebx
cmovl (_eax,_ebx)
cmp eax,ebp
cmovg (_eax,_ebp)
mov ebp,fTexHeight
mov [fU],eax
fld [fU] // U,V
fsub [f_u] // U-fu,V
fxch st(1) // V,U-fu
fstp [fV] // U-fu
// Clamp fV
mov eax,[fV]
cmp eax,ebx
cmovl (_eax,_ebx)
cmp eax,ebp
cmovg (_eax,_ebp)
mov [fV],eax
fld [fV] // V U-fu
#else // (VER_CLAMP_UV_16BIT==TRUE)
//---------------------------------------------------------------------------
// Set current texture coordinates (not clamped).
fmul [f_z] // U
fld [fGVInvZ] // V/Z U
fmul [f_z] // V U
fxch st(1) // U V
// stall(1)
fst [f_u]
fadd [dFastFixed16Conversion] // iU V
fxch st(1) // V iU
fst [f_v]
fadd [dFastFixed16Conversion] // iV iU
fxch st(1) // iU iV
fstp [d_temp_a] // iV
fld [fGUInvZ] // U/Z iV
fxch st(1) // iV U/Z
fstp [d_temp_b] // U/Z
// Increment u, v and z values.
fadd [fDUInvZScanline] // U2/Z
fld [fGVInvZ]
fadd [fDVInvZScanline] // V2/Z U2/Z
fxch st(1) // U2/Z V2/Z
fld [fGInvZ] // 1/Z U2/Z V2/Z
fadd [fDInvZScanline] // 1/Z2 U2/Z V2/Z
// Setup esi=uFrac, ecx=vFrac, edx=UVInt for Abrash texture loop.
mov edx,dword ptr[d_temp_a] // U (16.16)
mov eax,dword ptr[d_temp_b] // V (16.16)
sar eax,16 // Integral V
mov ecx,[iTexWidth] // Texture width.
imul eax,ecx // iv*twidth
sar edx,16 // Integral U
mov esi,dword ptr[d_temp_a] // Copy of U
shl esi,16 // UFrac
mov ecx,dword ptr[d_temp_b] // Copy of V
shl ecx,16 // VFrac
add edx,eax // iu + iv*twidth
fstp [fGInvZ]
// Get next u, v and z values.
mov ebx,dword ptr[fGInvZ] // f_next_z = fInverse(fGInvZ);
mov eax,iFI_SIGN_EXPONENT_SUB
sub eax,ebx
and ebx,iFI_MASK_MANTISSA
sar ebx,iSHIFT_MANTISSA
and eax,iFI_MASK_SIGN_EXPONENT
fst [fGUInvZ]
mov ebx,dword ptr[i4InverseMantissa + ebx*4]
mov edi,[i_pixel]
add eax,ebx
mov ebx,[iNextSubdivide]
mov dword ptr[f_next_z],eax
sub edi,ebx
// Set new texture coordinate increments.
fmul [f_next_z] // U,V/Z
fxch st(1) // V/Z,U
fst [fGVInvZ]
fmul [f_next_z] // V,U
fxch st(1) // U,V
fst [fU] // U,V
fsub [f_u] // U-fu,V
fxch st(1) // V,U-fu
fst [fV]
#endif // (VER_CLAMP_UV_16BIT==TRUE)
// ---------------------------------------------------------------------------------
// Both clmaped and non-clamped primitives end up here..
fsub [f_v] // V-fv,U-fu
fxch st(1) // U,V
fmul float ptr[fInverseIntTable+edi*4] // (C*U),V
fxch st(1) // V,(C*U)
// stall(1)
fmul float ptr[fInverseIntTable+edi*4] // (C*V),(C*U)
fxch st(1) // (C*U),(C*V)
fadd [dFastFixed16Conversion]
fxch st(1)
// stall(1)
fadd [dFastFixed16Conversion]
fxch st(1)
// stall(1)
fstp [d_temp_a]
fstp [d_temp_b]
mov edi,dword ptr[d_temp_a] // uslope
mov eax,dword ptr[d_temp_b] // vslope
sar edi,16 // integer part of uslope
mov ebp,dword ptr[d_temp_a] // uslope again
shl ebp,16 // fractional part of uslope
mov ebx,eax // vslope again
sar eax,16 // integer part of vslope
mov [w2dDeltaTex.uUFrac],ebp // store UFrac
shl ebx,16 // fractional part of vslope
mov ebp,[iTexWidth] // Load texture width.
imul eax,ebp // ivslope*twidth
add edi,eax // ivslope*twidth + iuslope
mov [w2dDeltaTex.uVFrac],ebx // store VFrac
mov [w2dDeltaTex.iUVInt+4],edi // store integer stride
add edi,ebp // add twidth to integer stride
mov [w2dDeltaTex.iUVInt],edi // store integer stride + twidth
mov eax,[iNextSubdivide]
test eax,eax // Next subdivision is zero length?
jz SUBDIVISION_LOOP
//iCacheNextSubdivide = iSetNextDividePixel(iNextSubdivide);
mov edi,[iSubdivideLen]
// scan line is +ve
add eax,edi
jle short DONE_DIVIDE_PIXEL_CACHE
// calc the new +ve ratio
fild iNextSubdivide
fld fInvSubdivideLen
fchs
fmulp st(1),st(0) // st(0) = (-)fInvSubdivideLen * i_pixel;
fld fDUInvZEdge // U C
xor eax,eax
fmul st(0),st(1) // U*C C
fxch st(1) // C U*C
fld fDVInvZEdge // V C U*C
fxch st(1) // C V U*C
fmul st(1),st(0) // C V*C U*C
mov [iCacheNextSubdivide],eax
fmul fDInvZEdge // Z*C V*C U*C
fxch st(2) // U*C V*C Z*C
fst fDUInvZScanline // U*C V*C Z*C
fadd fGUInvZ // U*C+L V*C Z*C
fxch st(1) // V*C U*C+L Z*C
fst fDVInvZScanline // V*C U*C+L Z*C
fadd fGVInvZ // V*C+L U*C+L Z*C
fxch st(2) // Z*C U*C+L V*C+L
fst fDInvZScanline // Z*C U*C+L V*C+L
fadd fGInvZ // Z*C+L U*C+L V*C+L
fxch st(2)
fstp fGVInvZ
fstp fGUInvZ
fst fGInvZ
fdivr fOne // Start the next division.
jmp short SUBDIVISION_LOOP
DONE_DIVIDE_PIXEL_CACHE:
// Copy texture values.
mov ebp,[fDUInvZEdge]
mov ebx,[fDVInvZEdge]
mov edi,[fDInvZEdge]
mov [fDUInvZScanline],ebp
mov [fDVInvZScanline],ebx
mov [fDInvZScanline],edi
// Step texture values.
fld fDUInvZScanline // U
fadd fGUInvZ
fld fDVInvZScanline // V U
fadd fGVInvZ
fld fDInvZScanline // Z V U
fadd fGInvZ
fxch st(2) // U V Z
fstp fGUInvZ
fstp fGVInvZ
fst fGInvZ
fdivr fOne // Start the next division.
mov [iCacheNextSubdivide],eax
SUBDIVISION_LOOP:
//---------------------------------------------------------------------------
// Start the next subdivision.
// ecx,edx,esi = texture values
//
mov edi,[i_pixel]
mov eax,[iNextSubdivide]
sub edi,eax
//
// eax = i_pixel
// edi = inner loop count
// ecx,edx,esi = texture values
//
mov ebp,gsGlobals.pvScreen // Pointer the screen.
mov ebx,[i_screen_index] // Load scanline offset.
add ebx,eax // Add scanline offset to i_pixel
mov [i_pixel],eax // Save i_pixel.
lea ebp,[ebp+ebx*2] // Base of span in ebp.
mov eax,[u4TerrainFogMask]
mov [pvBaseOfLine],ebp // Keep pointer to base of span.
lea ebp,[ebp + edi*2] // Calculate destination address.
// Check alignement.
and ebp,3
jz short ALIGNED
//
// Do one pixel for alignment.
//
and al,[edx*2 + 0xDEADBEEF] // Read texture value (low).
MODIFY_FOR_TEXTUREPOINTER_E:
mov ebx,[w2dDeltaTex]CWalk2D.uVFrac // Load V fraction.
and ah,[edx*2 + 0xDEADBEEF] // Read texture value (high).
MODIFY_FOR_TEXTUREPOINTER_P1_E:
add ecx,ebx // Step V fraction.
sbb ebp,ebp // Get borrow from V fraction step.
mov ebx,[w2dDeltaTex]CWalk2D.uUFrac // Load U fraction.
mov cl,[eax*2 + 0xDEADBEEF] // Do CLUT lookup (low).
MODIFY_FOR_CLUT_E:
add esi,ebx // Step U fraction.
mov ch,[eax*2 + 0xDEADBEEF] // Do CLUT lookup (high).
MODIFY_FOR_CLUT_P1_E:
mov ebx,[pvBaseOfLine] // Load pointer to destination.
adc edx,[w2dDeltaTex + ebp*4 + 4]CWalk2D.iUVInt // Integer step + u carry.
mov eax,[u4TerrainFogMask]
mov [ebx + edi*2],cx // Store pixel.
inc edi
jz short DONE_WITH_SPAN
ALIGNED:
// Make sure we have two pixels left.
add edi,2
jg short FINISH_REMAINDER
//
// Do two pixels at a time.
//
mov ebx,[w2dDeltaTex]CWalk2D.uVFrac // Load V fraction.
mov eax,[u4TerrainFogMask]
INNER_LOOP_2P:
add ecx,ebx // Step V fraction.
mov ebx,[w2dDeltaTex]CWalk2D.uUFrac // Load U fraction step.
sbb ebp,ebp // Get borrow from V fraction step.
and al,[edx*2 + 0xDEADBEEF] // Read texture value (low).
MODIFY_FOR_TEXTUREPOINTER_F:
and ah,[edx*2 + 0xDEADBEEF] // Read texture value (high).
MODIFY_FOR_TEXTUREPOINTER_P1_F:
add esi,ebx // Step U fraction.
adc edx,[w2dDeltaTex + ebp*4 + 4]CWalk2D.iUVInt // Integer step + U carry.
add ecx,[w2dDeltaTex]CWalk2D.uVFrac // Step V fraction.
sbb ebp,ebp // Get borrow from V fraction step.
mov bl,[eax*2 + 0xDEADBEEF] // Do CLUT lookup (low).
MODIFY_FOR_CLUT_F:
mov bh,[eax*2 + 0xDEADBEEF] // Do CLUT lookup (high).
MODIFY_FOR_CLUT_P1_F:
mov eax,[u4TerrainFogMask]
shl ebx,16 // Shift pixel up.
and al,[edx*2 + 0xDEADBEEF] // Read texture value (low).
MODIFY_FOR_TEXTUREPOINTER_G:
and ah,[edx*2 + 0xDEADBEEF] // Read texture value (high).
MODIFY_FOR_TEXTUREPOINTER_P1_G:
add esi,[w2dDeltaTex]CWalk2D.uUFrac // Step U fraction.
mov bl,[eax*2 + 0xDEADBEEF] // Do CLUT lookup (low).
MODIFY_FOR_CLUT_G:
mov ebp,[w2dDeltaTex + ebp*4 + 4]CWalk2D.iUVInt // Integer step + V carry.
adc edx,ebp // Integer step + U carry.
mov bh,[eax*2 + 0xDEADBEEF] // Do CLUT lookup (high).
MODIFY_FOR_CLUT_P1_G:
mov ebp,[pvBaseOfLine]
mov eax,[u4TerrainFogMask]
rol ebx,16 // Reverse pixels.
mov [ebp + edi*2 - 4],ebx // Store pixel.
mov ebx,[w2dDeltaTex]CWalk2D.uVFrac // Load V fraction.
add edi,2
jle short INNER_LOOP_2P
// edi is 1 if there is a pixel left.
cmp edi,1
jne short DONE_WITH_SPAN
//
// Finish left over pixel.
//
// eax is zero.
//
FINISH_REMAINDER:
and al,[edx*2 + 0xDEADBEEF] // Read texture value (low).
MODIFY_FOR_TEXTUREPOINTER_H:
mov ebx,[w2dDeltaTex]CWalk2D.uVFrac // Load V fraction.
and ah,[edx*2 + 0xDEADBEEF] // Read texture value (high).
MODIFY_FOR_TEXTUREPOINTER_P1_H:
add ecx,ebx // Step V fraction.
sbb ebp,ebp // Get borrow from V fraction step.
mov ebx,[w2dDeltaTex]CWalk2D.uUFrac // Load U fraction.
mov cl,[eax*2 + 0xDEADBEEF] // Do CLUT lookup (low).
MODIFY_FOR_CLUT_H:
add esi,ebx // Step U fraction.
mov ch,[eax*2 + 0xDEADBEEF] // Do CLUT lookup (high).
MODIFY_FOR_CLUT_P1_H:
mov ebx,[pvBaseOfLine] // Load pointer to destination.
adc edx,[w2dDeltaTex + ebp*4 + 4]CWalk2D.iUVInt // Integer step + u carry.
mov [ebx - 2],cx // Store pixel.
DONE_WITH_SPAN:
cmp [i_pixel],0
je END_OF_SCANLINE
//----------------------------------------------------------------------------------
// Implement perspective correction.
// stack top contains the result of the divide
// Preserve: ecx,edx,esi
//
mov edi,[iNextSubdivide]
mov eax,[iCacheNextSubdivide]
sub edi,eax
jz short EXIT_BEGIN_NEXT_QUICK_END // This should never happen.
fld [fGUInvZ] // U/Z,Z
fxch st(1) // Z,U/Z
fmul st(1),st(0) // Z,U
mov ebx,[bClampUV]
fmul [fGVInvZ] // V,U
#if (VER_CLAMP_UV_16BIT==TRUE)
//----------------------------------------------------------------------------------
// Clamp U and V
fxch st(1)
fstp [f_u]
fstp [f_v]
mov ebp,[f_u]
mov ebx,[fTexEdgeTolerance]
mov eax,[f_v]
cmp eax,ebx
cmovl (_eax,_ebx)
cmp ebp,ebx
cmovl (_ebp,_ebx)
mov ebx,[fTexWidth]
cmp ebp,ebx
cmovg (_ebp,_ebx)
mov ebx,[fTexHeight]
cmp eax,ebx
cmovg (_eax,_ebx)
mov [f_u],ebp
mov [f_v],eax
fld [f_u]
fld [f_v]
#endif // (VER_CLAMP_UV_16BIT==TRUE)
//----------------------------------------------------------------------------------
// Initialize walking values
fld st(1) // U,V,U
fsub [fU] // U-fU,V,U
fld st(1) // V,U-fU,V,U
fsub [fV] // V-fV,U-fU,V,U
fxch st(1) // U-fU,V-fV,V,U
fmul float ptr[fInverseIntTable+edi*4] // (U-fU)*C,V-fV,V,U
fxch st(2) // V,V-fV,(U-fU)*C,U
fstp [fV] // V-fV,(U-fU)*C,U
fmul float ptr[fInverseIntTable+edi*4] // (V-fV)*C,(U-fU)*C,U
fxch st(1) // (U-fU)*C,(V-fV)*C,U
fadd [dFastFixed16Conversion] // f(U-fU)*C,(V-fV)*C,U
fxch st(1) // (V-fV)*C,f(U-fU)*C,U
// stall(1)
fadd [dFastFixed16Conversion] // f(V-fV)*C,f(U-fU)*C,U
fxch st(2) // U,f(U-fU)*C,f(V-fV)*C
fstp [fU] // f(U-fU)*C,f(V-fV)*C
fstp [d_temp_a] // f(V-fV)*C
fstp [d_temp_b]
mov edi,dword ptr[d_temp_a] // uslope
mov eax,dword ptr[d_temp_b] // vslope
sar edi,16 // integer part of uslope
mov ebp,dword ptr[d_temp_a] // uslope again
shl ebp,16 // fractional part of uslope
mov ebx,eax // vslope again
sar eax,16 // integer part of vslope
mov [w2dDeltaTex.uUFrac],ebp // store UFrac
shl ebx,16 // fractional part of vslope
mov ebp,[iTexWidth] // Load texture width.
imul eax,ebp // ivslope*twidth
add edi,eax // ivslope*twidth + iuslope
mov [w2dDeltaTex.uVFrac],ebx // store VFrac
mov [w2dDeltaTex.iUVInt+4],edi // store integer stride
add edi,ebp // add twidth to integer stride
mov [w2dDeltaTex.iUVInt],edi // store integer stride + twidth
mov eax,[iCacheNextSubdivide] // Load cached next subdivision.
mov [iNextSubdivide],eax // eax == iNextSubdivide
test eax,eax // Next subdivision is zero length?
jz SUBDIVISION_LOOP
//----------------------------------------------------------------------------------
// Begin Next Subdivision
// scan line is +ve
add eax,[iSubdivideLen]
jle short DONE_DIVIDE_PIXEL_CACHE_END
// calc the new +ve ratio
fild iNextSubdivide
fld fInvSubdivideLen
fchs
fmulp st(1),st(0) // st(0) = (-)fInvSubdivideLen * i_pixel;
fld fDUInvZScanline // U C
xor eax,eax
fmul st(0),st(1) // U*C C
fxch st(1) // C U*C
fld fDVInvZScanline // V C U*C
fxch st(1) // C V U*C
fmul st(1),st(0) // C V*C U*C
mov [iCacheNextSubdivide],eax
fmul fDInvZScanline // Z*C V*C U*C
fxch st(2) // U*C V*C Z*C
fst fDUInvZScanline // U V Z
fadd fGUInvZ
fxch st(1) // V U Z
fst fDVInvZScanline
fadd fGVInvZ
fxch st(2) // Z U V
fst fDInvZScanline
fadd fGInvZ
fxch st(2) // V U Z
fstp fGVInvZ
fstp fGUInvZ
fst fGInvZ
fdivr fOne // Start the next division.
jmp SUBDIVISION_LOOP
DONE_DIVIDE_PIXEL_CACHE_END:
fld fDUInvZScanline // U
fadd fGUInvZ
fld fDVInvZScanline // V U
fadd fGVInvZ
fld fDInvZScanline // Z V U
fadd fGInvZ
fxch st(2) // U V Z
fstp fGUInvZ
fstp fGVInvZ
fst fGInvZ
fdivr fOne // Start the next division.
mov [iCacheNextSubdivide],eax
jmp SUBDIVISION_LOOP
EXIT_BEGIN_NEXT_QUICK_END:
// When the sub divide equals the cached sub-divide we end up here but
// there is an element left on the fp stack.
// This is never called unless there is something wrong with the loop
// counter.
// Dump value on stack
ffree st(0)
fincstp
jmp SUBDIVISION_LOOP
END_OF_SCANLINE:
pop ebp // Restore ebp.
//
// Increment the base edge.
//
mov esi,[pdtri] // Pointer to polygon object.
mov ecx,[pscan] // Pointer to scanline object.
// Step length, line starting address.
mov eax,[esi]CDrawPolygonBase.fxLineLength.i4Fx
mov ebx,[esi]CDrawPolygonBase.iLineStartIndex
add eax,[esi]CDrawPolygonBase.fxDeltaLineLength.i4Fx
add ebx,[gsGlobals.u4LinePixels]
mov [esi]CDrawPolygonBase.fxLineLength.i4Fx,eax
mov [esi]CDrawPolygonBase.iLineStartIndex,ebx
// Step edge values.
mov eax,[ecx]TCopyTerrainPersp.fxX.i4Fx
mov ebx,[ecx]TCopyTerrainPersp.fxXDifference.i4Fx
fld [ecx]TCopyTerrainPersp.indCoord.fUInvZ
fadd [f_inc_uinvz]
fld [ecx]TCopyTerrainPersp.indCoord.fVInvZ
fadd [f_inc_vinvz]
fld [ecx]TCopyTerrainPersp.indCoord.fInvZ
fadd [f_inc_invz]
add eax,[fx_inc]
add ebx,[fx_diff]
jge short NO_UNDERFLOW
// Underflow.
add ebx,0x00010000
jmp short BORROW_OR_CARRY
NO_UNDERFLOW:
cmp ebx,0x00010000
jle short NO_OVERFLOW
// Overflow.
sub ebx,0x00010000
jmp short BORROW_OR_CARRY
NO_OVERFLOW:
fstp [ecx]TCopyTerrainPersp.indCoord.fInvZ
fstp [ecx]TCopyTerrainPersp.indCoord.fVInvZ
fstp [ecx]TCopyTerrainPersp.indCoord.fUInvZ
jmp short FINISH_LOOPING
BORROW_OR_CARRY:
fadd [fNegInvZ] // Z V U
fxch st(1) // V Z U
fadd [fNegVInvZ] // V Z U
fxch st(2) // U Z V
fadd [fNegUInvZ] // U Z V
fxch st(1) // Z U V
fstp [ecx]TCopyTerrainPersp.indCoord.fInvZ
fstp [ecx]TCopyTerrainPersp.indCoord.fUInvZ
fstp [ecx]TCopyTerrainPersp.indCoord.fVInvZ
FINISH_LOOPING:
// Y Loop control.
mov [ecx]TCopyTerrainPersp.fxX.i4Fx,eax
mov edx,[esi]CDrawPolygonBase.iY
mov [ecx]TCopyTerrainPersp.fxXDifference.i4Fx,ebx
inc edx
mov ebx,[esi]CDrawPolygonBase.iYTo
mov [esi]CDrawPolygonBase.iY,edx
cmp edx,ebx
jl Y_LOOP
}
}
//*****************************************************************************************
//
// Linear, Terrain clut, non-transparent.
//
//**********************************
void DrawSubtriangle(TCopyTerrainLinear* pscan, CDrawPolygon<TCopyTerrainLinear>* pdtri)
{
static void* pvLastTexture = 0;
static void* pvLastFogClut = 0;
static TCopyTerrainLinear* pscanGlbl;
static CDrawPolygon<TCopyTerrainLinear>* pdtriGlbl;
TCopyTerrainLinear* plinc = &pdtri->pedgeBase->lineIncrement;
Assert(pu2TerrainTextureFogClut);
_asm
{
//
// Do self modifications.
//
mov ebx,[pvTextureBitmap] // Texture pointer.
mov ecx,[pu2TerrainTextureFogClut] // Terrain clut pointer. Hack.
mov edi,[pvLastTexture]
mov edx,[pvLastFogClut]
cmp ebx,edi
jne short DO_MODIFY
cmp ecx,edx
je short DONE_WITH_MODIFY
DO_MODIFY:
mov [pvLastTexture],ebx
mov [pvLastFogClut],ecx
lea edi,[ebx+1]
lea edx,[ecx+1]
mov [MODIFY_FOR_TEXTUREPOINTER_E-4],ebx
mov [MODIFY_FOR_TEXTUREPOINTER_P1_E-4],edi
mov [MODIFY_FOR_CLUT_E-4],ecx
mov [MODIFY_FOR_CLUT_P1_E-4],edx
mov [MODIFY_FOR_TEXTUREPOINTER_F-4],ebx
mov [MODIFY_FOR_TEXTUREPOINTER_P1_F-4],edi
mov [MODIFY_FOR_CLUT_F-4],ecx
mov [MODIFY_FOR_CLUT_P1_F-4],edx
mov [MODIFY_FOR_TEXTUREPOINTER_G-4],ebx
mov [MODIFY_FOR_TEXTUREPOINTER_P1_G-4],edi
mov [MODIFY_FOR_CLUT_G-4],ecx
mov [MODIFY_FOR_CLUT_P1_G-4],edx
mov [MODIFY_FOR_TEXTUREPOINTER_H-4],ebx
mov [MODIFY_FOR_TEXTUREPOINTER_P1_H-4],edi
mov [MODIFY_FOR_CLUT_H-4],ecx
mov [MODIFY_FOR_CLUT_P1_H-4],edx
DONE_WITH_MODIFY:
push ebp // Save ebp.
mov edi,[plinc] // Pointer to scanline increment.
mov eax,[pscan] // Pointer to scanline.
mov esi,[pdtri] // Pointer to polygon.
//
// Copies of edge stepping values.
//
// fx_inc = pdtri->pedgeBase->lineIncrement.fxX;
// fx_diff = pdtri->pedgeBase->lineIncrement.fxXDifference;
// bf_u_inc = pdtri->pedgeBase->lineIncrement.indCoord.bfU;
// w1d_v_inc = pdtri->pedgeBase->lineIncrement.indCoord.w1dV;
// pscanCopy = pscan;
// pdtriCopy = pdtri;
//
mov ebx,[edi]TCopyTerrainLinear.fxX
mov ecx,[edi]TCopyTerrainLinear.fxXDifference
mov fx_inc,ebx
mov fx_diff,ecx
mov ebx,[edi]TCopyTerrainLinear.indCoord.bfU.i4Int
mov ecx,[edi]TCopyTerrainLinear.indCoord.bfU.u4Frac
mov bf_u_inc.i4Int,ebx
mov bf_u_inc.u4Frac,ecx
mov ebx,[edi]TCopyTerrainLinear.indCoord.w1dV.bfxValue.i4Int
mov ecx,[edi]TCopyTerrainLinear.indCoord.w1dV.bfxValue.u4Frac
mov w1d_v_inc.bfxValue.i4Int,ebx
mov w1d_v_inc.bfxValue.u4Frac,ecx
mov pdtriGlbl,esi
mov pscanGlbl,eax
//
// Iterate through the scanlines that intersect the subtriangle.
//
LIN_NEXT_SCAN_LINE:
mov ebx,[bEvenScanlinesOnly]
mov ebp,[esi]CDrawPolygonBase.iY
// check for odd scan lines here and skip if even
and ebp,ebx
jnz short INC_BASE_EDGE
mov ebx,[eax]TCopyTerrainLinear.fxX.i4Fx
mov ecx,[esi]CDrawPolygonBase.fxLineLength.i4Fx
mov edi,ebx
add ebx,ecx
sar ebx,16
mov ecx,[esi]CDrawPolygonBase.iLineStartIndex
sar edi,16
mov ebp,gsGlobals.pvScreen
add ecx,ebx
sub edi,ebx
jge short INC_BASE_EDGE // no pixels to draw
mov esi,[eax]TCopyTerrainLinear.indCoord.bfU.u4Frac // UFrac
mov edx,[eax]TCopyTerrainLinear.indCoord.w1dV.bfxValue.i4Int
lea ebp,[ebp + ecx*2]
mov ebx,[eax]TCopyTerrainLinear.indCoord.bfU.i4Int
add edx,ebx // Add integer steps.
mov ecx,[eax]TCopyTerrainLinear.indCoord.w1dV.bfxValue.u4Frac // VFrac
mov [pvBaseOfLine],ebp
lea ebp,[ebp + edi*2] // Calculate destination address.
mov eax,[u4TerrainFogMask]
// Check alignement.
and ebp,3
jz short ALIGNED
// Detect one pixel case before starting.
inc edi
jz short FINISH_REMAINDER
//
// Do one pixel for alignment.
//
mov eax,[u4TerrainFogMask]
and al,[edx*2 + 0xDEADBEEF] // Read texture value (low).
MODIFY_FOR_TEXTUREPOINTER_E:
mov ebx,[w2dDeltaTex]CWalk2D.uVFrac // Load V fraction.
and ah,[edx*2 + 0xDEADBEEF] // Read texture value (high).
MODIFY_FOR_TEXTUREPOINTER_P1_E:
add ecx,ebx // Step V fraction.
sbb ebp,ebp // Get borrow from V fraction step.
mov ebx,[w2dDeltaTex]CWalk2D.uUFrac // Load U fraction.
mov cl,[eax*2 + 0xDEADBEEF] // Do CLUT lookup (low).
MODIFY_FOR_CLUT_E:
add esi,ebx // Step U fraction.
mov ch,[eax*2 + 0xDEADBEEF] // Do CLUT lookup (high).
MODIFY_FOR_CLUT_P1_E:
mov ebx,[pvBaseOfLine] // Load pointer to destination.
adc edx,[w2dDeltaTex + ebp*4 + 4]CWalk2D.iUVInt // Integer step + u carry.
mov eax,[u4TerrainFogMask]
mov [ebx + edi*2 - 2],cx // Store pixel.
ALIGNED:
// Make sure we have two pixels left.
add edi,2
jg short FINISH_REMAINDER
//
// Do two pixels at a time.
//
mov ebx,[w2dDeltaTex]CWalk2D.uVFrac // Load V fraction.
mov eax,[u4TerrainFogMask]
INNER_LOOP_2P:
add ecx,ebx // Step V fraction.
mov ebx,[w2dDeltaTex]CWalk2D.uUFrac // Load U fraction step.
sbb ebp,ebp // Get borrow from V fraction step.
and al,[edx*2 + 0xDEADBEEF] // Read texture value (low).
MODIFY_FOR_TEXTUREPOINTER_F:
and ah,[edx*2 + 0xDEADBEEF] // Read texture value (high).
MODIFY_FOR_TEXTUREPOINTER_P1_F:
add esi,ebx // Step U fraction.
adc edx,[w2dDeltaTex + ebp*4 + 4]CWalk2D.iUVInt // Integer step + U carry.
add ecx,[w2dDeltaTex]CWalk2D.uVFrac // Step V fraction.
sbb ebp,ebp // Get borrow from V fraction step.
mov bl,[eax*2 + 0xDEADBEEF] // Do CLUT lookup (low).
MODIFY_FOR_CLUT_F:
mov bh,[eax*2 + 0xDEADBEEF] // Do CLUT lookup (high).
MODIFY_FOR_CLUT_P1_F:
mov eax,[u4TerrainFogMask]
shl ebx,16 // Shift pixel up.
and al,[edx*2 + 0xDEADBEEF] // Read texture value (low).
MODIFY_FOR_TEXTUREPOINTER_G:
and ah,[edx*2 + 0xDEADBEEF] // Read texture value (high).
MODIFY_FOR_TEXTUREPOINTER_P1_G:
add esi,[w2dDeltaTex]CWalk2D.uUFrac // Step U fraction.
mov bl,[eax*2 + 0xDEADBEEF] // Do CLUT lookup (low).
MODIFY_FOR_CLUT_G:
mov ebp,[w2dDeltaTex + ebp*4 + 4]CWalk2D.iUVInt // Integer step + V carry.
adc edx,ebp // Integer step + U carry.
mov bh,[eax*2 + 0xDEADBEEF] // Do CLUT lookup (high).
MODIFY_FOR_CLUT_P1_G:
mov ebp,[pvBaseOfLine]
mov eax,[u4TerrainFogMask]
rol ebx,16 // Reverse pixels.
mov [ebp + edi*2 - 4],ebx // Store pixel.
mov ebx,[w2dDeltaTex]CWalk2D.uVFrac // Load V fraction.
add edi,2
jle short INNER_LOOP_2P
// edi is 1 if there is a pixel left.
cmp edi,1
jne short INC_BASE_EDGE
//
// Finish left over pixel.
//
FINISH_REMAINDER:
mov eax,[u4TerrainFogMask]
and al,[edx*2 + 0xDEADBEEF] // Read texture value (low).
MODIFY_FOR_TEXTUREPOINTER_H:
and ah,[edx*2 + 0xDEADBEEF] // Read texture value (high).
MODIFY_FOR_TEXTUREPOINTER_P1_H:
mov ebx,[pvBaseOfLine] // Load pointer to destination.
mov cl,[eax*2 + 0xDEADBEEF] // Do CLUT lookup (low).
MODIFY_FOR_CLUT_H:
mov ch,[eax*2 + 0xDEADBEEF] // Do CLUT lookup (high).
MODIFY_FOR_CLUT_P1_H:
mov [ebx - 2],cx // Store pixel.
// -------------------------------------------------------------------------
// Increment the base edge.
INC_BASE_EDGE:
mov eax,[pscanGlbl]
mov ebp,bf_u_inc.i4Int
// AGI on eax.
mov ecx,[eax]TCopyTerrainLinear.indCoord.bfU.u4Frac
mov edx,[eax]TCopyTerrainLinear.indCoord.bfU.i4Int
mov edi,[eax]TCopyTerrainLinear.indCoord.w1dV.bfxValue.i4Int
mov esi,[eax]TCopyTerrainLinear.indCoord.w1dV.bfxValue.u4Frac
add edi,w1d_v_inc.bfxValue.i4Int
add ecx,bf_u_inc.u4Frac
adc edx,ebp
mov ebp,w1d_v_inc.bfxValue.u4Frac
add esi,ebp
jnc short NO_VFRAC_CARRY
add edi,[eax]TCopyTerrainLinear.indCoord.w1dV.iOffsetPerLine
NO_VFRAC_CARRY:
mov ebx,[eax]TCopyTerrainLinear.fxXDifference
mov ebp,[fx_diff]
add ebx,ebp
jge short NO_UNDERFLOW
// Undeflow of fxXDifference.
add ebx,0x00010000
mov ebp,bfNegDU.i4Int
add edi,w1dNegDV.bfxValue.i4Int
add ecx,bfNegDU.u4Frac
adc edx,ebp
mov ebp,w1dNegDV.bfxValue.u4Frac
add esi,ebp
jnc short EDGE_DONE
add edi,[eax]TCopyTerrainLinear.indCoord.w1dV.iOffsetPerLine
jmp short EDGE_DONE
NO_UNDERFLOW:
cmp ebx,0x00010000
jl short EDGE_DONE
// Overflow of fxXDifference.
sub ebx,0x00010000
mov ebp,bfNegDU.i4Int
add edi,w1dNegDV.bfxValue.i4Int
add ecx,bfNegDU.u4Frac
adc edx,ebp
mov ebp,w1dNegDV.bfxValue.u4Frac
add esi,ebp
jnc short EDGE_DONE
add edi,[eax]TCopyTerrainLinear.indCoord.w1dV.iOffsetPerLine
EDGE_DONE:
// Store modified variables and do looping.
mov [eax]TCopyTerrainLinear.indCoord.w1dV.bfxValue.u4Frac,esi
mov [eax]TCopyTerrainLinear.indCoord.w1dV.bfxValue.i4Int,edi
mov [eax]TCopyTerrainLinear.fxXDifference,ebx
mov esi,[pdtriGlbl] // Pointer to polygon object.
mov [eax]TCopyTerrainLinear.indCoord.bfU.u4Frac,ecx
mov [eax]TCopyTerrainLinear.indCoord.bfU.i4Int,edx
// Step length, line starting address.
mov ebx,[esi]CDrawPolygonBase.fxLineLength.i4Fx
mov ecx,[esi]CDrawPolygonBase.iLineStartIndex
add ebx,[esi]CDrawPolygonBase.fxDeltaLineLength.i4Fx
add ecx,[gsGlobals.u4LinePixels]
mov [esi]CDrawPolygonBase.fxLineLength.i4Fx,ebx
mov [esi]CDrawPolygonBase.iLineStartIndex,ecx
mov ecx,[eax]TCopyTerrainLinear.fxX
mov ebp,[fx_inc]
add ecx,ebp
mov edx,[esi]CDrawPolygonBase.iY
mov [eax]TCopyTerrainLinear.fxX,ecx
inc edx
mov ebx,[esi]CDrawPolygonBase.iYTo
mov [esi]CDrawPolygonBase.iY,edx
cmp edx,ebx
jl LIN_NEXT_SCAN_LINE
pop ebp
}
}