执行过程中的疯狂行为
我一直在用 gcc 写一些内联汇编。一切几乎都正常,直到某个行为让我感到困惑。我正在计算一个有理多项式,但需要使用 80 位常量。生成的代码看起来很完美,但在执行时,其中一个 80 位系数在加载到 FPU 时被加载为 0,尽管内存中对应的字节值并不为零(而且我认为它是一个有效的 80 位实数,因为完全相同的常量在 masm 生成的代码中可以正常加载)。以下是 gdb 会话的输出:
(gdb) disassemble
Dump of assembler code for function poly4(double):
0x00402d7c <+0>: push %ebp
0x00402d7d <+1>: mov %esp,%ebp
0x00402d7f <+3>: sub $0x8,%esp
0x00402d82 <+6>: mov 0x8(%ebp),%eax
0x00402d85 <+9>: mov %eax,-0x8(%ebp)
0x00402d88 <+12>: mov 0xc(%ebp),%eax
0x00402d8b <+15>: mov %eax,-0x4(%ebp)
0x00402d8e <+18>: fld1
0x00402d90 <+20>: fldl -0x8(%ebp)
0x00402d93 <+23>: fmul %st(0),%st
0x00402d95 <+25>: fdivrp %st,%st(1)
0x00402d97 <+27>: fldt 0x40470e
0x00402d9d <+33>: fadd %st(1),%st
0x00402d9f <+35>: fmul %st(1),%st
0x00402da1 <+37>: fldt 0x404704
0x00402da7 <+43>: faddp %st,%st(1)
0x00402da9 <+45>: fmul %st(1),%st
0x00402dab <+47>: fldt 0x4046fa
0x00402db1 <+53>: faddp %st,%st(1)
0x00402db3 <+55>: fmul %st(1),%st
0x00402db5 <+57>: fldt 0x4046f0
0x00402dbb <+63>: faddp %st,%st(1)
0x00402dbd <+65>: fmul %st(1),%st
=> 0x00402dbf <+67>: fldt 0x4046e6
0x00402dc5 <+73>: faddp %st,%st(1)
...snip....
End of assembler dump.
(gdb) info registers st0 st1 st2 st3 st4 st5
st0 2.7412088761933612e-006 (raw 0x3fecb7f59c22579f9f60)
st1 0.00071574511983807409 (raw 0x3ff4bba0d78724c01468)
st2 <invalid float value> (raw 0x00077c81cc3b0002021e)
st3 <invalid float value> (raw 0x00020098007c00f8f0c0)
st4 0 (raw 0x000013af076300003654)
st5 <invalid float value> (raw 0x0762000000000002021e)
(gdb) x/5xh 0x4046e6
0x4046e6 <_ZL11s_NORMAL_q5>: 0x8996 0xa5d6 0x3d00 0x990a 0x3ff1
(gdb) stepi
0x00402dc5 1577 );
(gdb) info registers st0 st1 st2 st3 st4 st5
st0 0 (raw 0x00000000000000000000)
st1 2.7412088761933612e-006 (raw 0x3fecb7f59c22579f9f60)
st2 0.00071574511983807409 (raw 0x3ff4bba0d78724c01468)
st3 <invalid float value> (raw 0x00077c81cc3b0002021e)
st4 <invalid float value> (raw 0x00020098007c00f8f0c0)
st5 0 (raw 0x000013af076300003654)
(gdb) disassemble
Dump of assembler code for function poly4(double):
0x00402d7c <+0>: push %ebp
0x00402d7d <+1>: mov %esp,%ebp
0x00402d7f <+3>: sub $0x8,%esp
0x00402d82 <+6>: mov 0x8(%ebp),%eax
0x00402d85 <+9>: mov %eax,-0x8(%ebp)
0x00402d88 <+12>: mov 0xc(%ebp),%eax
0x00402d8b <+15>: mov %eax,-0x4(%ebp)
0x00402d8e <+18>: fld1
0x00402d90 <+20>: fldl -0x8(%ebp)
0x00402d93 <+23>: fmul %st(0),%st
0x00402d95 <+25>: fdivrp %st,%st(1)
0x00402d97 <+27>: fldt 0x40470e
0x00402d9d <+33>: fadd %st(1),%st
0x00402d9f <+35>: fmul %st(1),%st
0x00402da1 <+37>: fldt 0x404704
0x00402da7 <+43>: faddp %st,%st(1)
0x00402da9 <+45>: fmul %st(1),%st
0x00402dab <+47>: fldt 0x4046fa
0x00402db1 <+53>: faddp %st,%st(1)
0x00402db3 <+55>: fmul %st(1),%st
0x00402db5 <+57>: fldt 0x4046f0
0x00402dbb <+63>: faddp %st,%st(1)
0x00402dbd <+65>: fmul %st(1),%st
0x00402dbf <+67>: fldt 0x4046e6
=> 0x00402dc5 <+73>: faddp %st,%st(1)
...snip...
End of assembler dump.
(gdb)
因此请注意:在 stepi 之前,我们即将执行“fldt 0x4046e6”,而 0x4046e6 处的内存转储清楚地表明它不为零。然而“fldt 0x4046e6”却导致零被加载到 st0 中。之前的所有 fldt 指令都运行良好;常量本身没有问题(在 masm 中使用相同常量的相同代码可以完美地工作)。 对于感兴趣的人,下面是这些常量的源代码:
// 80-bit extended-precision coefficients, each spelled out as 10 raw bytes with the
// least-significant byte first; the trailing comment on every line repeats the same
// bytes as one hex number (most-significant byte first) followed by the decimal value.
// poly4 loads p0..p5 for the numerator and q1..q5 for the denominator.
// NOTE(review): Double80 is not defined in this excerpt -- presumably a 10-byte array type.
Double80 s_NORMAL_p5 = { 0xE0, 0x14, 0x24, 0x6E, 0x43, 0x6C, 0x37, 0xF4, 0xEF, 0x3F}; // 0x3FEFF4376C436E2414E0, 2.9112874951168791857936318084879e-5
Double80 s_NORMAL_p4 = { 0x74, 0x5B, 0x7C, 0x72, 0xE2, 0x9F, 0x55, 0xBA, 0xF5, 0x3F}; // 0x3FF5BA559FE2727C5B74, 0.0014216191932278934659235163911273
Double80 s_NORMAL_p3 = { 0x3B, 0xD1, 0x83, 0xB3, 0xE8, 0xC1, 0x26, 0xB6, 0xF9, 0x3F}; // 0x3FF9B626C1E8B383D13B, 0.022235277870649807464320442391811
Double80 s_NORMAL_p2 = { 0x4B, 0xA2, 0x6C, 0x9F, 0x32, 0x73, 0x75, 0x82, 0xFC, 0x3F}; // 0x3FFC827573329F6CA24B, 0.1274011611602473638801278160334
Double80 s_NORMAL_p1 = { 0x49, 0xDC, 0x10, 0x22, 0x5C, 0x81, 0x14, 0xDD, 0xFC, 0x3F}; // 0x3FFCDD14815C2210DC49, 0.2158985340579569904693315913281
Double80 s_NORMAL_p0 = { 0x3E, 0xCE, 0xA6, 0x2B, 0xB9, 0x83, 0x04, 0xBD, 0xF9, 0x3F}; // 0x3FF9BD0483B92BA6CE3E, 0.023073441764940173030448369674463
Double80 s_NORMAL_q5 = { 0x96, 0x89, 0xD6, 0xA5, 0x00, 0x3D, 0x0A, 0x99, 0xF1, 0x3F}; // 0x3FF1990A3D00A5D68996, 0.0000729751555083966204509375
Double80 s_NORMAL_q4 = { 0xF8, 0x37, 0xEF, 0xEB, 0x8B, 0x14, 0xE2, 0xF7, 0xF6, 0x3F}; // 0x3FF6F7E2148BEBEF37F8, 0.0037823963320275824448625
Double80 s_NORMAL_q3 = { 0x35, 0xC5, 0x61, 0x91, 0xF0, 0xC9, 0x24, 0x87, 0xFB, 0x3F}; // 0x3FFB8724C9F09161C535, 0.065988137868928551531
Double80 s_NORMAL_q2 = { 0xCC, 0x68, 0x85, 0xAF, 0x42, 0xEB, 0xBC, 0xEF, 0xFD, 0x3F}; // 0x3FFDEFBCEB42AF8568CC, 0.4682382124808651180225
Double80 s_NORMAL_q1 = { 0xF3, 0xDB, 0x06, 0x40, 0x84, 0xA2, 0x62, 0xA4, 0xFF, 0x3F}; // 0x3FFFA462A2844006DBF3, 1.28426009614491121036
这是 poly4 函数的源代码:
// poly4: evaluates a rational polynomial in xsq (1/y^2 per the inline comments)
// entirely in x87 inline assembly, using 80-bit coefficients loaded with fldt.
//   den: Horner form over s_NORMAL_q1..q5 with an implicit leading coefficient of 1
//   num: Horner form over s_NORMAL_p0..p5, finally multiplied by xsq
// It then forms (s_oneOverRootTwoPi - num/den)/y, stores y as a double at (%esp),
// calls exp_X2_2, and multiplies the two values left on the x87 stack.
// The trailing comment on each instruction lists the expected x87 stack, top first.
// NOTE(review): the asm ends with its own "leave"/"ret", so control never reaches
// the closing brace and there is no C-level return statement; the result is
// presumably left in st(0) for the caller -- confirm against the calling convention.
// NOTE(review): the output and clobber lists are empty even though the asm changes
// %esp and the x87 register stack.
inline long double poly4(double y)
{
__asm__(
"\n\t" "fld1" //1
"\n\t" "fldl %[y]" //y, 1
"\n\t" "fmul %%st(0), %%st(0)" //y^2, 1
// NOTE(review): the gdb disassembly of the compiled function prints the next
// instruction as "fdivrp %st,%st(1)" -- confirm the operand order is the intended one.
"\n\t" "fdivp %%st(0), %%st(1)" //1/y^2=xsq
//den
"\n\t" "fldt %[s_NORMAL_q1]" //q1, xsq
"\n\t" "fadd %%st(1), %%st(0)" //q1+xsq, xsq
"\n\t" "fmul %%st(1), %%st(0)" //(q1+xsq)*xsq, xsq
"\n\t" "fldt %[s_NORMAL_q2]" //q2, (q1+xsq)*xsq, xsq
"\n\t" "faddp %%st(0), %%st(1)" //q2+(q1+xsq)*xsq, xsq
"\n\t" "fmul %%st(1), %%st(0)" //(q2+(q1+xsq)*xsq)*xsq, xsq
"\n\t" "fldt %[s_NORMAL_q3]" //q3, (q2+(q1+xsq)*xsq)*xsq, xsq
"\n\t" "faddp %%st(0), %%st(1)" //q3+(q2+(q1+xsq)*xsq)*xsq, xsq
"\n\t" "fmul %%st(1), %%st(0)" //(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq, xsq
"\n\t" "fldt %[s_NORMAL_q4]" //q4, (q3+(q2+(q1+xsq)*xsq)*xsq)*xsq, xsq
"\n\t" "faddp %%st(0), %%st(1)" //q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq, xsq
"\n\t" "fmul %%st(1), %%st(0)" //(q4+(q3+(q2+(q1+ xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fldt %[s_NORMAL_q5]" //q5, (q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "faddp %%st(0), %%st(1)" //q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
//num
"\n\t" "fldt %[s_NORMAL_p0]" //p0, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fmul %%st(2), %%st(0)" //p0*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fldt %[s_NORMAL_p1]" //p1, p0*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "faddp %%st(0), %%st(1)" //p1+p0*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fmul %%st(2), %%st(0)" //(p1+p0*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fldt %[s_NORMAL_p2]" //p2, (p1+p0*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "faddp %%st(0), %%st(1)" //p2+(p1+p0*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fmul %%st(2), %%st(0)" //(p2+(p1+p0*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fldt %[s_NORMAL_p3]" //p3, (p2+(p1+p0*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "faddp %%st(0), %%st(1)" //p3+(p2+(p1+p0*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fmul %%st(2), %%st(0)" //(p3+(p2+(p1+p0*xsq)*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fldt %[s_NORMAL_p4]" //p4, (p3+(p2+(p1+p0*xsq)*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "faddp %%st(0), %%st(1)" //p4+(p3+(p2+(p1+p0*xsq)*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fmul %%st(2), %%st(0)" //(p4+(p3+(p2+(p1+p0*xsq)*xsq)*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fldt %[s_NORMAL_p5]" //p5, (p4+(p3+(p2+(p1+p0*xsq)*xsq)*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "faddp %%st(0), %%st(1)" //p5+(p4+(p3+(p2+(p1+p0*xsq)*xsq)*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fmulp %%st(0), %%st(2)" //q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq)=den, (p5+(p4+(p3+(p2+(p1+p0*xsq)*xsq)*xsq)*xsq)*xsq)*xsq=num
// num/den
"\n\t" "fdivp %%st(0), %%st(1)" //num/den
"\n\t" "fldt %[s_oneOverRootTwoPi]" // oneOverRootTwoPi, num/den
"\n\t" "fsubrp %%st(0), %%st(1)" //oneOverRootTwoPi - num/den
"\n\t" "fldl %[y]" //y, (oneOverRootTwoPi - num/den)
"\n\t" "fdiv %%st(0), %%st(1)" //y, (oneOverRootTwoPi - num/den)/y
// make room for one 8-byte double on the machine stack and pop y into it
// as the argument for exp_X2_2
"\n\t" "sub $8, %%esp"
"\n\t" "fstpl (%%esp)" //(oneOverRootTwoPi - num/den)/y
"\n\t" "call (%P[exp_X2_2])"
// drop the 8-byte argument again
"\n\t" "add $8, %%esp"
// presumably exp_X2_2 leaves its result in st(0); multiply it into the quotient
"\n\t" "fmulp %%st(0), %%st(1)"
// tear down the frame and return straight from inside the asm statement
"\n\t" "leave"
"\n\t" "ret"
:
: [y] "m" (y)
, [s_oneOverRootTwoPi] "m" (*s_oneOverRootTwoPi)
, [s_NORMAL_p0] "m" (*s_NORMAL_p0)
, [s_NORMAL_p1] "m" (*s_NORMAL_p1)
, [s_NORMAL_p2] "m" (*s_NORMAL_p2)
, [s_NORMAL_p3] "m" (*s_NORMAL_p3)
, [s_NORMAL_p4] "m" (*s_NORMAL_p4)
, [s_NORMAL_p5] "m" (*s_NORMAL_p5)
, [s_NORMAL_q1] "m" (*s_NORMAL_q1)
, [s_NORMAL_q2] "m" (*s_NORMAL_q2)
, [s_NORMAL_q3] "m" (*s_NORMAL_q3)
, [s_NORMAL_q4] "m" (*s_NORMAL_q4)
, [s_NORMAL_q5] "m" (*s_NORMAL_q5)
, [exp_X2_2] "i" (exp_X2_2)
:
);
}
这是那次无效加载之前 FPU 的状态:
(gdb) info float
R7: Valid 0x3ff4bba0d78724c01468 +0.00071574511983807409
=>R6: Valid 0x3fecb7f59c22579f9f60 +2.7412088761933612e-006
R5: Empty 0x3ff6f7e2148bebef37f8
R4: Empty 0x000000020a0d00000007
R3: Empty 0xf1be000000000002021e
R2: Empty 0x00001697f1bf00003654
R1: Empty 0x00020098007c00f8f0c0
R0: Empty 0x00077c81cc3b0002021e
Status Word: 0xffff3320 PE C0 C1
TOP: 6
Control Word: 0xffff037f IM DM ZM OM UM PM
PC: Extended Precision (64-bits)
RC: Round to nearest
Tag Word: 0xffff0fff
Instruction Pointer: 0x1b:0x00402dbd
Operand Pointer: 0xffff0023:0x004046f0
Opcode: 0xd8c9
我想知道上面状态字中“C1”标志的含义是什么——我找不到这方面的文档。 这是失败的 fldt(由 stepi 执行)之后紧接着的状态:
(gdb) stepi
0x00402dc5 1485 );
(gdb) info float
R7: Valid 0x3ff4bba0d78724c01468 +0.00071574511983807409
R6: Valid 0x3fecb7f59c22579f9f60 +2.7412088761933612e-006
=>R5: Zero 0x00000000000000000000 +0
R4: Empty 0x000000020a0d00000007
R3: Empty 0xf1be000000000002021e
R2: Empty 0x00001697f1bf00003654
R1: Empty 0x00020098007c00f8f0c0
R0: Empty 0x00077c81cc3b0002021e
Status Word: 0xffff2920 PE C0
TOP: 5
Control Word: 0xffff037f IM DM ZM OM UM PM
PC: Extended Precision (64-bits)
RC: Round to nearest
Tag Word: 0xffff07ff
Instruction Pointer: 0x1b:0x00402dbf
Operand Pointer: 0xffff0023:0x0040cce6
Opcode: 0xdb2d
好的,我现在已经修改了代码,使 0x00402db5 和 0x00402dbf 处的指令完全相同。第一条成功,第二条失败。下面是一个 gdb 会话,显示了反汇编代码,以及执行这两条相同指令之前的 FPU 状态。状态中唯一显著的差异是:在执行第二条 fldt 指令之前,状态字中存在 C1 标志:
(gdb) disassemble
Dump of assembler code for function poly4(double):
0x00402d7c <+0>: push %ebp
0x00402d7d <+1>: mov %esp,%ebp
0x00402d7f <+3>: sub $0x8,%esp
0x00402d82 <+6>: mov 0x8(%ebp),%eax
0x00402d85 <+9>: mov %eax,-0x8(%ebp)
0x00402d88 <+12>: mov 0xc(%ebp),%eax
0x00402d8b <+15>: mov %eax,-0x4(%ebp)
=> 0x00402d8e <+18>: fld1
0x00402d90 <+20>: fldl -0x8(%ebp)
0x00402d93 <+23>: fmul %st(0),%st
0x00402d95 <+25>: fdivrp %st,%st(1)
0x00402d97 <+27>: fldt 0x40470e
0x00402d9d <+33>: fadd %st(1),%st
0x00402d9f <+35>: fmul %st(1),%st
0x00402da1 <+37>: fldt 0x404704
0x00402da7 <+43>: faddp %st,%st(1)
0x00402da9 <+45>: fmul %st(1),%st
0x00402dab <+47>: fldt 0x4046fa
0x00402db1 <+53>: faddp %st,%st(1)
0x00402db3 <+55>: fmul %st(1),%st
0x00402db5 <+57>: fldt 0x4046f0
0x00402dbb <+63>: faddp %st,%st(1)
0x00402dbd <+65>: fmul %st(1),%st
0x00402dbf <+67>: fldt 0x4046f0
0x00402dc5 <+73>: faddp %st,%st(1)
0x00402dc7 <+75>: fldt 0x4046dc
0x00402dcd <+81>: fmul %st(2),%st
0x00402dcf <+83>: fldt 0x4046d2
0x00402dd5 <+89>: faddp %st,%st(1)
0x00402dd7 <+91>: fmul %st(2),%st
0x00402dd9 <+93>: fldt 0x4046c8
0x00402ddf <+99>: faddp %st,%st(1)
0x00402de1 <+101>: fmul %st(2),%st
0x00402de3 <+103>: fldt 0x4046be
0x00402de9 <+109>: faddp %st,%st(1)
0x00402deb <+111>: fmul %st(2),%st
0x00402ded <+113>: fldt 0x4046b4
0x00402df3 <+119>: faddp %st,%st(1)
0x00402df5 <+121>: fmul %st(2),%st
0x00402df7 <+123>: fldt 0x4046aa
0x00402dfd <+129>: faddp %st,%st(1)
0x00402dff <+131>: fmulp %st,%st(2)
0x00402e01 <+133>: fdivrp %st,%st(1)
0x00402e03 <+135>: fldt 0x40408e
0x00402e09 <+141>: fsubrp %st,%st(1)
0x00402e0b <+143>: fldl -0x8(%ebp)
0x00402e0e <+146>: fdivr %st,%st(1)
0x00402e10 <+148>: sub $0x8,%esp
0x00402e13 <+151>: fstpl (%esp)
0x00402e16 <+154>: fwait
0x00402e17 <+155>: call 0x4013c0 <exp_X2_2(double)>
0x00402e1c <+160>: add $0x8,%esp
0x00402e1f <+163>: fmulp %st,%st(1)
0x00402e21 <+165>: fstl 0x406020
0x00402e27 <+171>: fld %st(0)
0x00402e29 <+173>: fsubl 0x406020
0x00402e2f <+179>: fildll 0x403020
0x00402e35 <+185>: fmulp %st,%st(1)
0x00402e37 <+187>: fstpl 0x406020
0x00402e3d <+193>: leave
0x00402e3e <+194>: ret
0x00402e3f <+195>: flds 0x40472c
0x00402e45 <+201>: leave
0x00402e46 <+202>: ret
End of assembler dump.
(gdb) tbreak *0x00402db5
Temporary breakpoint 61 at 0x402db5: file cody2.cpp, line 1489.
(gdb) continue
Continuing.
Temporary breakpoint 61, 0x00402db5 in poly4 (y=37.37840817302294) at cody2.cpp:1489
1489 );
(gdb) info float
R7: Valid 0x3ff4bba0d78724c01468 +0.00071574511983807409
=>R6: Valid 0x3ff0c71ba235b8f6a603 +4.7471033066735141e-005
R5: Empty 0x3ffb8724c9f09161c535
R4: Empty 0xf13d00000a0d00000007
R3: Empty 0x07ec000000000002021e
R2: Empty 0x000016cbc40900003654
R1: Empty 0x00020098007c00f8f0c0
R0: Empty 0x00077c81cc3b0002021e
Status Word: 0xffff3120 PE C0
TOP: 6
Control Word: 0xffff037f IM DM ZM OM UM PM
PC: Extended Precision (64-bits)
RC: Round to nearest
Tag Word: 0xffff0fff
Instruction Pointer: 0x1b:0x00402db3
Operand Pointer: 0xffff0023:0x004046fa
Opcode: 0xd8c9
(gdb) stepi
0x00402dbb 1489 );
(gdb) info float
R7: Valid 0x3ff4bba0d78724c01468 +0.00071574511983807409
R6: Valid 0x3ff0c71ba235b8f6a603 +4.7471033066735141e-005
=>R5: Valid 0x3ff6f7e2148bebef37f8 +0.0037823963320275824
R4: Empty 0xf13d00000a0d00000007
R3: Empty 0x07ec000000000002021e
R2: Empty 0x000016cbc40900003654
R1: Empty 0x00020098007c00f8f0c0
R0: Empty 0x00077c81cc3b0002021e
Status Word: 0xffff2920 PE C0
TOP: 5
Control Word: 0xffff037f IM DM ZM OM UM PM
PC: Extended Precision (64-bits)
RC: Round to nearest
Tag Word: 0xffff03ff
Instruction Pointer: 0x1b:0x00402db5
Operand Pointer: 0xffff0023:0x004046f0
Opcode: 0xdb2d
(gdb) stepi
0x00402dbd 1489 );
(gdb) stepi
0x00402dbf 1489 );
(gdb) info float
R7: Valid 0x3ff4bba0d78724c01468 +0.00071574511983807409
=>R6: Valid 0x3fecb7f59c22579f9f60 +2.7412088761933612e-006
R5: Empty 0x3ff6f7e2148bebef37f8
R4: Empty 0xf13d00000a0d00000007
R3: Empty 0x07ec000000000002021e
R2: Empty 0x000016cbc40900003654
R1: Empty 0x00020098007c00f8f0c0
R0: Empty 0x00077c81cc3b0002021e
Status Word: 0xffff3320 PE C0 C1
TOP: 6
Control Word: 0xffff037f IM DM ZM OM UM PM
PC: Extended Precision (64-bits)
RC: Round to nearest
Tag Word: 0xffff0fff
Instruction Pointer: 0x1b:0x00402dbd
Operand Pointer: 0xffff0023:0x004046f0
Opcode: 0xd8c9
(gdb) stepi
0x00402dc5 1489 );
(gdb) info float
R7: Valid 0x3ff4bba0d78724c01468 +0.00071574511983807409
R6: Valid 0x3fecb7f59c22579f9f60 +2.7412088761933612e-006
=>R5: Zero 0x00000000000000000000 +0
R4: Empty 0xf13d00000a0d00000007
R3: Empty 0x07ec000000000002021e
R2: Empty 0x000016cbc40900003654
R1: Empty 0x00020098007c00f8f0c0
R0: Empty 0x00077c81cc3b0002021e
Status Word: 0xffff2920 PE C0
TOP: 5
Control Word: 0xffff037f IM DM ZM OM UM PM
PC: Extended Precision (64-bits)
RC: Round to nearest
Tag Word: 0xffff07ff
Instruction Pointer: 0x1b:0x00402dbf
Operand Pointer: 0xffff0023:0x0040ccf0
Opcode: 0xdb2d
(gdb)
I have been doing some inline-asm with gcc. Everything is ALMOST working, up to some behaviour that is just baffling me. I am evaluating a rational polynomial, but need to use 80-bit constants. The generated code seems to be perfect, but on execution, one of the 80-bit coefficients, when loaded into the fpu, is loaded as 0, even though in memory the byte values are not zero (and I think it is a valid 80-bit real, as the exact same constant loads fine when run with code generated by masm). Here is the output from a gdb session:
(gdb) disassemble
Dump of assembler code for function poly4(double):
0x00402d7c <+0>: push %ebp
0x00402d7d <+1>: mov %esp,%ebp
0x00402d7f <+3>: sub $0x8,%esp
0x00402d82 <+6>: mov 0x8(%ebp),%eax
0x00402d85 <+9>: mov %eax,-0x8(%ebp)
0x00402d88 <+12>: mov 0xc(%ebp),%eax
0x00402d8b <+15>: mov %eax,-0x4(%ebp)
0x00402d8e <+18>: fld1
0x00402d90 <+20>: fldl -0x8(%ebp)
0x00402d93 <+23>: fmul %st(0),%st
0x00402d95 <+25>: fdivrp %st,%st(1)
0x00402d97 <+27>: fldt 0x40470e
0x00402d9d <+33>: fadd %st(1),%st
0x00402d9f <+35>: fmul %st(1),%st
0x00402da1 <+37>: fldt 0x404704
0x00402da7 <+43>: faddp %st,%st(1)
0x00402da9 <+45>: fmul %st(1),%st
0x00402dab <+47>: fldt 0x4046fa
0x00402db1 <+53>: faddp %st,%st(1)
0x00402db3 <+55>: fmul %st(1),%st
0x00402db5 <+57>: fldt 0x4046f0
0x00402dbb <+63>: faddp %st,%st(1)
0x00402dbd <+65>: fmul %st(1),%st
=> 0x00402dbf <+67>: fldt 0x4046e6
0x00402dc5 <+73>: faddp %st,%st(1)
...snip....
End of assembler dump.
(gdb) info registers st0 st1 st2 st3 st4 st5
st0 2.7412088761933612e-006 (raw 0x3fecb7f59c22579f9f60)
st1 0.00071574511983807409 (raw 0x3ff4bba0d78724c01468)
st2 <invalid float value> (raw 0x00077c81cc3b0002021e)
st3 <invalid float value> (raw 0x00020098007c00f8f0c0)
st4 0 (raw 0x000013af076300003654)
st5 <invalid float value> (raw 0x0762000000000002021e)
(gdb) x/5xh 0x4046e6
0x4046e6 <_ZL11s_NORMAL_q5>: 0x8996 0xa5d6 0x3d00 0x990a 0x3ff1
(gdb) stepi
0x00402dc5 1577 );
(gdb) info registers st0 st1 st2 st3 st4 st5
st0 0 (raw 0x00000000000000000000)
st1 2.7412088761933612e-006 (raw 0x3fecb7f59c22579f9f60)
st2 0.00071574511983807409 (raw 0x3ff4bba0d78724c01468)
st3 <invalid float value> (raw 0x00077c81cc3b0002021e)
st4 <invalid float value> (raw 0x00020098007c00f8f0c0)
st5 0 (raw 0x000013af076300003654)
(gdb) disassemble
Dump of assembler code for function poly4(double):
0x00402d7c <+0>: push %ebp
0x00402d7d <+1>: mov %esp,%ebp
0x00402d7f <+3>: sub $0x8,%esp
0x00402d82 <+6>: mov 0x8(%ebp),%eax
0x00402d85 <+9>: mov %eax,-0x8(%ebp)
0x00402d88 <+12>: mov 0xc(%ebp),%eax
0x00402d8b <+15>: mov %eax,-0x4(%ebp)
0x00402d8e <+18>: fld1
0x00402d90 <+20>: fldl -0x8(%ebp)
0x00402d93 <+23>: fmul %st(0),%st
0x00402d95 <+25>: fdivrp %st,%st(1)
0x00402d97 <+27>: fldt 0x40470e
0x00402d9d <+33>: fadd %st(1),%st
0x00402d9f <+35>: fmul %st(1),%st
0x00402da1 <+37>: fldt 0x404704
0x00402da7 <+43>: faddp %st,%st(1)
0x00402da9 <+45>: fmul %st(1),%st
0x00402dab <+47>: fldt 0x4046fa
0x00402db1 <+53>: faddp %st,%st(1)
0x00402db3 <+55>: fmul %st(1),%st
0x00402db5 <+57>: fldt 0x4046f0
0x00402dbb <+63>: faddp %st,%st(1)
0x00402dbd <+65>: fmul %st(1),%st
0x00402dbf <+67>: fldt 0x4046e6
=> 0x00402dc5 <+73>: faddp %st,%st(1)
...snip...
End of assembler dump.
(gdb)
So note: before the stepi, we are about to execute the "fldt 0x4046e6", and a dump of memory at 0x4046e6 clearly shows it is not zero. Yet the "fldt 0x4046e6" results in zero being loaded into st0. All the previous fldt instructions worked fine; the constants are good (the identical code in masm with the same constants works flawlessly).
For those interested, here is the source for the constants:
// 80-bit extended-precision coefficients, each spelled out as 10 raw bytes with the
// least-significant byte first; the trailing comment on every line repeats the same
// bytes as one hex number (most-significant byte first) followed by the decimal value.
// poly4 loads p0..p5 for the numerator and q1..q5 for the denominator.
// NOTE(review): Double80 is not defined in this excerpt -- presumably a 10-byte array type.
Double80 s_NORMAL_p5 = { 0xE0, 0x14, 0x24, 0x6E, 0x43, 0x6C, 0x37, 0xF4, 0xEF, 0x3F}; // 0x3FEFF4376C436E2414E0, 2.9112874951168791857936318084879e-5
Double80 s_NORMAL_p4 = { 0x74, 0x5B, 0x7C, 0x72, 0xE2, 0x9F, 0x55, 0xBA, 0xF5, 0x3F}; // 0x3FF5BA559FE2727C5B74, 0.0014216191932278934659235163911273
Double80 s_NORMAL_p3 = { 0x3B, 0xD1, 0x83, 0xB3, 0xE8, 0xC1, 0x26, 0xB6, 0xF9, 0x3F}; // 0x3FF9B626C1E8B383D13B, 0.022235277870649807464320442391811
Double80 s_NORMAL_p2 = { 0x4B, 0xA2, 0x6C, 0x9F, 0x32, 0x73, 0x75, 0x82, 0xFC, 0x3F}; // 0x3FFC827573329F6CA24B, 0.1274011611602473638801278160334
Double80 s_NORMAL_p1 = { 0x49, 0xDC, 0x10, 0x22, 0x5C, 0x81, 0x14, 0xDD, 0xFC, 0x3F}; // 0x3FFCDD14815C2210DC49, 0.2158985340579569904693315913281
Double80 s_NORMAL_p0 = { 0x3E, 0xCE, 0xA6, 0x2B, 0xB9, 0x83, 0x04, 0xBD, 0xF9, 0x3F}; // 0x3FF9BD0483B92BA6CE3E, 0.023073441764940173030448369674463
Double80 s_NORMAL_q5 = { 0x96, 0x89, 0xD6, 0xA5, 0x00, 0x3D, 0x0A, 0x99, 0xF1, 0x3F}; // 0x3FF1990A3D00A5D68996, 0.0000729751555083966204509375
Double80 s_NORMAL_q4 = { 0xF8, 0x37, 0xEF, 0xEB, 0x8B, 0x14, 0xE2, 0xF7, 0xF6, 0x3F}; // 0x3FF6F7E2148BEBEF37F8, 0.0037823963320275824448625
Double80 s_NORMAL_q3 = { 0x35, 0xC5, 0x61, 0x91, 0xF0, 0xC9, 0x24, 0x87, 0xFB, 0x3F}; // 0x3FFB8724C9F09161C535, 0.065988137868928551531
Double80 s_NORMAL_q2 = { 0xCC, 0x68, 0x85, 0xAF, 0x42, 0xEB, 0xBC, 0xEF, 0xFD, 0x3F}; // 0x3FFDEFBCEB42AF8568CC, 0.4682382124808651180225
Double80 s_NORMAL_q1 = { 0xF3, 0xDB, 0x06, 0x40, 0x84, 0xA2, 0x62, 0xA4, 0xFF, 0x3F}; // 0x3FFFA462A2844006DBF3, 1.28426009614491121036
and here is the source for the poly4 function:
// poly4: evaluates a rational polynomial in xsq (1/y^2 per the inline comments)
// entirely in x87 inline assembly, using 80-bit coefficients loaded with fldt.
//   den: Horner form over s_NORMAL_q1..q5 with an implicit leading coefficient of 1
//   num: Horner form over s_NORMAL_p0..p5, finally multiplied by xsq
// It then forms (s_oneOverRootTwoPi - num/den)/y, stores y as a double at (%esp),
// calls exp_X2_2, and multiplies the two values left on the x87 stack.
// The trailing comment on each instruction lists the expected x87 stack, top first.
// NOTE(review): the asm ends with its own "leave"/"ret", so control never reaches
// the closing brace and there is no C-level return statement; the result is
// presumably left in st(0) for the caller -- confirm against the calling convention.
// NOTE(review): the output and clobber lists are empty even though the asm changes
// %esp and the x87 register stack.
inline long double poly4(double y)
{
__asm__(
"\n\t" "fld1" //1
"\n\t" "fldl %[y]" //y, 1
"\n\t" "fmul %%st(0), %%st(0)" //y^2, 1
// NOTE(review): the gdb disassembly of the compiled function prints the next
// instruction as "fdivrp %st,%st(1)" -- confirm the operand order is the intended one.
"\n\t" "fdivp %%st(0), %%st(1)" //1/y^2=xsq
//den
"\n\t" "fldt %[s_NORMAL_q1]" //q1, xsq
"\n\t" "fadd %%st(1), %%st(0)" //q1+xsq, xsq
"\n\t" "fmul %%st(1), %%st(0)" //(q1+xsq)*xsq, xsq
"\n\t" "fldt %[s_NORMAL_q2]" //q2, (q1+xsq)*xsq, xsq
"\n\t" "faddp %%st(0), %%st(1)" //q2+(q1+xsq)*xsq, xsq
"\n\t" "fmul %%st(1), %%st(0)" //(q2+(q1+xsq)*xsq)*xsq, xsq
"\n\t" "fldt %[s_NORMAL_q3]" //q3, (q2+(q1+xsq)*xsq)*xsq, xsq
"\n\t" "faddp %%st(0), %%st(1)" //q3+(q2+(q1+xsq)*xsq)*xsq, xsq
"\n\t" "fmul %%st(1), %%st(0)" //(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq, xsq
"\n\t" "fldt %[s_NORMAL_q4]" //q4, (q3+(q2+(q1+xsq)*xsq)*xsq)*xsq, xsq
"\n\t" "faddp %%st(0), %%st(1)" //q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq, xsq
"\n\t" "fmul %%st(1), %%st(0)" //(q4+(q3+(q2+(q1+ xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fldt %[s_NORMAL_q5]" //q5, (q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "faddp %%st(0), %%st(1)" //q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
//num
"\n\t" "fldt %[s_NORMAL_p0]" //p0, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fmul %%st(2), %%st(0)" //p0*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fldt %[s_NORMAL_p1]" //p1, p0*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "faddp %%st(0), %%st(1)" //p1+p0*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fmul %%st(2), %%st(0)" //(p1+p0*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fldt %[s_NORMAL_p2]" //p2, (p1+p0*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "faddp %%st(0), %%st(1)" //p2+(p1+p0*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fmul %%st(2), %%st(0)" //(p2+(p1+p0*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fldt %[s_NORMAL_p3]" //p3, (p2+(p1+p0*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "faddp %%st(0), %%st(1)" //p3+(p2+(p1+p0*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fmul %%st(2), %%st(0)" //(p3+(p2+(p1+p0*xsq)*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fldt %[s_NORMAL_p4]" //p4, (p3+(p2+(p1+p0*xsq)*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "faddp %%st(0), %%st(1)" //p4+(p3+(p2+(p1+p0*xsq)*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fmul %%st(2), %%st(0)" //(p4+(p3+(p2+(p1+p0*xsq)*xsq)*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fldt %[s_NORMAL_p5]" //p5, (p4+(p3+(p2+(p1+p0*xsq)*xsq)*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "faddp %%st(0), %%st(1)" //p5+(p4+(p3+(p2+(p1+p0*xsq)*xsq)*xsq)*xsq)*xsq, q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq), xsq
"\n\t" "fmulp %%st(0), %%st(2)" //q5+(q4+(q3+(q2+(q1+xsq)*xsq)*xsq)*xsq)=den, (p5+(p4+(p3+(p2+(p1+p0*xsq)*xsq)*xsq)*xsq)*xsq)*xsq=num
// num/den
"\n\t" "fdivp %%st(0), %%st(1)" //num/den
"\n\t" "fldt %[s_oneOverRootTwoPi]" // oneOverRootTwoPi, num/den
"\n\t" "fsubrp %%st(0), %%st(1)" //oneOverRootTwoPi - num/den
"\n\t" "fldl %[y]" //y, (oneOverRootTwoPi - num/den)
"\n\t" "fdiv %%st(0), %%st(1)" //y, (oneOverRootTwoPi - num/den)/y
// make room for one 8-byte double on the machine stack and pop y into it
// as the argument for exp_X2_2
"\n\t" "sub $8, %%esp"
"\n\t" "fstpl (%%esp)" //(oneOverRootTwoPi - num/den)/y
"\n\t" "call (%P[exp_X2_2])"
// drop the 8-byte argument again
"\n\t" "add $8, %%esp"
// presumably exp_X2_2 leaves its result in st(0); multiply it into the quotient
"\n\t" "fmulp %%st(0), %%st(1)"
// tear down the frame and return straight from inside the asm statement
"\n\t" "leave"
"\n\t" "ret"
:
: [y] "m" (y)
, [s_oneOverRootTwoPi] "m" (*s_oneOverRootTwoPi)
, [s_NORMAL_p0] "m" (*s_NORMAL_p0)
, [s_NORMAL_p1] "m" (*s_NORMAL_p1)
, [s_NORMAL_p2] "m" (*s_NORMAL_p2)
, [s_NORMAL_p3] "m" (*s_NORMAL_p3)
, [s_NORMAL_p4] "m" (*s_NORMAL_p4)
, [s_NORMAL_p5] "m" (*s_NORMAL_p5)
, [s_NORMAL_q1] "m" (*s_NORMAL_q1)
, [s_NORMAL_q2] "m" (*s_NORMAL_q2)
, [s_NORMAL_q3] "m" (*s_NORMAL_q3)
, [s_NORMAL_q4] "m" (*s_NORMAL_q4)
, [s_NORMAL_q5] "m" (*s_NORMAL_q5)
, [exp_X2_2] "i" (exp_X2_2)
:
);
}
This is the state of the FPU immediately prior to the ineffective load:
(gdb) info float
R7: Valid 0x3ff4bba0d78724c01468 +0.00071574511983807409
=>R6: Valid 0x3fecb7f59c22579f9f60 +2.7412088761933612e-006
R5: Empty 0x3ff6f7e2148bebef37f8
R4: Empty 0x000000020a0d00000007
R3: Empty 0xf1be000000000002021e
R2: Empty 0x00001697f1bf00003654
R1: Empty 0x00020098007c00f8f0c0
R0: Empty 0x00077c81cc3b0002021e
Status Word: 0xffff3320 PE C0 C1
TOP: 6
Control Word: 0xffff037f IM DM ZM OM UM PM
PC: Extended Precision (64-bits)
RC: Round to nearest
Tag Word: 0xffff0fff
Instruction Pointer: 0x1b:0x00402dbd
Operand Pointer: 0xffff0023:0x004046f0
Opcode: 0xd8c9
I am wondering what is the meaning of the "C1" flag in the status word above - I cannot find documentation on this.
This is the state immediately after the failed fldt (executed by the stepi):
(gdb) stepi
0x00402dc5 1485 );
(gdb) info float
R7: Valid 0x3ff4bba0d78724c01468 +0.00071574511983807409
R6: Valid 0x3fecb7f59c22579f9f60 +2.7412088761933612e-006
=>R5: Zero 0x00000000000000000000 +0
R4: Empty 0x000000020a0d00000007
R3: Empty 0xf1be000000000002021e
R2: Empty 0x00001697f1bf00003654
R1: Empty 0x00020098007c00f8f0c0
R0: Empty 0x00077c81cc3b0002021e
Status Word: 0xffff2920 PE C0
TOP: 5
Control Word: 0xffff037f IM DM ZM OM UM PM
PC: Extended Precision (64-bits)
RC: Round to nearest
Tag Word: 0xffff07ff
Instruction Pointer: 0x1b:0x00402dbf
Operand Pointer: 0xffff0023:0x0040cce6
Opcode: 0xdb2d
OK, I have now modified the code so that the instructions at 0x00402db5 and 0x00402dbf are identical. The first succeeds, the second fails. Here is a gdb session showing the disassembled code, and the fpu state immediately before execution of the two identical instructions. The only significant difference in the state is the presence of the C1 flag in the status prior to the execution of the second fldt instruction:
(gdb) disassemble
Dump of assembler code for function poly4(double):
0x00402d7c <+0>: push %ebp
0x00402d7d <+1>: mov %esp,%ebp
0x00402d7f <+3>: sub $0x8,%esp
0x00402d82 <+6>: mov 0x8(%ebp),%eax
0x00402d85 <+9>: mov %eax,-0x8(%ebp)
0x00402d88 <+12>: mov 0xc(%ebp),%eax
0x00402d8b <+15>: mov %eax,-0x4(%ebp)
=> 0x00402d8e <+18>: fld1
0x00402d90 <+20>: fldl -0x8(%ebp)
0x00402d93 <+23>: fmul %st(0),%st
0x00402d95 <+25>: fdivrp %st,%st(1)
0x00402d97 <+27>: fldt 0x40470e
0x00402d9d <+33>: fadd %st(1),%st
0x00402d9f <+35>: fmul %st(1),%st
0x00402da1 <+37>: fldt 0x404704
0x00402da7 <+43>: faddp %st,%st(1)
0x00402da9 <+45>: fmul %st(1),%st
0x00402dab <+47>: fldt 0x4046fa
0x00402db1 <+53>: faddp %st,%st(1)
0x00402db3 <+55>: fmul %st(1),%st
0x00402db5 <+57>: fldt 0x4046f0
0x00402dbb <+63>: faddp %st,%st(1)
0x00402dbd <+65>: fmul %st(1),%st
0x00402dbf <+67>: fldt 0x4046f0
0x00402dc5 <+73>: faddp %st,%st(1)
0x00402dc7 <+75>: fldt 0x4046dc
0x00402dcd <+81>: fmul %st(2),%st
0x00402dcf <+83>: fldt 0x4046d2
0x00402dd5 <+89>: faddp %st,%st(1)
0x00402dd7 <+91>: fmul %st(2),%st
0x00402dd9 <+93>: fldt 0x4046c8
0x00402ddf <+99>: faddp %st,%st(1)
0x00402de1 <+101>: fmul %st(2),%st
0x00402de3 <+103>: fldt 0x4046be
0x00402de9 <+109>: faddp %st,%st(1)
0x00402deb <+111>: fmul %st(2),%st
0x00402ded <+113>: fldt 0x4046b4
0x00402df3 <+119>: faddp %st,%st(1)
0x00402df5 <+121>: fmul %st(2),%st
0x00402df7 <+123>: fldt 0x4046aa
0x00402dfd <+129>: faddp %st,%st(1)
0x00402dff <+131>: fmulp %st,%st(2)
0x00402e01 <+133>: fdivrp %st,%st(1)
0x00402e03 <+135>: fldt 0x40408e
0x00402e09 <+141>: fsubrp %st,%st(1)
0x00402e0b <+143>: fldl -0x8(%ebp)
0x00402e0e <+146>: fdivr %st,%st(1)
0x00402e10 <+148>: sub $0x8,%esp
0x00402e13 <+151>: fstpl (%esp)
0x00402e16 <+154>: fwait
0x00402e17 <+155>: call 0x4013c0 <exp_X2_2(double)>
0x00402e1c <+160>: add $0x8,%esp
0x00402e1f <+163>: fmulp %st,%st(1)
0x00402e21 <+165>: fstl 0x406020
0x00402e27 <+171>: fld %st(0)
0x00402e29 <+173>: fsubl 0x406020
0x00402e2f <+179>: fildll 0x403020
0x00402e35 <+185>: fmulp %st,%st(1)
0x00402e37 <+187>: fstpl 0x406020
0x00402e3d <+193>: leave
0x00402e3e <+194>: ret
0x00402e3f <+195>: flds 0x40472c
0x00402e45 <+201>: leave
0x00402e46 <+202>: ret
End of assembler dump.
(gdb) tbreak *0x00402db5
Temporary breakpoint 61 at 0x402db5: file cody2.cpp, line 1489.
(gdb) continue
Continuing.
Temporary breakpoint 61, 0x00402db5 in poly4 (y=37.37840817302294) at cody2.cpp:1489
1489 );
(gdb) info float
R7: Valid 0x3ff4bba0d78724c01468 +0.00071574511983807409
=>R6: Valid 0x3ff0c71ba235b8f6a603 +4.7471033066735141e-005
R5: Empty 0x3ffb8724c9f09161c535
R4: Empty 0xf13d00000a0d00000007
R3: Empty 0x07ec000000000002021e
R2: Empty 0x000016cbc40900003654
R1: Empty 0x00020098007c00f8f0c0
R0: Empty 0x00077c81cc3b0002021e
Status Word: 0xffff3120 PE C0
TOP: 6
Control Word: 0xffff037f IM DM ZM OM UM PM
PC: Extended Precision (64-bits)
RC: Round to nearest
Tag Word: 0xffff0fff
Instruction Pointer: 0x1b:0x00402db3
Operand Pointer: 0xffff0023:0x004046fa
Opcode: 0xd8c9
(gdb) stepi
0x00402dbb 1489 );
(gdb) info float
R7: Valid 0x3ff4bba0d78724c01468 +0.00071574511983807409
R6: Valid 0x3ff0c71ba235b8f6a603 +4.7471033066735141e-005
=>R5: Valid 0x3ff6f7e2148bebef37f8 +0.0037823963320275824
R4: Empty 0xf13d00000a0d00000007
R3: Empty 0x07ec000000000002021e
R2: Empty 0x000016cbc40900003654
R1: Empty 0x00020098007c00f8f0c0
R0: Empty 0x00077c81cc3b0002021e
Status Word: 0xffff2920 PE C0
TOP: 5
Control Word: 0xffff037f IM DM ZM OM UM PM
PC: Extended Precision (64-bits)
RC: Round to nearest
Tag Word: 0xffff03ff
Instruction Pointer: 0x1b:0x00402db5
Operand Pointer: 0xffff0023:0x004046f0
Opcode: 0xdb2d
(gdb) stepi
0x00402dbd 1489 );
(gdb) stepi
0x00402dbf 1489 );
(gdb) info float
R7: Valid 0x3ff4bba0d78724c01468 +0.00071574511983807409
=>R6: Valid 0x3fecb7f59c22579f9f60 +2.7412088761933612e-006
R5: Empty 0x3ff6f7e2148bebef37f8
R4: Empty 0xf13d00000a0d00000007
R3: Empty 0x07ec000000000002021e
R2: Empty 0x000016cbc40900003654
R1: Empty 0x00020098007c00f8f0c0
R0: Empty 0x00077c81cc3b0002021e
Status Word: 0xffff3320 PE C0 C1
TOP: 6
Control Word: 0xffff037f IM DM ZM OM UM PM
PC: Extended Precision (64-bits)
RC: Round to nearest
Tag Word: 0xffff0fff
Instruction Pointer: 0x1b:0x00402dbd
Operand Pointer: 0xffff0023:0x004046f0
Opcode: 0xd8c9
(gdb) stepi
0x00402dc5 1489 );
(gdb) info float
R7: Valid 0x3ff4bba0d78724c01468 +0.00071574511983807409
R6: Valid 0x3fecb7f59c22579f9f60 +2.7412088761933612e-006
=>R5: Zero 0x00000000000000000000 +0
R4: Empty 0xf13d00000a0d00000007
R3: Empty 0x07ec000000000002021e
R2: Empty 0x000016cbc40900003654
R1: Empty 0x00020098007c00f8f0c0
R0: Empty 0x00077c81cc3b0002021e
Status Word: 0xffff2920 PE C0
TOP: 5
Control Word: 0xffff037f IM DM ZM OM UM PM
PC: Extended Precision (64-bits)
RC: Round to nearest
Tag Word: 0xffff07ff
Instruction Pointer: 0x1b:0x00402dbf
Operand Pointer: 0xffff0023:0x0040ccf0
Opcode: 0xdb2d
(gdb)
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
我注意到在调试器中,堆栈上有效条目下方的条目被标记为 `invalid float value`(无效浮点值)。
这使我相信您的 x87 堆栈可能在该例程开始执行之前就已损坏。这可能是因为堆栈溢出,或者另一个例程使用了 MMX 指令,却没有发出恢复浮点状态所需的 `emms` 指令。请在例程入口处中断,在 gdb 中执行 `info float`,并报告结果。
I note that in the debugger, the entries on the stack below the valid entries are marked `invalid float value`.
This leads me to believe that your x87 stack may be corrupted before this routine begins executing. This could be either because you have overflowed the stack, or another routine has used MMX instructions without issuing the required `emms` instruction to restore the floating-point state. Break at entry to your routine, run `info float` in gdb, and report the results.