转换为 double 与递增 double 计数器时的性能差异

发布于 2024-10-29 11:35:54 字数 2311 浏览 9 评论 0原文

我在循环中递增一个计数器，循环结束后需要在双精度（double）运算中使用它。那么，您认为下面哪种写法更快？（还是差距小到难以判断？）

代码 1：

/* Variant 1: the loop counter is itself a double, so it can be used as the
   divisor directly, with no conversion after the loop. */
double dubs = 3.14159265;
double d;
for(d=0; d<BIGNUM; d++) { /* do stuff not depending on d */ }
/* Here d is the first counter value for which d<BIGNUM was false. */
dubs /= d;

代码 2：

/* Variant 2: the loop counter is an int and is converted to double once,
   after the loop, for the division. */
double dubs = 3.14159265;
int i;
for(i=0; i<BIGNUM; i++) { /* do stuff not depending on i */ }
/* Single int -> double conversion of the final counter value. */
dubs /= (double) i;

结果是否取决于 BIGNUM 的大小？我知道差异会非常微小，只是从理论上感到好奇。

额外问题：如果是 C++，并改用 static_cast 进行转换，您的答案会有变化吗？

--编辑--

好的，下面是示例代码以及编译器生成的汇编代码：

#define BIGNUM 1000000000
#define NUMLOOPS 1000

/*
 * Benchmark variant 1: counts from 0 up to BIGNUM with a double loop counter,
 * then returns 3.14159265 divided by the final counter value.
 */
double test1()
{
    double dubs = 3.14159265;
    double d;
    /* k exists only to give the loop body some work; it is never read. */
    int k = 1;
    /* NOTE(review): k *= 2 overflows a signed int after about 31 doublings,
       which is undefined behavior in C; an optimizing build may also drop
       the whole loop because k is unused. */
    for(d=0; d<BIGNUM; d++) { k*= 2; }
    /* d is already a double, so no conversion is needed for the division. */
    dubs /= d;
    return dubs;
}

/*
 * Benchmark variant 2: counts from 0 up to BIGNUM with an int loop counter,
 * then returns 3.14159265 divided by the final counter value converted to
 * double.
 */
double test2()
{
    double dubs = 3.14159265;
    int i;
    /* k exists only to give the loop body some work; it is never read. */
    int k = 1;
    /* NOTE(review): k *= 2 overflows a signed int after about 31 doublings,
       which is undefined behavior in C; an optimizing build may also drop
       the whole loop because k is unused. */
    for(i=0; i<BIGNUM; i++) { k*= 2; }
    /* The only int -> double conversion happens once, after the loop. */
    dubs /= (double)i;
    return dubs;
}

/*
 * Driver: calls test1() and test2() NUMLOOPS times each, alternating between
 * them, and accumulates their return values.
 */
int main()
{
    /* Running sums of the results; they are written but never read or printed. */
    double d1=0;
    double d2=0;
    int i;
    for(i=0; i<NUMLOOPS; i++)
    {
        d1 += test1();
        d2 += test2();
    }
    /* No explicit return statement: control simply reaches the end of main. */
}


# test1 (double loop counter) as emitted by the compiler, AT&T syntax, x86-64.
# Stack slots: -4(%rbp) = k, -16(%rbp) = dubs, -24(%rbp) = d,
# -40(%rbp) = temporary used to move the return value into %xmm0.
_test1:
LFB2:
    pushq   %rbp
LCFI0:
    movq    %rsp, %rbp
LCFI1:
    subq    $48, %rsp
LCFI2:
    # NOTE(review): mcount is presumably a profiling hook (e.g. a -pg build) - confirm.
    call mcount
    # The integer immediate is the IEEE-754 bit pattern of 3.14159265 (dubs).
    movabsq $4614256656543962353, %rax
    movq    %rax, -16(%rbp)
    # k = 1
    movl    $1, -4(%rbp)
    # d = 0.0: all-zero 64-bit pattern (writing %eax also clears the upper half of %rax).
    movl    $0, %eax
    movq    %rax, -24(%rbp)
    # Enter the loop at its condition check.
    jmp L2
L3:
    # Loop body: k *= 2, done as a one-bit left shift directly in memory.
    sall    -4(%rbp)
    # d++: load d, add the constant at LC2, store d back.
    # LC2 is not shown in this listing; presumably 1.0 - confirm.
    movsd   -24(%rbp), %xmm0
    movsd   LC2(%rip), %xmm1
    addsd   %xmm1, %xmm0
    movsd   %xmm0, -24(%rbp)
L2:
    # Loop condition: reload d and the constant at LC3.
    # LC3 is not shown in this listing; presumably BIGNUM as a double - confirm.
    movsd   -24(%rbp), %xmm1
    movsd   LC3(%rip), %xmm0
    # Branch back to the body while LC3 > d, i.e. while d < LC3.
    ucomisd %xmm1, %xmm0
    ja  L3
    # dubs /= d; d is already a double, so no conversion instruction is needed.
    movsd   -16(%rbp), %xmm0
    divsd   -24(%rbp), %xmm0
    movsd   %xmm0, -16(%rbp)
    # Copy dubs through the -40(%rbp) temporary into %xmm0, which carries the double result.
    movq    -16(%rbp), %rax
    movq    %rax, -40(%rbp)
    movsd   -40(%rbp), %xmm0
    leave
    ret


# test2 (int loop counter) as emitted by the compiler, AT&T syntax, x86-64.
# Stack slots: -4(%rbp) = i, -8(%rbp) = k, -16(%rbp) = dubs,
# -24(%rbp) = temporary used to move the return value into %xmm0.
_test2:
LFB3:
    pushq   %rbp
LCFI3:
    movq    %rsp, %rbp
LCFI4:
    subq    $32, %rsp
LCFI5:
    # NOTE(review): mcount is presumably a profiling hook (e.g. a -pg build) - confirm.
    call mcount
    # The integer immediate is the IEEE-754 bit pattern of 3.14159265 (dubs).
    movabsq $4614256656543962353, %rax
    movq    %rax, -16(%rbp)
    # k = 1
    movl    $1, -8(%rbp)
    # i = 0
    movl    $0, -4(%rbp)
    # Enter the loop at its condition check.
    jmp L7
L8:
    # Loop body: k *= 2 as a one-bit left shift, then i++, both directly in memory.
    sall    -8(%rbp)
    incl    -4(%rbp)
L7:
    # Loop condition: continue while i <= 99999, i.e. i < 100000.
    # NOTE(review): this bound does not match "#define BIGNUM 1000000000" in the
    # C listing; this output was apparently built with BIGNUM = 100000 - confirm.
    cmpl    $99999, -4(%rbp)
    jle L8
    # The single int -> double conversion of i, performed once after the loop.
    cvtsi2sd    -4(%rbp), %xmm1
    # dubs /= (double)i
    movsd   -16(%rbp), %xmm0
    divsd   %xmm1, %xmm0
    movsd   %xmm0, -16(%rbp)
    # Copy dubs through the -24(%rbp) temporary into %xmm0, which carries the double result.
    movq    -16(%rbp), %rax
    movq    %rax, -24(%rbp)
    movsd   -24(%rbp), %xmm0
    leave
    ret

测试当前正在运行......

原文

I'm incrementing a counter, which I will need to use after the loop in double arithmetic. So, which would you expect to be faster? (Or too close to call?)

Code 1:

/* Variant 1: the loop counter is itself a double, so it can be used as the
   divisor directly, with no conversion after the loop. */
double dubs = 3.14159265;
double d;
for(d=0; d<BIGNUM; d++) { /* do stuff not depending on d */ }
/* Here d is the first counter value for which d<BIGNUM was false. */
dubs /= d;

Code 2:

/* Variant 2: the loop counter is an int and is converted to double once,
   after the loop, for the division. */
double dubs = 3.14159265;
int i;
for(i=0; i<BIGNUM; i++) { /* do stuff not depending on i */ }
/* Single int -> double conversion of the final counter value. */
dubs /= (double) i;

And does it depend on the size of BIGNUM? I know it would be a minuscule difference, but just found myself wondering in theory.

Bonus question: if it were C++, any change in your answer for using static_cast?

--Edit--

OK, here's some sample code and the generated assembly:

#define BIGNUM 1000000000
#define NUMLOOPS 1000

/*
 * Benchmark variant 1: counts from 0 up to BIGNUM with a double loop counter,
 * then returns 3.14159265 divided by the final counter value.
 */
double test1()
{
    double dubs = 3.14159265;
    double d;
    /* k exists only to give the loop body some work; it is never read. */
    int k = 1;
    /* NOTE(review): k *= 2 overflows a signed int after about 31 doublings,
       which is undefined behavior in C; an optimizing build may also drop
       the whole loop because k is unused. */
    for(d=0; d<BIGNUM; d++) { k*= 2; }
    /* d is already a double, so no conversion is needed for the division. */
    dubs /= d;
    return dubs;
}

/*
 * Benchmark variant 2: counts from 0 up to BIGNUM with an int loop counter,
 * then returns 3.14159265 divided by the final counter value converted to
 * double.
 */
double test2()
{
    double dubs = 3.14159265;
    int i;
    /* k exists only to give the loop body some work; it is never read. */
    int k = 1;
    /* NOTE(review): k *= 2 overflows a signed int after about 31 doublings,
       which is undefined behavior in C; an optimizing build may also drop
       the whole loop because k is unused. */
    for(i=0; i<BIGNUM; i++) { k*= 2; }
    /* The only int -> double conversion happens once, after the loop. */
    dubs /= (double)i;
    return dubs;
}

/*
 * Driver: calls test1() and test2() NUMLOOPS times each, alternating between
 * them, and accumulates their return values.
 */
int main()
{
    /* Running sums of the results; they are written but never read or printed. */
    double d1=0;
    double d2=0;
    int i;
    for(i=0; i<NUMLOOPS; i++)
    {
        d1 += test1();
        d2 += test2();
    }
    /* No explicit return statement: control simply reaches the end of main. */
}


# test1 (double loop counter) as emitted by the compiler, AT&T syntax, x86-64.
# Stack slots: -4(%rbp) = k, -16(%rbp) = dubs, -24(%rbp) = d,
# -40(%rbp) = temporary used to move the return value into %xmm0.
_test1:
LFB2:
    pushq   %rbp
LCFI0:
    movq    %rsp, %rbp
LCFI1:
    subq    $48, %rsp
LCFI2:
    # NOTE(review): mcount is presumably a profiling hook (e.g. a -pg build) - confirm.
    call mcount
    # The integer immediate is the IEEE-754 bit pattern of 3.14159265 (dubs).
    movabsq $4614256656543962353, %rax
    movq    %rax, -16(%rbp)
    # k = 1
    movl    $1, -4(%rbp)
    # d = 0.0: all-zero 64-bit pattern (writing %eax also clears the upper half of %rax).
    movl    $0, %eax
    movq    %rax, -24(%rbp)
    # Enter the loop at its condition check.
    jmp L2
L3:
    # Loop body: k *= 2, done as a one-bit left shift directly in memory.
    sall    -4(%rbp)
    # d++: load d, add the constant at LC2, store d back.
    # LC2 is not shown in this listing; presumably 1.0 - confirm.
    movsd   -24(%rbp), %xmm0
    movsd   LC2(%rip), %xmm1
    addsd   %xmm1, %xmm0
    movsd   %xmm0, -24(%rbp)
L2:
    # Loop condition: reload d and the constant at LC3.
    # LC3 is not shown in this listing; presumably BIGNUM as a double - confirm.
    movsd   -24(%rbp), %xmm1
    movsd   LC3(%rip), %xmm0
    # Branch back to the body while LC3 > d, i.e. while d < LC3.
    ucomisd %xmm1, %xmm0
    ja  L3
    # dubs /= d; d is already a double, so no conversion instruction is needed.
    movsd   -16(%rbp), %xmm0
    divsd   -24(%rbp), %xmm0
    movsd   %xmm0, -16(%rbp)
    # Copy dubs through the -40(%rbp) temporary into %xmm0, which carries the double result.
    movq    -16(%rbp), %rax
    movq    %rax, -40(%rbp)
    movsd   -40(%rbp), %xmm0
    leave
    ret


# test2 (int loop counter) as emitted by the compiler, AT&T syntax, x86-64.
# Stack slots: -4(%rbp) = i, -8(%rbp) = k, -16(%rbp) = dubs,
# -24(%rbp) = temporary used to move the return value into %xmm0.
_test2:
LFB3:
    pushq   %rbp
LCFI3:
    movq    %rsp, %rbp
LCFI4:
    subq    $32, %rsp
LCFI5:
    # NOTE(review): mcount is presumably a profiling hook (e.g. a -pg build) - confirm.
    call mcount
    # The integer immediate is the IEEE-754 bit pattern of 3.14159265 (dubs).
    movabsq $4614256656543962353, %rax
    movq    %rax, -16(%rbp)
    # k = 1
    movl    $1, -8(%rbp)
    # i = 0
    movl    $0, -4(%rbp)
    # Enter the loop at its condition check.
    jmp L7
L8:
    # Loop body: k *= 2 as a one-bit left shift, then i++, both directly in memory.
    sall    -8(%rbp)
    incl    -4(%rbp)
L7:
    # Loop condition: continue while i <= 99999, i.e. i < 100000.
    # NOTE(review): this bound does not match "#define BIGNUM 1000000000" in the
    # C listing; this output was apparently built with BIGNUM = 100000 - confirm.
    cmpl    $99999, -4(%rbp)
    jle L8
    # The single int -> double conversion of i, performed once after the loop.
    cvtsi2sd    -4(%rbp), %xmm1
    # dubs /= (double)i
    movsd   -16(%rbp), %xmm0
    divsd   %xmm1, %xmm0
    movsd   %xmm0, -16(%rbp)
    # Copy dubs through the -24(%rbp) temporary into %xmm0, which carries the double result.
    movq    -16(%rbp), %rax
    movq    %rax, -24(%rbp)
    movsd   -24(%rbp), %xmm0
    leave
    ret

Test is currently running....

分享到QQ

分享到微博