使用泰勒展开式的 sin(x) 汇编代码

发布于 2024-07-30 03:25:14 字数 81 浏览 5 评论 0原文

在 x86 Linux 中,如何使用 Taylor Expansion 在汇编代码中实现 sin(x)

In x86 Linux, how can I implement sin(x) in assembly code using Taylor Expansion?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(3

半衾梦 2024-08-06 03:25:14

你没有说明是哪种CPU架构,所以我假设是x86。

最简单(也可能是最低效)的方法是在 RPN 中编写公式,它几乎可以直接映射到 FPU 指令。

示例,

代数公式: x - (x^3/3!) + (x^5/5!)

RPN : xxx * x * 3 2 * / - xx * x * x * x * 5 4 * 3 * 2 * / +

变成:

fld x
fld x
fld x
fmul
fld x
fmul
fild [const_3]
fild [const_2]
fmul
fdiv
fsub
fld x
fld x
fmul 
fld x
fmul
fld x
fmul
fld x
fmul
fild [const_5]
fild [const_4]
fmul
fild [const_3]
fmul
fild [const_2]
fmul
fdiv
fadd

有一些明显的优化策略 -

  • 而不是计算 x, xxx,
    xxxxx 等对于每个术语,存储
    “运行产品”并相乘
    每次 x*x
  • 而不是
    计算每个的阶乘
    术语,做同样的“运行产品”

这里是x86 FPU的一些注释代码,每个FPU指令后面的注释显示该指令执行后的堆栈状态,堆栈顶部(st0)位于左侧,例如:

fldz ; 0
fld1 ; 1, 0

--snip- -

bits 32

section .text

extern printf
extern atof
extern atoi
extern puts
global main

taylor_sin:
  push eax
  push ecx

  ; input :
  ;  st(0) = x, value to approximate sin(x) of
  ;  [esp+12] = number of taylor series terms

  ; variables we'll use :
  ; s = sum of all terms (final result)
  ; x = value we want to take the sin of
  ; fi = factorial index (1, 3, 5, 7, ...)
  ; fc = factorial current (1, 6, 120, 5040, ...)
  ; n = numerator of term (x, x^3, x^5, x^7, ...)

  ; setup state for each iteration (term)
  fldz ; s x
  fxch st1 ; x s
  fld1 ; fi x s
  fld1 ; fc fi x s
  fld st2 ; n fc fi x s

  ; first term
  fld st1 ; fc n fc fi x s
  fdivr st0,st1 ; r n fc fi x s
  faddp st5,st0 ; n fc fi x s

  ; loop through each term
  mov ecx,[esp+12] ; number of terms
  xor eax,eax ; zero add/sub counter

loop_term:
  ; calculate next odd factorial
  fld1 ; 1 n fc fi x s
  faddp st3 ; n fc fi x s
  fld st2 ; fi n fc fi x s
  fmulp st2,st0
  fld1 ; 1 n fc fi x s
  faddp st3 ; n fc fi x s
  fld st2 ; fi n fc fi x s
  fmulp st2,st0 ; n fc fi x s

  ; calculate next odd power of x
  fmul st0,st3 ; n*x fc fi x s
  fmul st0,st3 ; n*x*x fc fi x s

  ; divide power by factorial
  fld st1 ; fc n fc fi x s
  fdivr st0,st1 ; r n fc fi x s

  ; check if we need to add or subtract this term
  test eax,1
  jnz odd_term
  fsubp st5,st0 ; n fc fi x s
  jmp skip
odd_term:
  ; accumulate result
  faddp st5,st0 ; n fc fi x s
skip:
  inc eax ; increment add/sub counter
  loop loop_term

  ; unstack work variables
  fstp st0
  fstp st0
  fstp st0
  fstp st0

  ; result is in st(0)

  pop ecx
  pop eax

  ret

main:

  ; check if we have 2 command-line args
  mov eax, [esp+4]
  cmp eax, 3
  jnz error

  ; get arg 1 - value to calc sin of
  mov ebx, [esp+8]
  push dword [ebx+4]
  call atof
  add esp, 4

  ; get arg 2 - number of taylor series terms
  mov ebx, [esp+8]
  push dword [ebx+8]
  call atoi
  add esp, 4

  ; do the taylor series approximation
  push eax
  call taylor_sin
  add esp, 4

  ; output result
  sub esp, 8
  fstp qword [esp]
  push format
  call printf
  add esp,12

  ; return to libc
  xor eax,eax
  ret

error:
  push error_message
  call puts
  add esp,4
  mov eax,1
  ret

section .data

error_message: db "syntax: <x> <terms>",0
format: db "%0.10f",10,0

运行程序:

$ ./taylor-sine 0.5 1
0.4791666667
$ ./taylor-sine 0.5 5
0.4794255386
$ echo "s(0.5)"|bc -l
.47942553860420300027

You don't state which CPU architecture so I'm assuming x86.

The simplist (and possibly most inefficient) way would be to write the formula in RPN, which can be mapped almost directly to FPU instructions.

Example,

algebraic formula : x - (x^3/3!) + (x^5/5!)

RPN : x x x * x * 3 2 * / - x x * x * x * x * 5 4 * 3 * 2 * / +

which becomes :

fld x
fld x
fld x
fmul
fld x
fmul
fild [const_3]
fild [const_2]
fmul
fdiv
fsub
fld x
fld x
fmul 
fld x
fmul
fld x
fmul
fld x
fmul
fild [const_5]
fild [const_4]
fmul
fild [const_3]
fmul
fild [const_2]
fmul
fdiv
fadd

There are some obvious optimisation strategies -

  • instead of calculating x, xxx,
    xxxxx etc for each term, store a
    'running product' and just multiply
    by x*x each time
  • instead of
    calculating the factorial for each
    term, do the same 'running product'

Here's some commented code for x86 FPU, the comments after each FPU instruction show the stack state after that instruction has executed, with the stack top (st0) on the left, eg :

fldz ; 0
fld1 ; 1, 0

--snip--

bits 32

section .text

extern printf
extern atof
extern atoi
extern puts
global main

taylor_sin:
  push eax
  push ecx

  ; input :
  ;  st(0) = x, value to approximate sin(x) of
  ;  [esp+12] = number of taylor series terms

  ; variables we'll use :
  ; s = sum of all terms (final result)
  ; x = value we want to take the sin of
  ; fi = factorial index (1, 3, 5, 7, ...)
  ; fc = factorial current (1, 6, 120, 5040, ...)
  ; n = numerator of term (x, x^3, x^5, x^7, ...)

  ; setup state for each iteration (term)
  fldz ; s x
  fxch st1 ; x s
  fld1 ; fi x s
  fld1 ; fc fi x s
  fld st2 ; n fc fi x s

  ; first term
  fld st1 ; fc n fc fi x s
  fdivr st0,st1 ; r n fc fi x s
  faddp st5,st0 ; n fc fi x s

  ; loop through each term
  mov ecx,[esp+12] ; number of terms
  xor eax,eax ; zero add/sub counter

loop_term:
  ; calculate next odd factorial
  fld1 ; 1 n fc fi x s
  faddp st3 ; n fc fi x s
  fld st2 ; fi n fc fi x s
  fmulp st2,st0
  fld1 ; 1 n fc fi x s
  faddp st3 ; n fc fi x s
  fld st2 ; fi n fc fi x s
  fmulp st2,st0 ; n fc fi x s

  ; calculate next odd power of x
  fmul st0,st3 ; n*x fc fi x s
  fmul st0,st3 ; n*x*x fc fi x s

  ; divide power by factorial
  fld st1 ; fc n fc fi x s
  fdivr st0,st1 ; r n fc fi x s

  ; check if we need to add or subtract this term
  test eax,1
  jnz odd_term
  fsubp st5,st0 ; n fc fi x s
  jmp skip
odd_term:
  ; accumulate result
  faddp st5,st0 ; n fc fi x s
skip:
  inc eax ; increment add/sub counter
  loop loop_term

  ; unstack work variables
  fstp st0
  fstp st0
  fstp st0
  fstp st0

  ; result is in st(0)

  pop ecx
  pop eax

  ret

main:

  ; check if we have 2 command-line args
  mov eax, [esp+4]
  cmp eax, 3
  jnz error

  ; get arg 1 - value to calc sin of
  mov ebx, [esp+8]
  push dword [ebx+4]
  call atof
  add esp, 4

  ; get arg 2 - number of taylor series terms
  mov ebx, [esp+8]
  push dword [ebx+8]
  call atoi
  add esp, 4

  ; do the taylor series approximation
  push eax
  call taylor_sin
  add esp, 4

  ; output result
  sub esp, 8
  fstp qword [esp]
  push format
  call printf
  add esp,12

  ; return to libc
  xor eax,eax
  ret

error:
  push error_message
  call puts
  add esp,4
  mov eax,1
  ret

section .data

error_message: db "syntax: <x> <terms>",0
format: db "%0.10f",10,0

running the program :

$ ./taylor-sine 0.5 1
0.4791666667
$ ./taylor-sine 0.5 5
0.4794255386
$ echo "s(0.5)"|bc -l
.47942553860420300027
骄兵必败 2024-08-06 03:25:14

为什么? 自 80387(大约 1987 年)处理器源以来,已经存在 FCOS 和 FSIN 操作码

http://ref.x86asm .net/coder32.html

维基百科

演示场景中的个人朋友

Why? There is already a FCOS and FSIN opcode since the 80387 (circa 1987) processor

source:

http://ref.x86asm.net/coder32.html

Wikipedia

Personal friend from demo scene

简单气质女生网名 2024-08-06 03:25:14
long double LD_COS(long double __X)
{
    register long double __VALUE;

    __asm__ __volatile__(
        "fcos                  \n\t"
        : "=t" (__VALUE)
        : "0" (__X)
    );
    return __VALUE;
}

long double LD_SIN(long double __X)
{
    register long double __VALUE;

    __asm__ __volatile__(
        "fsin                  \n\t"
        : "=t" (__VALUE)
        : "0" (__X)
    );
    return __VALUE;
}
long double LD_COS(long double __X)
{
    register long double __VALUE;

    __asm__ __volatile__(
        "fcos                  \n\t"
        : "=t" (__VALUE)
        : "0" (__X)
    );
    return __VALUE;
}

long double LD_SIN(long double __X)
{
    register long double __VALUE;

    __asm__ __volatile__(
        "fsin                  \n\t"
        : "=t" (__VALUE)
        : "0" (__X)
    );
    return __VALUE;
}
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文