实现基于查表的三角函数
对于我在业余时间实现的视频游戏,我尝试使用查找表实现我自己的 sinf()、cosf() 和 atan2f() 版本。 目的是使实现速度更快,但准确性较低。
我的初步实现如下。 这些函数可以工作,并返回良好的近似值。 唯一的问题是它们比调用标准 sinf()、cosf() 和 atan2f() 函数慢。
那么,我做错了什么?
// Geometry.h includes definitions of PI, TWO_PI, etc., as
// well as the prototypes for the public functions
#include "Geometry.h"
namespace {
// Number of entries in the sin/cos lookup table
const int SinTableCount = 512;
// Angle covered by each table entry
const float SinTableDelta = TWO_PI / (float)SinTableCount;
// Lookup table for Sin() results
float SinTable[SinTableCount];
// This object initializes the contents of the SinTable array exactly once
class SinTableInitializer {
public:
SinTableInitializer() {
for (int i = 0; i < SinTableCount; ++i) {
SinTable[i] = sinf((float)i * SinTableDelta);
}
}
};
static SinTableInitializer sinTableInitializer;
// Number of entries in the atan lookup table
const int AtanTableCount = 512;
// Interval covered by each Atan table entry
const float AtanTableDelta = 1.0f / (float)AtanTableCount;
// Lookup table for Atan() results
float AtanTable[AtanTableCount];
// This object initializes the contents of the AtanTable array exactly once
class AtanTableInitializer {
public:
AtanTableInitializer() {
for (int i = 0; i < AtanTableCount; ++i) {
AtanTable[i] = atanf((float)i * AtanTableDelta);
}
}
};
static AtanTableInitializer atanTableInitializer;
// Lookup result in table.
// Preconditions: y > 0, x > 0, y < x
static float AtanLookup2(float y, float x) {
assert(y > 0.0f);
assert(x > 0.0f);
assert(y < x);
const float ratio = y / x;
const int index = (int)(ratio / AtanTableDelta);
return AtanTable[index];
}
}
float Sin(float angle) {
// If angle is negative, reflect around X-axis and negate result
bool mustNegateResult = false;
if (angle < 0.0f) {
mustNegateResult = true;
angle = -angle;
}
// Normalize angle so that it is in the interval (0.0, PI)
while (angle >= TWO_PI) {
angle -= TWO_PI;
}
const int index = (int)(angle / SinTableDelta);
const float result = SinTable[index];
return mustNegateResult? (-result) : result;
}
float Cos(float angle) {
return Sin(angle + PI_2);
}
float Atan2(float y, float x) {
// Handle x == 0 or x == -0
// (See atan2(3) for specification of sign-bit handling.)
if (x == 0.0f) {
if (y > 0.0f) {
return PI_2;
}
else if (y < 0.0f) {
return -PI_2;
}
else if (signbit(x)) {
return signbit(y)? -PI : PI;
}
else {
return signbit(y)? -0.0f : 0.0f;
}
}
// Handle y == 0, x != 0
if (y == 0.0f) {
return (x > 0.0f)? 0.0f : PI;
}
// Handle y == x
if (y == x) {
return (x > 0.0f)? PI_4 : -(3.0f * PI_4);
}
// Handle y == -x
if (y == -x) {
return (x > 0.0f)? -PI_4 : (3.0f * PI_4);
}
// For other cases, determine quadrant and do appropriate lookup and calculation
bool right = (x > 0.0f);
bool top = (y > 0.0f);
if (right && top) {
// First quadrant
if (y < x) {
return AtanLookup2(y, x);
}
else {
return PI_2 - AtanLookup2(x, y);
}
}
else if (!right && top) {
// Second quadrant
const float posx = fabsf(x);
if (y < posx) {
return PI - AtanLookup2(y, posx);
}
else {
return PI_2 + AtanLookup2(posx, y);
}
}
else if (!right && !top) {
// Third quadrant
const float posx = fabsf(x);
const float posy = fabsf(y);
if (posy < posx) {
return -PI + AtanLookup2(posy, posx);
}
else {
return -PI_2 - AtanLookup2(posx, posy);
}
}
else { // right && !top
// Fourth quadrant
const float posy = fabsf(y);
if (posy < x) {
return -AtanLookup2(posy, x);
}
else {
return -PI_2 + AtanLookup2(x, posy);
}
}
return 0.0f;
}
For a videogame I'm implementing in my spare time, I've tried implementing my own versions of sinf(), cosf(), and atan2f(), using lookup tables. The intent is to have implementations that are faster, although with less accuracy.
My initial implementation is below. The functions work, and return good approximate values. The only problem is that they are slower than calling the standard sinf(), cosf(), and atan2f() functions.
So, what am I doing wrong?
// Geometry.h includes definitions of PI, TWO_PI, etc., as
// well as the prototypes for the public functions
#include "Geometry.h"
namespace {
// Number of entries in the sin/cos lookup table
const int SinTableCount = 512;
// Angle covered by each table entry
const float SinTableDelta = TWO_PI / (float)SinTableCount;
// Lookup table for Sin() results
float SinTable[SinTableCount];
// This object initializes the contents of the SinTable array exactly once
class SinTableInitializer {
public:
SinTableInitializer() {
for (int i = 0; i < SinTableCount; ++i) {
SinTable[i] = sinf((float)i * SinTableDelta);
}
}
};
static SinTableInitializer sinTableInitializer;
// Number of entries in the atan lookup table
const int AtanTableCount = 512;
// Interval covered by each Atan table entry
const float AtanTableDelta = 1.0f / (float)AtanTableCount;
// Lookup table for Atan() results
float AtanTable[AtanTableCount];
// This object initializes the contents of the AtanTable array exactly once
class AtanTableInitializer {
public:
AtanTableInitializer() {
for (int i = 0; i < AtanTableCount; ++i) {
AtanTable[i] = atanf((float)i * AtanTableDelta);
}
}
};
static AtanTableInitializer atanTableInitializer;
// Lookup result in table.
// Preconditions: y > 0, x > 0, y < x
static float AtanLookup2(float y, float x) {
assert(y > 0.0f);
assert(x > 0.0f);
assert(y < x);
const float ratio = y / x;
const int index = (int)(ratio / AtanTableDelta);
return AtanTable[index];
}
}
float Sin(float angle) {
// If angle is negative, reflect around X-axis and negate result
bool mustNegateResult = false;
if (angle < 0.0f) {
mustNegateResult = true;
angle = -angle;
}
// Normalize angle so that it is in the interval (0.0, PI)
while (angle >= TWO_PI) {
angle -= TWO_PI;
}
const int index = (int)(angle / SinTableDelta);
const float result = SinTable[index];
return mustNegateResult? (-result) : result;
}
float Cos(float angle) {
return Sin(angle + PI_2);
}
float Atan2(float y, float x) {
// Handle x == 0 or x == -0
// (See atan2(3) for specification of sign-bit handling.)
if (x == 0.0f) {
if (y > 0.0f) {
return PI_2;
}
else if (y < 0.0f) {
return -PI_2;
}
else if (signbit(x)) {
return signbit(y)? -PI : PI;
}
else {
return signbit(y)? -0.0f : 0.0f;
}
}
// Handle y == 0, x != 0
if (y == 0.0f) {
return (x > 0.0f)? 0.0f : PI;
}
// Handle y == x
if (y == x) {
return (x > 0.0f)? PI_4 : -(3.0f * PI_4);
}
// Handle y == -x
if (y == -x) {
return (x > 0.0f)? -PI_4 : (3.0f * PI_4);
}
// For other cases, determine quadrant and do appropriate lookup and calculation
bool right = (x > 0.0f);
bool top = (y > 0.0f);
if (right && top) {
// First quadrant
if (y < x) {
return AtanLookup2(y, x);
}
else {
return PI_2 - AtanLookup2(x, y);
}
}
else if (!right && top) {
// Second quadrant
const float posx = fabsf(x);
if (y < posx) {
return PI - AtanLookup2(y, posx);
}
else {
return PI_2 + AtanLookup2(posx, y);
}
}
else if (!right && !top) {
// Third quadrant
const float posx = fabsf(x);
const float posy = fabsf(y);
if (posy < posx) {
return -PI + AtanLookup2(posy, posx);
}
else {
return -PI_2 - AtanLookup2(posx, posy);
}
}
else { // right && !top
// Fourth quadrant
const float posy = fabsf(y);
if (posy < x) {
return -AtanLookup2(posy, x);
}
else {
return -PI_2 + AtanLookup2(x, posy);
}
}
return 0.0f;
}
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(5)
“过早的优化是万恶之源” - Donald Knuth
如今,编译器为三角函数提供了非常有效的内在函数,可以从现代处理器(SSE 等)中获得最佳性能,这解释了为什么你很难击败内置函数。 不要在这些部分上浪费太多时间,而是专注于可以使用分析器发现的真正瓶颈。
"Premature optimization is the root of all evil" - Donald Knuth
Nowadays compilers provide very efficient intrinsics for trigonometric functions that get the best from modern processors (SSE etc.), which explains why you can hardly beat the built-in functions. Don't lose too much time on these parts and instead concentrate on the real bottlenecks that you can spot with a profiler.
请记住,您有一个协处理器...如果是 1993 年,您会看到速度的提高...但是今天您将很难击败本机内在函数。
尝试查看 disassible to sinf。
Remember you have a co-processor ... you would have seen an increase in speed if it were 1993 ... however today you will struggle to beat native intrinsics.
Try viewing the disassebly to sinf.
有人已经对此进行了基准测试,看起来
Trig.Math
函数已经过优化,并且比您能想到的任何查找表都要快:http://www.tommti -systems.de/go.html?http://www.tommti-systems.de/main-Dateien/reviews/languages/benchmarks.html
(他们没有在页面上使用锚点,所以你必须向下滚动约 1/3)
Someone has already benchmarked this, and it looks as though the
Trig.Math
functions are already optimized, and will be faster than any lookup table you can come up with:http://www.tommti-systems.de/go.html?http://www.tommti-systems.de/main-Dateien/reviews/languages/benchmarks.html
(They didn't use anchors on the page so you have to scroll about 1/3 of the way down)
我对这个地方感到担心:
但你可以:
为所有功能添加时间计,编写特殊性能测试,运行性能测试,打印时间测试报告..我想经过这些测试你就会知道答案。
您还可以使用一些分析工具,例如 AQTime。
I'm worried by this place:
But you can:
Add time-meters to all functions, write special performance tests, run performance tests, print report of time test.. I think you will know answer after this tests.
Also you could use some profiling tools such as AQTime.
内置功能已经得到了很好的优化,因此要击败它们真的很困难。 就我个人而言,我会在其他地方寻找获得性能的地方。
也就是说,我可以在您的代码中看到一项优化:
可以替换为:
The built-in functions are very well optimized already, so it's going to be REALLY tough to beat them. Personally, I'd look elsewhere for places to gain performance.
That said, one optimization I can see in your code:
Could be replaced with: