如何在 C++ 中优化简单的数字类型包装类？

发布于 2024-11-25 09:25:53 字数 1976 浏览 6 评论 0原文

我正在尝试用 C++ 实现一个定点数类，但遇到了性能问题。我已将问题简化为对 float 类型的简单包装，但它仍然很慢。我的问题是：为什么编译器无法将它完全优化掉？

“float”版本比“Float”快 50%。为什么？！

（我使用 Visual C++ 2008，测试了所有可能的编译器选项，当然还有发布配置）。

请参阅下面的代码：

#include <cstdio>
#include <cstdlib>
#include "Clock.h"      // just for measuring time

#define real Float      // Option 1
//#define real float        // Option 2

// Thin wrapper around a single float that exposes assignment, +, -, * and <.
struct Float
{
private:
    float value;    // the wrapped number

public:
    // Converting constructor: a plain float can be used where a Float is expected.
    Float(float v) : value(v) {}

    // Implicit conversion back to the wrapped float.
    operator float() { return this->value; }

    // Copy assignment: takes over the number held by `other`.
    Float& operator=(const Float& other)
    {
        this->value = other.value;
        return *this;
    }

    // Returns a new Float holding the sum of both wrapped numbers.
    Float operator+ (const Float& other) const
    {
        const float sum = this->value + other.value;
        return Float(sum);
    }

    // Returns a new Float holding this number minus the other one.
    Float operator- (const Float& other) const
    {
        const float difference = this->value - other.value;
        return Float(difference);
    }

    // Returns a new Float holding the product of both wrapped numbers.
    Float operator* (const Float& other) const
    {
        const float product = this->value * other.value;
        return Float(product);
    }

    // True when this wrapped number is strictly smaller than the other one.
    bool operator< (const Float& other) const
    {
        return this->value < other.value;
    }
};

struct Point
{
    Point() : x(0), y(0) {}
    Point(real x, real y) : x(x), y(y) {}

    real x;
    real y;
};

// Benchmark driver: fills an array of N points with rand()-derived
// coordinates, counts the pairs whose squared distance is below `limit`,
// then prints that count and the value reported by Clock.
int main()
{
    // Generate data
    const int N = 30000;
    Point points[N];
    for (int i = 0; i < N; ++i)
    {
        // 640.0f * rand() / RAND_MAX scales rand() into the range [0, 640].
        points[i].x = (real)(640.0f * rand() / RAND_MAX);
        points[i].y = (real)(640.0f * rand() / RAND_MAX);
    }

    // Squared threshold (20 * 20): the loop compares squared distances,
    // so no square root is needed.
    real limit( 20 * 20 );

    // Check how many pairs of points are closer than 20
    // NOTE(review): Clock comes from Clock.h, which is not shown here;
    // presumably timing starts at construction -- confirm against Clock.h.
    Clock clk;

    int count = 0;
    for (int i = 0; i < N; ++i)
    {
        // j starts at i + 1 so each unordered pair is visited exactly once.
        for (int j = i + 1; j < N; ++j)
        {
            real dx = points[i].x - points[j].x;
            real dy = points[i].y - points[j].y;
            real d2 = dx * dx + dy * dy;    // squared distance between the two points
            if ( d2 < limit )
            {
                count++;
            }
        }
    }

    // Read the clock right after the loop, before any output is produced.
    double time = clk.time();

    printf("%d\n", count);
    printf("TIME: %lf\n", time);

    return 0;
}

原文

I am trying to implement a fixed-point class in C++, but I face problems with performance. I have reduced the problem to a simple wrapper of the float type and it is still slow. My question is - why is the compiler unable to optimize it fully?

The 'float' version is 50% faster than 'Float'. Why?!

(I use Visual C++ 2008; all possible compiler options have been tested, in the Release configuration of course.)

See the code below:

#include <cstdio>
#include <cstdlib>
#include "Clock.h"      // just for measuring time

#define real Float      // Option 1
//#define real float        // Option 2

// Thin wrapper around a single float that exposes assignment, +, -, * and <.
struct Float
{
private:
    float value;    // the wrapped number

public:
    // Converting constructor: a plain float can be used where a Float is expected.
    Float(float v) : value(v) {}

    // Implicit conversion back to the wrapped float.
    operator float() { return this->value; }

    // Copy assignment: takes over the number held by `other`.
    Float& operator=(const Float& other)
    {
        this->value = other.value;
        return *this;
    }

    // Returns a new Float holding the sum of both wrapped numbers.
    Float operator+ (const Float& other) const
    {
        const float sum = this->value + other.value;
        return Float(sum);
    }

    // Returns a new Float holding this number minus the other one.
    Float operator- (const Float& other) const
    {
        const float difference = this->value - other.value;
        return Float(difference);
    }

    // Returns a new Float holding the product of both wrapped numbers.
    Float operator* (const Float& other) const
    {
        const float product = this->value * other.value;
        return Float(product);
    }

    // True when this wrapped number is strictly smaller than the other one.
    bool operator< (const Float& other) const
    {
        return this->value < other.value;
    }
};

struct Point
{
    Point() : x(0), y(0) {}
    Point(real x, real y) : x(x), y(y) {}

    real x;
    real y;
};

// Benchmark driver: fills an array of N points with rand()-derived
// coordinates, counts the pairs whose squared distance is below `limit`,
// then prints that count and the value reported by Clock.
int main()
{
    // Generate data
    const int N = 30000;
    Point points[N];
    for (int i = 0; i < N; ++i)
    {
        // 640.0f * rand() / RAND_MAX scales rand() into the range [0, 640].
        points[i].x = (real)(640.0f * rand() / RAND_MAX);
        points[i].y = (real)(640.0f * rand() / RAND_MAX);
    }

    // Squared threshold (20 * 20): the loop compares squared distances,
    // so no square root is needed.
    real limit( 20 * 20 );

    // Check how many pairs of points are closer than 20
    // NOTE(review): Clock comes from Clock.h, which is not shown here;
    // presumably timing starts at construction -- confirm against Clock.h.
    Clock clk;

    int count = 0;
    for (int i = 0; i < N; ++i)
    {
        // j starts at i + 1 so each unordered pair is visited exactly once.
        for (int j = i + 1; j < N; ++j)
        {
            real dx = points[i].x - points[j].x;
            real dy = points[i].y - points[j].y;
            real d2 = dx * dx + dy * dy;    // squared distance between the two points
            if ( d2 < limit )
            {
                count++;
            }
        }
    }

    // Read the clock right after the loop, before any output is produced.
    double time = clk.time();

    printf("%d\n", count);
    printf("TIME: %lf\n", time);

    return 0;
}

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

晌融 2024-12-02 09:25:53

IMO，这与优化选项有关。我在 64 位 Linux 机器上用 g++ 检查了你的程序。在不开启任何优化的情况下，结果与你描述的一致，相差约 50%。

开启最高级别的优化（即 -O4）后，两个版本的表现相同。请打开优化后再测试。

回复收藏 0 原文

以歌曲疗慰 2024-12-02 09:25:53

尝试不通过引用传递。您的类足够小，以至于通过引用传递它的开销（是的，如果编译器没有优化它，就会产生开销），可能比仅仅复制类要高。所以这......

// Reference-taking form: rhs is received by const reference and a new
// Float is built from the sum of both values.
Float operator+ (const Float& rhs) const
{
   return Float( value + rhs.value );
}

变成这样......

// By-value form: rhs is already a copy of the argument, so this object's
// value is added into that copy and the copy itself is returned.
Float operator+ (Float rhs) const
{
   rhs.value+=value;
   return rhs;
}

这样可以避免创建临时对象，并且可能省去指针解引用带来的一层间接访问。

Try not passing by reference. Your class is small enough that the overhead of passing it by reference (yes there is overhead if the compiler doesn't optimize it out), might be higher than just copying the class. So this...

// Reference-taking form: rhs is received by const reference and a new
// Float is built from the sum of both values.
Float operator+ (const Float& rhs) const
{
   return Float( value + rhs.value );
}

becomes something like this...

// By-value form: rhs is already a copy of the argument, so this object's
// value is added into that copy and the copy itself is returned.
Float operator+ (Float rhs) const
{
   rhs.value+=value;
   return rhs;
}

which avoids a temporary object and may avoid some indirection of a pointer dereference.

回复收藏 0 原文

仅此而已 2024-12-02 09:25:53

经过进一步调查，我完全确信这是编译器优化管线的问题。与直接使用未封装的 float 相比，这种情况下生成的代码明显更差。我的建议是向 Microsoft 报告这个潜在问题，看看他们怎么说。我还建议你继续实现计划中的定点版本，因为针对整数生成的代码看起来是最优的。

回复收藏 0 原文

~没有更多了~

关于作者

行雁书

暂无简介

文章

28 人气

关注发私信

友情链接

文江博客

如何在 C++ 中优化简单的数字类型包装类？

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

评论（3）

关于作者

相关话题

热门标签

推荐作者

李珊平

Quxin

范无咎

github_ZOJ2N8YxBm

若言

南…巷孤猫

友情链接

如何在 C++ 中优化简单的数字类型包装类？

如果你对这篇内容有疑问，欢迎到本站社区发帖提问 参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

评论（3）

关于作者

相关话题

热门标签

推荐作者

李珊平

Quxin

范无咎

github_ZOJ2N8YxBm

若言

南…巷孤猫

友情链接

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。