当前位置：文江博客话题详情

这是 g++ 中的优化错误吗？

发布于 2024-12-01 04:52:13 字数 7388 浏览 1 评论 0原文

我不确定是我在 g++ (4.4.1-4ubuntu9) 中发现了一个错误，还是我自己做错了什么。我认为我看到的是启用 g++ -O2 优化后引入的错误。我已尽量将代码精简到只剩相关部分。

启用优化后，我的一个 ASSERT 会失败；禁用优化时，同一个 ASSERT 不会失败。我想我已经把问题追踪到了对一个函数及其调用者的优化上。

系统

语言：C++
Ubuntu 9.10
g++-4.4.real (Ubuntu 4.4.1-4ubuntu9) 4.4.1
Linux 2.6.31-22-server x86_64

优化启用

对象文件的编译命令： g++ -DHAVE_CONFIG_H -I. -fPIC -g -O2 -MT file.o -MD -MP -MF .deps/file.Tpo -c -o file.o file.cpp

下面是 objdump -dg file.o 输出中的相关代码。

00000000000018b0 <helper_function>:
;; This function takes two parameters:
;; pointer to int: %rdi
;; pointer to int[]: %rsi
    18b0:       0f b6 07                movzbl (%rdi),%eax
    18b3:       83 f8 12                cmp    $0x12,%eax
    18b6:       74 60                   je     1918 <helper_function+0x68>
    18b8:       83 f8 17                cmp    $0x17,%eax
    18bb:       74 5b                   je     1918 <helper_function+0x68>
...
    1918:       c7 06 32 00 00 00       movl   $0x32,(%rsi)
    191e:       66 90                   xchg   %ax,%ax
    1920:       c3                      retq  
    
0000000000005290 <buggy_invoker>:
... snip ...
    52a0:       48 81 ec c8 01 00 00    sub    $0x1c8,%rsp
    52a7:       48 8d 84 24 a0 01 00    lea    0x1a0(%rsp),%rax
    52ae:       00 
    52af:       48 c7 84 24 a0 01 00    movq   $0x0,0x1a0(%rsp)
    52b6:       00 00 00 00 00 
    52bb:       48 c7 84 24 a8 01 00    movq   $0x0,0x1a8(%rsp)
    52c2:       00 00 00 00 00 
    52c7:       c7 84 24 b0 01 00 00    movl   $0x0,0x1b0(%rsp)
    52ce:       00 00 00 00 
    52d2:       4c 8d 7c 24 20          lea    0x20(%rsp),%r15
    52d7:       48 89 c6                mov    %rax,%rsi
    52da:       48 89 44 24 08          mov    %rax,0x8(%rsp)
;; ***** BUG HERE *****
;; Pointer to int[] loaded into %rsi
;; But where is %rdi populated?
    52df:       e8 cc c5 ff ff          callq  18b0 <helper_function>

0000000000005494 <perfectly_fine_invoker>:
    5494:       48 83 ec 20             sub    $0x20,%rsp
    5498:       0f ae f0                mfence 
    549b:       48 8d 7c 24 30          lea    0x30(%rsp),%rdi
    54a0:       48 89 e6                mov    %rsp,%rsi
    54a3:       48 c7 04 24 00 00 00    movq   $0x0,(%rsp)
    54aa:       00 
    54ab:       48 c7 44 24 08 00 00    movq   $0x0,0x8(%rsp)
    54b2:       00 00 
    54b4:       c7 44 24 10 00 00 00    movl   $0x0,0x10(%rsp)
    54bb:       00 
;; Non buggy invocation here: both %rdi and %rsi loaded correctly.
    54bc:       e8 ef c3 ff ff          callq  18b0 <helper_function>

禁用优化

现在的编译命令： g++ -DHAVE_CONFIG_H -I. -fPIC -g -O0 -MT file.o -MD -MP -MF .deps/file.Tpo -c -o file.o file.cpp

0000000000008d27 <helper_function>:
;; Still the same parameters here, but it looks a little different.
... snip ...
    8d2b:       48 89 7d e8             mov    %rdi,-0x18(%rbp)
    8d2f:       48 89 75 e0             mov    %rsi,-0x20(%rbp)
    8d33:       48 8b 45 e8             mov    -0x18(%rbp),%rax
    8d37:       0f b6 00                movzbl (%rax),%eax
    8d3a:       0f b6 c0                movzbl %al,%eax
    8d3d:       89 45 fc                mov    %eax,-0x4(%rbp)
    8d40:       8b 45 fc                mov    -0x4(%rbp),%eax
    8d43:       83 f8 17                cmp    $0x17,%eax
    8d46:       74 40                   je     8d88 <helper_function+0x61>
...

000000000000948a <buggy_invoker>:
    948a:       55                      push   %rbp
    948b:       48 89 e5                mov    %rsp,%rbp
    948e:       41 54                   push   %r12
    9490:       53                      push   %rbx
    9491:       48 81 ec c0 01 00 00    sub    $0x1c0,%rsp
    9498:       48 89 bd 38 fe ff ff    mov    %rdi,-0x1c8(%rbp)
    949f:       48 89 b5 30 fe ff ff    mov    %rsi,-0x1d0(%rbp)
    94a6:       48 c7 45 c0 00 00 00    movq   $0x0,-0x40(%rbp)
    94ad:       00 
    94ae:       48 c7 45 c8 00 00 00    movq   $0x0,-0x38(%rbp)
    94b5:       00 
    94b6:       c7 45 d0 00 00 00 00    movl   $0x0,-0x30(%rbp)
    94bd:       48 8d 55 c0             lea    -0x40(%rbp),%rdx
    94c1:       48 8b 85 38 fe ff ff    mov    -0x1c8(%rbp),%rax
    94c8:       48 89 d6                mov    %rdx,%rsi
    94cb:       48 89 c7                mov    %rax,%rdi
;; ***** NOT BUGGY HERE *****
;; Now, without optimization, both %rdi and %rsi loaded correctly.
    94ce:       e8 54 f8 ff ff          callq  8d27 <helper_function>

0000000000008eec <different_perfectly_fine_invoker>:
    8eec:       55                      push   %rbp
    8eed:       48 89 e5                mov    %rsp,%rbp
    8ef0:       48 83 ec 30             sub    $0x30,%rsp
    8ef4:       48 89 7d d8             mov    %rdi,-0x28(%rbp)
    8ef8:       48 c7 45 e0 00 00 00    movq   $0x0,-0x20(%rbp)
    8eff:       00 
    8f00:       48 c7 45 e8 00 00 00    movq   $0x0,-0x18(%rbp)
    8f07:       00 
    8f08:       c7 45 f0 00 00 00 00    movl   $0x0,-0x10(%rbp)
    8f0f:       48 8d 55 e0             lea    -0x20(%rbp),%rdx
    8f13:       48 8b 45 d8             mov    -0x28(%rbp),%rax
    8f17:       48 89 d6                mov    %rdx,%rsi
    8f1a:       48 89 c7                mov    %rax,%rdi
;; Another example of non-optimized call to that function.
    8f1d:       e8 05 fe ff ff          callq  8d27 <helper_function>

原始 C++ 代码

这是原始 C++ 代码的清理版本。我只是更改了一些名字并删除了不相关的代码。请原谅我的偏执，我只是不想暴露太多尚未公开、尚未发布的工作中的代码 :-)。

static void helper_function(my_struct_t *e, int *outArr)
{
  unsigned char event_type = e->header.type;
  if (event_type == event_A || event_type == event_B) {
    outArr[0] = action_one;
  } else if (event_type == event_C) {
    outArr[0] = action_one;
    outArr[1] = action_two;
  } else if (...) { ... }
}

static void buggy_invoker(my_struct_t *e, predicate_t pred)
{
  // MAX_ACTIONS is #defined to 5
  int action_array[MAX_ACTIONS] = {0};
  helper_function(e, action_array);
  ...
}

static int has_any_actions(my_struct_t *e)
{
  int actions[MAX_ACTIONS] = {0};
  helper_function(e, actions);
  return actions[0] != 0;
}

// *** ENTRY POINT to this code is this function (note not static).
void perfectly_fine_invoker(my_struct_t e, predicate_t pred)
{
  memfence();
  if (has_any_actions(&e)) {
    buggy_invoker(&e, pred);
  }
  ...
}

如果您认为我混淆或删减了太多内容，请告诉我。这段代码的使用者调用的是 'perfectly_fine_invoker'。启用优化后，g++ 把 'has_any_actions' 函数优化成了对 'helper_function' 的直接调用，这一点可以在汇编代码中看到。

问题

所以，我的问题是，对于其他人来说，它看起来像是一个有缺陷的优化吗？

如果有帮助，我可以发布原始 C++ 代码的清理版本。

这是我第一次在 Stack Overflow 上发帖，如果我能做些什么让问题更清晰，或者需要我提供其他信息，请告诉我。

答案

编辑（事后几天）：

我接受了下面对我的问题的答案——这不是 g++ 中的优化错误，我只是看错了汇编代码。

不过，为了将来可能看到这个问题的人，我把找到的答案写在这里。我读了一些关于 C 语言未定义行为的文章（ http://blog.regehr.org/archives/213 和 http://blog.llvm.org/2011/05/what-every-c-programmer-should-know.html ），其中关于编译器把带有未定义行为的函数优化掉的一些描述，看起来出奇地熟悉。

我在函数 'helper_function' 中添加了一些 NULL 指针检查，你瞧……错误消失了。我本来一开始就应该加上 NULL 指针检查，但显然，缺少这些检查使得 g++ 可以为所欲为（在我的例子中，是把调用优化掉了）。

希望这些信息对以后的人有所帮助。

原文

I'm not sure whether I've found a bug in g++ (4.4.1-4ubuntu9), or if I'm doing
something wrong. What I believe I'm seeing is a bug introduced by enabling
optimization with g++ -O2. I've tried to distill the code down to just the
relevant parts.

When optimization is enabled, I have an ASSERT which is failing. When
optimization is disabled, the same ASSERT does not fail. I think I've tracked
it down to the optimization of one function and its callers.

The System

Language: C++
Ubuntu 9.10
g++-4.4.real (Ubuntu 4.4.1-4ubuntu9) 4.4.1
Linux 2.6.31-22-server x86_64

Optimization Enabled

Object compiled with:
g++ -DHAVE_CONFIG_H -I. -fPIC -g -O2 -MT file.o -MD -MP -MF .deps/file.Tpo -c -o file.o file.cpp

And here is the relevant code from objdump -dg file.o.

00000000000018b0 <helper_function>:
;; This function takes two parameters:
;; pointer to int: %rdi
;; pointer to int[]: %rsi
    18b0:       0f b6 07                movzbl (%rdi),%eax
    18b3:       83 f8 12                cmp    $0x12,%eax
    18b6:       74 60                   je     1918 <helper_function+0x68>
    18b8:       83 f8 17                cmp    $0x17,%eax
    18bb:       74 5b                   je     1918 <helper_function+0x68>
...
    1918:       c7 06 32 00 00 00       movl   $0x32,(%rsi)
    191e:       66 90                   xchg   %ax,%ax
    1920:       c3                      retq  
    
0000000000005290 <buggy_invoker>:
... snip ...
    52a0:       48 81 ec c8 01 00 00    sub    $0x1c8,%rsp
    52a7:       48 8d 84 24 a0 01 00    lea    0x1a0(%rsp),%rax
    52ae:       00 
    52af:       48 c7 84 24 a0 01 00    movq   $0x0,0x1a0(%rsp)
    52b6:       00 00 00 00 00 
    52bb:       48 c7 84 24 a8 01 00    movq   $0x0,0x1a8(%rsp)
    52c2:       00 00 00 00 00 
    52c7:       c7 84 24 b0 01 00 00    movl   $0x0,0x1b0(%rsp)
    52ce:       00 00 00 00 
    52d2:       4c 8d 7c 24 20          lea    0x20(%rsp),%r15
    52d7:       48 89 c6                mov    %rax,%rsi
    52da:       48 89 44 24 08          mov    %rax,0x8(%rsp)
;; ***** BUG HERE *****
;; Pointer to int[] loaded into %rsi
;; But where is %rdi populated?
    52df:       e8 cc c5 ff ff          callq  18b0 <helper_function>

0000000000005494 <perfectly_fine_invoker>:
    5494:       48 83 ec 20             sub    $0x20,%rsp
    5498:       0f ae f0                mfence 
    549b:       48 8d 7c 24 30          lea    0x30(%rsp),%rdi
    54a0:       48 89 e6                mov    %rsp,%rsi
    54a3:       48 c7 04 24 00 00 00    movq   $0x0,(%rsp)
    54aa:       00 
    54ab:       48 c7 44 24 08 00 00    movq   $0x0,0x8(%rsp)
    54b2:       00 00 
    54b4:       c7 44 24 10 00 00 00    movl   $0x0,0x10(%rsp)
    54bb:       00 
;; Non buggy invocation here: both %rdi and %rsi loaded correctly.
    54bc:       e8 ef c3 ff ff          callq  18b0 <helper_function>

Optimization Disabled

Now compiled with:
g++ -DHAVE_CONFIG_H -I. -fPIC -g -O0 -MT file.o -MD -MP -MF .deps/file.Tpo -c -o file.o file.cpp

0000000000008d27 <helper_function>:
;; Still the same parameters here, but it looks a little different.
... snip ...
    8d2b:       48 89 7d e8             mov    %rdi,-0x18(%rbp)
    8d2f:       48 89 75 e0             mov    %rsi,-0x20(%rbp)
    8d33:       48 8b 45 e8             mov    -0x18(%rbp),%rax
    8d37:       0f b6 00                movzbl (%rax),%eax
    8d3a:       0f b6 c0                movzbl %al,%eax
    8d3d:       89 45 fc                mov    %eax,-0x4(%rbp)
    8d40:       8b 45 fc                mov    -0x4(%rbp),%eax
    8d43:       83 f8 17                cmp    $0x17,%eax
    8d46:       74 40                   je     8d88 <helper_function+0x61>
...

000000000000948a <buggy_invoker>:
    948a:       55                      push   %rbp
    948b:       48 89 e5                mov    %rsp,%rbp
    948e:       41 54                   push   %r12
    9490:       53                      push   %rbx
    9491:       48 81 ec c0 01 00 00    sub    $0x1c0,%rsp
    9498:       48 89 bd 38 fe ff ff    mov    %rdi,-0x1c8(%rbp)
    949f:       48 89 b5 30 fe ff ff    mov    %rsi,-0x1d0(%rbp)
    94a6:       48 c7 45 c0 00 00 00    movq   $0x0,-0x40(%rbp)
    94ad:       00 
    94ae:       48 c7 45 c8 00 00 00    movq   $0x0,-0x38(%rbp)
    94b5:       00 
    94b6:       c7 45 d0 00 00 00 00    movl   $0x0,-0x30(%rbp)
    94bd:       48 8d 55 c0             lea    -0x40(%rbp),%rdx
    94c1:       48 8b 85 38 fe ff ff    mov    -0x1c8(%rbp),%rax
    94c8:       48 89 d6                mov    %rdx,%rsi
    94cb:       48 89 c7                mov    %rax,%rdi
;; ***** NOT BUGGY HERE *****
;; Now, without optimization, both %rdi and %rsi loaded correctly.
    94ce:       e8 54 f8 ff ff          callq  8d27 <helper_function>

0000000000008eec <different_perfectly_fine_invoker>:
    8eec:       55                      push   %rbp
    8eed:       48 89 e5                mov    %rsp,%rbp
    8ef0:       48 83 ec 30             sub    $0x30,%rsp
    8ef4:       48 89 7d d8             mov    %rdi,-0x28(%rbp)
    8ef8:       48 c7 45 e0 00 00 00    movq   $0x0,-0x20(%rbp)
    8eff:       00 
    8f00:       48 c7 45 e8 00 00 00    movq   $0x0,-0x18(%rbp)
    8f07:       00 
    8f08:       c7 45 f0 00 00 00 00    movl   $0x0,-0x10(%rbp)
    8f0f:       48 8d 55 e0             lea    -0x20(%rbp),%rdx
    8f13:       48 8b 45 d8             mov    -0x28(%rbp),%rax
    8f17:       48 89 d6                mov    %rdx,%rsi
    8f1a:       48 89 c7                mov    %rax,%rdi
;; Another example of non-optimized call to that function.
    8f1d:       e8 05 fe ff ff          callq  8d27 <helper_function>

The Original C++ Code

This is a sanitized version of the original C++. I've just changed some names
and removed irrelevant code. Forgive my paranoia, I just don't want to expose
too much code from unpublished and unreleased work :-).

static void helper_function(my_struct_t *e, int *outArr)
{
  unsigned char event_type = e->header.type;
  if (event_type == event_A || event_type == event_B) {
    outArr[0] = action_one;
  } else if (event_type == event_C) {
    outArr[0] = action_one;
    outArr[1] = action_two;
  } else if (...) { ... }
}

static void buggy_invoker(my_struct_t *e, predicate_t pred)
{
  // MAX_ACTIONS is #defined to 5
  int action_array[MAX_ACTIONS] = {0};
  helper_function(e, action_array);
  ...
}

static int has_any_actions(my_struct_t *e)
{
  int actions[MAX_ACTIONS] = {0};
  helper_function(e, actions);
  return actions[0] != 0;
}

// *** ENTRY POINT to this code is this function (note not static).
void perfectly_fine_invoker(my_struct_t e, predicate_t pred)
{
  memfence();
  if (has_any_actions(&e)) {
    buggy_invoker(&e, pred);
  }
  ...
}

If you think I've obfuscated or eliminated too much, let me know. Users of
this code call 'perfectly_fine_invoker'. With optimization, g++ optimizes the
'has_any_actions' function away into a direct call to 'helper_function', which
you can see in the assembly.

The Question

So, my question is, does it look like a buggy optimization to anyone else?

If it would be helpful, I could post a sanitized version of the original C++ code.

This is my first posting to Stack Overflow, so please let me know if I can do
anything to make the question clearer, or provide any additional information.

The Answer

Edit (several days after the fact):

I accepted an answer below to my question -- it was not an optimization bug in g++, I was just looking at the assembly code wrong.

However, for whoever may be viewing this question in the future, I've found the answer. I did some reading on undefined behavior in C ( http://blog.regehr.org/archives/213 and http://blog.llvm.org/2011/05/what-every-c-programmer-should-know.html ) and some of the descriptions of the compiler optimizing away functions with undefined behavior seemed eerily familiar.

I added some NULL-pointer checks to the function 'helper_function' and lo and behold... bug goes away. I should have had the NULL-pointer checks to begin with, but apparently not having them allowed g++ to do whatever it wanted (in my case, optimize away the call).

Hope this information helps someone down the road.

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

马蹄踏│碎落叶 2024-12-08 04:52:13

我认为你看错了地方。我猜编译器注意到你的函数很短，并且没有改动 %rdi 寄存器，所以就让它保持原样（你把同一个变量作为第一个参数传入，我猜它本来就放在 %rdi 中，请参阅此文档第 21 页：http://www.x86-64.org/documentation/abi.pdf）。

如果您查看未优化的版本，它在这一行保存了 %rdi 寄存器：

9498:       48 89 bd 38 fe ff ff    mov    %rdi,-0x1c8(%rbp)

...然后在调用 helper_function 之前，它将保存的值移动到 %rax 中，然后再移动到 %rdi 中。

94c1:       48 8b 85 38 fe ff ff    mov    -0x1c8(%rbp),%rax
94c8:       48 89 d6                mov    %rdx,%rsi
94cb:       48 89 c7                mov    %rax,%rdi

当优化它时，编译器只是摆脱所有来回移动。

I think you are looking at the wrong thing. I imagine the compiler notices that your function is short and doesn't touch the %rdi register, so it just leaves it alone (you have the same variable as the first parameter, which I guess is what is placed in %rdi. See page 21 here http://www.x86-64.org/documentation/abi.pdf)

If you look at the unoptimized version it saves the %rdi register on this line

9498:       48 89 bd 38 fe ff ff    mov    %rdi,-0x1c8(%rbp)

...and then later just before calling helper_function it moves the saved value into %rax that is moved into %rdi.

94c1:       48 8b 85 38 fe ff ff    mov    -0x1c8(%rbp),%rax
94c8:       48 89 d6                mov    %rdx,%rsi
94cb:       48 89 c7                mov    %rax,%rdi

When optimizing it, the compiler just gets rid of all that moving back and forth.

回复收藏 0 原文

~没有更多了~

关于作者

筱武穆

暂无简介

文章

925 人气

关注发私信

友情链接

文江博客

这是 g++ 中的优化错误吗？

系统

优化启用

禁用优化

原始 C++ 代码

问题

答案

The System

Optimization Enabled

Optimization Disabled

The Original C++ Code

The Question

The Answer

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

评论（1）

关于作者

相关话题

热门标签

推荐作者

甲如呢乙后呢

王权女流氓

云雾

wyh2033345759

乖乖

qq_xR3jkM

友情链接

这是 g++ 中的优化错误吗？

系统

优化启用

禁用优化

原始 C++ 代码

问题

答案

The System

Optimization Enabled

Optimization Disabled

The Original C++ Code

The Question

The Answer

如果你对这篇内容有疑问，欢迎到本站社区发帖提问 参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

评论（1）

关于作者

相关话题

热门标签

推荐作者

甲如呢乙后呢

王权女流氓

云雾

wyh2033345759

乖乖

qq_xR3jkM

友情链接

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。