这是 g++ 中的优化错误吗?

发布于 2024-12-01 04:52:13 字数 7388 浏览 1 评论 0原文

我不确定我是否在 g++ (4.4.1-4ubuntu9) 中发现了错误,还是我自己做错了什么。我相信我看到的是启用 g++ -O2 优化后引入的错误。我已尽量把代码精简到只剩相关部分。

启用优化后,我的一个 ASSERT 会失败。禁用优化时,同样的 ASSERT 不会失败。我想我已经把问题追踪到了某个函数及其调用者的优化上。

系统

  • 语言:C++
  • Ubuntu 9.10
  • g++-4.4.real (Ubuntu 4.4.1-4ubuntu9) 4.4.1
  • Linux 2.6.31-22-server x86_64

优化启用

目标文件的编译命令: g++ -DHAVE_CONFIG_H -I. -fPIC -g -O2 -MT file.o -MD -MP -MF .deps/file.Tpo -c -o file.o file.cpp

以下是 objdump -dg file.o 输出中的相关代码。

00000000000018b0 <helper_function>:
;; This function takes two parameters:
;; pointer to int: %rdi
;; pointer to int[]: %rsi
    18b0:       0f b6 07                movzbl (%rdi),%eax
    18b3:       83 f8 12                cmp    $0x12,%eax
    18b6:       74 60                   je     1918 <helper_function+0x68>
    18b8:       83 f8 17                cmp    $0x17,%eax
    18bb:       74 5b                   je     1918 <helper_function+0x68>
...
    1918:       c7 06 32 00 00 00       movl   $0x32,(%rsi)
    191e:       66 90                   xchg   %ax,%ax
    1920:       c3                      retq  
    
0000000000005290 <buggy_invoker>:
... snip ...
    52a0:       48 81 ec c8 01 00 00    sub    $0x1c8,%rsp
    52a7:       48 8d 84 24 a0 01 00    lea    0x1a0(%rsp),%rax
    52ae:       00 
    52af:       48 c7 84 24 a0 01 00    movq   $0x0,0x1a0(%rsp)
    52b6:       00 00 00 00 00 
    52bb:       48 c7 84 24 a8 01 00    movq   $0x0,0x1a8(%rsp)
    52c2:       00 00 00 00 00 
    52c7:       c7 84 24 b0 01 00 00    movl   $0x0,0x1b0(%rsp)
    52ce:       00 00 00 00 
    52d2:       4c 8d 7c 24 20          lea    0x20(%rsp),%r15
    52d7:       48 89 c6                mov    %rax,%rsi
    52da:       48 89 44 24 08          mov    %rax,0x8(%rsp)
;; ***** BUG HERE *****
;; Pointer to int[] loaded into %rsi
;; But where is %rdi populated?
    52df:       e8 cc c5 ff ff          callq  18b0 <helper_function>

0000000000005494 <perfectly_fine_invoker>:
    5494:       48 83 ec 20             sub    $0x20,%rsp
    5498:       0f ae f0                mfence 
    549b:       48 8d 7c 24 30          lea    0x30(%rsp),%rdi
    54a0:       48 89 e6                mov    %rsp,%rsi
    54a3:       48 c7 04 24 00 00 00    movq   $0x0,(%rsp)
    54aa:       00 
    54ab:       48 c7 44 24 08 00 00    movq   $0x0,0x8(%rsp)
    54b2:       00 00 
    54b4:       c7 44 24 10 00 00 00    movl   $0x0,0x10(%rsp)
    54bb:       00 
;; Non buggy invocation here: both %rdi and %rsi loaded correctly.
    54bc:       e8 ef c3 ff ff          callq  18b0 <helper_function>

禁用优化

现在的编译命令: g++ -DHAVE_CONFIG_H -I. -fPIC -g -O0 -MT file.o -MD -MP -MF .deps/file.Tpo -c -o file.o file.cpp

0000000000008d27 <helper_function>:
;; Still the same parameters here, but it looks a little different.
... snip ...
    8d2b:       48 89 7d e8             mov    %rdi,-0x18(%rbp)
    8d2f:       48 89 75 e0             mov    %rsi,-0x20(%rbp)
    8d33:       48 8b 45 e8             mov    -0x18(%rbp),%rax
    8d37:       0f b6 00                movzbl (%rax),%eax
    8d3a:       0f b6 c0                movzbl %al,%eax
    8d3d:       89 45 fc                mov    %eax,-0x4(%rbp)
    8d40:       8b 45 fc                mov    -0x4(%rbp),%eax
    8d43:       83 f8 17                cmp    $0x17,%eax
    8d46:       74 40                   je     8d88 <helper_function+0x61>
...

000000000000948a <buggy_invoker>:
    948a:       55                      push   %rbp
    948b:       48 89 e5                mov    %rsp,%rbp
    948e:       41 54                   push   %r12
    9490:       53                      push   %rbx
    9491:       48 81 ec c0 01 00 00    sub    $0x1c0,%rsp
    9498:       48 89 bd 38 fe ff ff    mov    %rdi,-0x1c8(%rbp)
    949f:       48 89 b5 30 fe ff ff    mov    %rsi,-0x1d0(%rbp)
    94a6:       48 c7 45 c0 00 00 00    movq   $0x0,-0x40(%rbp)
    94ad:       00 
    94ae:       48 c7 45 c8 00 00 00    movq   $0x0,-0x38(%rbp)
    94b5:       00 
    94b6:       c7 45 d0 00 00 00 00    movl   $0x0,-0x30(%rbp)
    94bd:       48 8d 55 c0             lea    -0x40(%rbp),%rdx
    94c1:       48 8b 85 38 fe ff ff    mov    -0x1c8(%rbp),%rax
    94c8:       48 89 d6                mov    %rdx,%rsi
    94cb:       48 89 c7                mov    %rax,%rdi
;; ***** NOT BUGGY HERE *****
;; Now, without optimization, both %rdi and %rsi loaded correctly.
    94ce:       e8 54 f8 ff ff          callq  8d27 <helper_function>

0000000000008eec <different_perfectly_fine_invoker>:
    8eec:       55                      push   %rbp
    8eed:       48 89 e5                mov    %rsp,%rbp
    8ef0:       48 83 ec 30             sub    $0x30,%rsp
    8ef4:       48 89 7d d8             mov    %rdi,-0x28(%rbp)
    8ef8:       48 c7 45 e0 00 00 00    movq   $0x0,-0x20(%rbp)
    8eff:       00 
    8f00:       48 c7 45 e8 00 00 00    movq   $0x0,-0x18(%rbp)
    8f07:       00 
    8f08:       c7 45 f0 00 00 00 00    movl   $0x0,-0x10(%rbp)
    8f0f:       48 8d 55 e0             lea    -0x20(%rbp),%rdx
    8f13:       48 8b 45 d8             mov    -0x28(%rbp),%rax
    8f17:       48 89 d6                mov    %rdx,%rsi
    8f1a:       48 89 c7                mov    %rax,%rdi
;; Another example of non-optimized call to that function.
    8f1d:       e8 05 fe ff ff          callq  8d27 <helper_function>

原始 C++ 代码

这是原始 C++ 代码的清理版本。我只是更改了一些名字,并删除了不相关的代码。请原谅我的偏执,我只是不想暴露太多来自未公开、未发布的工作的代码 :-)。

static void helper_function(my_struct_t *e, int *outArr)
{
  unsigned char event_type = e->header.type;
  if (event_type == event_A || event_type == event_B) {
    outArr[0] = action_one;
  } else if (event_type == event_C) {
    outArr[0] = action_one;
    outArr[1] = action_two;
  } else if (...) { ... }
}

static void buggy_invoker(my_struct_t *e, predicate_t pred)
{
  // MAX_ACTIONS is #defined to 5
  int action_array[MAX_ACTIONS] = {0};
  helper_function(e, action_array);
  ...
}

static int has_any_actions(my_struct_t *e)
{
  int actions[MAX_ACTIONS] = {0};
  helper_function(e, actions);
  return actions[0] != 0;
}

// *** ENTRY POINT to this code is this function (note not static).
void perfectly_fine_invoker(my_struct_t e, predicate_t pred)
{
  memfence();
  if (has_any_actions(&e)) {
    buggy_invoker(&e, pred);
  }
  ...
}

如果您认为我混淆或删减了太多内容,请告诉我。此代码的用户调用的是 'perfectly_fine_invoker'。启用优化后,g++ 把 'has_any_actions' 函数优化掉了,变成对 'helper_function' 的直接调用,这一点可以在汇编代码中看到。

问题

所以,我的问题是,对于其他人来说,它看起来像是一个有缺陷的优化吗?

如果有帮助,我可以发布原始 C++ 代码的清理版本。

这是我第一次在 Stack Overflow 上发帖,所以如果我能做些什么让问题更清晰,或者需要提供任何其他信息,请告诉我。

答案

编辑(事后几天):

我接受了下面对我的问题的答案——这不是 g++ 中的优化错误,我只是看错了汇编代码。

然而,对于将来可能看到这个问题的人来说,我已经找到了答案。我读了一些关于 C 中未定义行为的文章( http://blog.regehr.org/archives/213 和 http://blog.llvm.org/2011/05/what-every-c-programmer-should-know.html ),其中关于编译器把带有未定义行为的函数优化掉的一些描述,看起来出奇地熟悉。

我向函数 'helper_function' 添加了一些 NULL 指针检查,你瞧……错误消失了。我本来一开始就应该加上 NULL 指针检查,但显然,缺少这些检查让 g++ 可以为所欲为(在我的例子中,就是把调用优化掉)。

希望这些信息对以后的人有所帮助。

I'm not sure whether I've found a bug in g++ (4.4.1-4ubuntu9), or if I'm doing
something wrong. What I believe I'm seeing is a bug introduced by enabling
optimization with g++ -O2. I've tried to distill the code down to just the
relevant parts.

When optimization is enabled, I have an ASSERT which is failing. When
optimization is disabled, the same ASSERT does not fail. I think I've tracked
it down to the optimization of one function and its callers.

The System

  • Language: C++
  • Ubuntu 9.10
  • g++-4.4.real (Ubuntu 4.4.1-4ubuntu9) 4.4.1
  • Linux 2.6.31-22-server x86_64

Optimization Enabled

Object compiled with:
g++ -DHAVE_CONFIG_H -I. -fPIC -g -O2 -MT file.o -MD -MP -MF .deps/file.Tpo -c -o file.o file.cpp

And here is the relevant code from objdump -dg file.o.

00000000000018b0 <helper_function>:
;; This function takes two parameters:
;; pointer to int: %rdi
;; pointer to int[]: %rsi
    18b0:       0f b6 07                movzbl (%rdi),%eax
    18b3:       83 f8 12                cmp    $0x12,%eax
    18b6:       74 60                   je     1918 <helper_function+0x68>
    18b8:       83 f8 17                cmp    $0x17,%eax
    18bb:       74 5b                   je     1918 <helper_function+0x68>
...
    1918:       c7 06 32 00 00 00       movl   $0x32,(%rsi)
    191e:       66 90                   xchg   %ax,%ax
    1920:       c3                      retq  
    
0000000000005290 <buggy_invoker>:
... snip ...
    52a0:       48 81 ec c8 01 00 00    sub    $0x1c8,%rsp
    52a7:       48 8d 84 24 a0 01 00    lea    0x1a0(%rsp),%rax
    52ae:       00 
    52af:       48 c7 84 24 a0 01 00    movq   $0x0,0x1a0(%rsp)
    52b6:       00 00 00 00 00 
    52bb:       48 c7 84 24 a8 01 00    movq   $0x0,0x1a8(%rsp)
    52c2:       00 00 00 00 00 
    52c7:       c7 84 24 b0 01 00 00    movl   $0x0,0x1b0(%rsp)
    52ce:       00 00 00 00 
    52d2:       4c 8d 7c 24 20          lea    0x20(%rsp),%r15
    52d7:       48 89 c6                mov    %rax,%rsi
    52da:       48 89 44 24 08          mov    %rax,0x8(%rsp)
;; ***** BUG HERE *****
;; Pointer to int[] loaded into %rsi
;; But where is %rdi populated?
    52df:       e8 cc c5 ff ff          callq  18b0 <helper_function>

0000000000005494 <perfectly_fine_invoker>:
    5494:       48 83 ec 20             sub    $0x20,%rsp
    5498:       0f ae f0                mfence 
    549b:       48 8d 7c 24 30          lea    0x30(%rsp),%rdi
    54a0:       48 89 e6                mov    %rsp,%rsi
    54a3:       48 c7 04 24 00 00 00    movq   $0x0,(%rsp)
    54aa:       00 
    54ab:       48 c7 44 24 08 00 00    movq   $0x0,0x8(%rsp)
    54b2:       00 00 
    54b4:       c7 44 24 10 00 00 00    movl   $0x0,0x10(%rsp)
    54bb:       00 
;; Non buggy invocation here: both %rdi and %rsi loaded correctly.
    54bc:       e8 ef c3 ff ff          callq  18b0 <helper_function>

Optimization Disabled

Now compiled with:
g++ -DHAVE_CONFIG_H -I. -fPIC -g -O0 -MT file.o -MD -MP -MF .deps/file.Tpo -c -o file.o file.cpp

0000000000008d27 <helper_function>:
;; Still the same parameters here, but it looks a little different.
... snip ...
    8d2b:       48 89 7d e8             mov    %rdi,-0x18(%rbp)
    8d2f:       48 89 75 e0             mov    %rsi,-0x20(%rbp)
    8d33:       48 8b 45 e8             mov    -0x18(%rbp),%rax
    8d37:       0f b6 00                movzbl (%rax),%eax
    8d3a:       0f b6 c0                movzbl %al,%eax
    8d3d:       89 45 fc                mov    %eax,-0x4(%rbp)
    8d40:       8b 45 fc                mov    -0x4(%rbp),%eax
    8d43:       83 f8 17                cmp    $0x17,%eax
    8d46:       74 40                   je     8d88 <helper_function+0x61>
...

000000000000948a <buggy_invoker>:
    948a:       55                      push   %rbp
    948b:       48 89 e5                mov    %rsp,%rbp
    948e:       41 54                   push   %r12
    9490:       53                      push   %rbx
    9491:       48 81 ec c0 01 00 00    sub    $0x1c0,%rsp
    9498:       48 89 bd 38 fe ff ff    mov    %rdi,-0x1c8(%rbp)
    949f:       48 89 b5 30 fe ff ff    mov    %rsi,-0x1d0(%rbp)
    94a6:       48 c7 45 c0 00 00 00    movq   $0x0,-0x40(%rbp)
    94ad:       00 
    94ae:       48 c7 45 c8 00 00 00    movq   $0x0,-0x38(%rbp)
    94b5:       00 
    94b6:       c7 45 d0 00 00 00 00    movl   $0x0,-0x30(%rbp)
    94bd:       48 8d 55 c0             lea    -0x40(%rbp),%rdx
    94c1:       48 8b 85 38 fe ff ff    mov    -0x1c8(%rbp),%rax
    94c8:       48 89 d6                mov    %rdx,%rsi
    94cb:       48 89 c7                mov    %rax,%rdi
;; ***** NOT BUGGY HERE *****
;; Now, without optimization, both %rdi and %rsi loaded correctly.
    94ce:       e8 54 f8 ff ff          callq  8d27 <helper_function>

0000000000008eec <different_perfectly_fine_invoker>:
    8eec:       55                      push   %rbp
    8eed:       48 89 e5                mov    %rsp,%rbp
    8ef0:       48 83 ec 30             sub    $0x30,%rsp
    8ef4:       48 89 7d d8             mov    %rdi,-0x28(%rbp)
    8ef8:       48 c7 45 e0 00 00 00    movq   $0x0,-0x20(%rbp)
    8eff:       00 
    8f00:       48 c7 45 e8 00 00 00    movq   $0x0,-0x18(%rbp)
    8f07:       00 
    8f08:       c7 45 f0 00 00 00 00    movl   $0x0,-0x10(%rbp)
    8f0f:       48 8d 55 e0             lea    -0x20(%rbp),%rdx
    8f13:       48 8b 45 d8             mov    -0x28(%rbp),%rax
    8f17:       48 89 d6                mov    %rdx,%rsi
    8f1a:       48 89 c7                mov    %rax,%rdi
;; Another example of non-optimized call to that function.
    8f1d:       e8 05 fe ff ff          callq  8d27 <helper_function>

The Original C++ Code

This is a sanitized version of the original C++. I've just changed some names
and removed irrelevant code. Forgive my paranoia, I just don't want to expose
too much code from unpublished and unreleased work :-).

static void helper_function(my_struct_t *e, int *outArr)
{
  unsigned char event_type = e->header.type;
  if (event_type == event_A || event_type == event_B) {
    outArr[0] = action_one;
  } else if (event_type == event_C) {
    outArr[0] = action_one;
    outArr[1] = action_two;
  } else if (...) { ... }
}

static void buggy_invoker(my_struct_t *e, predicate_t pred)
{
  // MAX_ACTIONS is #defined to 5
  int action_array[MAX_ACTIONS] = {0};
  helper_function(e, action_array);
  ...
}

static int has_any_actions(my_struct_t *e)
{
  int actions[MAX_ACTIONS] = {0};
  helper_function(e, actions);
  return actions[0] != 0;
}

// *** ENTRY POINT to this code is this function (note not static).
void perfectly_fine_invoker(my_struct_t e, predicate_t pred)
{
  memfence();
  if (has_any_actions(&e)) {
    buggy_invoker(&e, pred);
  }
  ...
}

If you think I've obfuscated or eliminated too much, let me know. Users of
this code call 'perfectly_fine_invoker'. With optimization, g++ optimizes the
'has_any_actions' function away into a direct call to 'helper_function', which
you can see in the assembly.

The Question

So, my question is, does it look like a buggy optimization to anyone else?

If it would be helpful, I could post a sanitized version of the original C++ code.

This is my first posting to Stack Overflow, so please let me know if I can do
anything to make the question clearer, or provide any additional information.

The Answer

Edit (several days after the fact):

I accepted an answer below to my question -- it was not an optimization bug in g++, I was just looking at the assembly code wrong.

However, for whoever may be viewing this question in the future, I've found the answer. I did some reading on undefined behavior in C ( http://blog.regehr.org/archives/213 and http://blog.llvm.org/2011/05/what-every-c-programmer-should-know.html ) and some of the descriptions of the compiler optimizing away functions with undefined behavior seemed eerily familiar.

I added some NULL-pointer checks to the function 'helper_function' and lo and behold... bug goes away. I should have had the NULL-pointer checks to begin with, but apparently not having them allowed g++ to do whatever it wanted (in my case, optimize away the call).

Hope this information helps someone down the road.

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

马蹄踏│碎落叶 2024-12-08 04:52:13

我认为你看错了地方。我猜编译器注意到你的函数很短,而且没有改动 %rdi 寄存器,所以就没有去动它(你传入的第一个参数是同一个变量,我猜它正是放在 %rdi 中的内容。请参阅此文档第 21 页: http://www.x86-64.org/documentation/abi.pdf )

如果您查看未优化的版本,它在这一行保存了 %rdi 寄存器

9498:       48 89 bd 38 fe ff ff    mov    %rdi,-0x1c8(%rbp)

...然后在调用 helper_function 之前,它将保存的值移动到 %rax 中,然后再移动到 %rdi 中。

94c1:       48 8b 85 38 fe ff ff    mov    -0x1c8(%rbp),%rax
94c8:       48 89 d6                mov    %rdx,%rsi
94cb:       48 89 c7                mov    %rax,%rdi

进行优化时,编译器只是去掉了所有这些来回的搬移。

I think you are looking at the wrong thing. I imagine the compiler notices that your function is short and doesn't touch the %rdi register, so it just leaves it alone (you have the same variable as the first parameter, which I guess is what is placed in %rdi. See page 21 here: http://www.x86-64.org/documentation/abi.pdf)

If you look at the unoptimized version it saves the %rdi register on this line

9498:       48 89 bd 38 fe ff ff    mov    %rdi,-0x1c8(%rbp)

...and then later, just before calling helper_function, it moves the saved value into %rax, which is then moved into %rdi.

94c1:       48 8b 85 38 fe ff ff    mov    -0x1c8(%rbp),%rax
94c8:       48 89 d6                mov    %rdx,%rsi
94cb:       48 89 c7                mov    %rax,%rdi

When optimizing it, the compiler just gets rid of all that moving back and forth.

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文