在汇编代码中寻找不必要的缓冲区副本
我正在使用 Visual Studio 2008 C++ for Windows Mobile 6 ARMV4I,并且我正在尝试学习读取 VS 生成的 ARM 汇编代码,以最大程度地减少应用程序中不必要的缓冲区副本。因此,我创建了一个如下所示的测试应用程序:
#include <vector>
typedef std::vector< BYTE > Buf;
// Wrapper that stores a byte buffer handed to it at construction time.
class Foo
{
public:
// Receives the buffer by value and swaps it into b_, so the constructor
// body only exchanges the two vectors' contents instead of copying the
// elements. Whether initializing the by-value parameter `b` involves a
// copy is decided at the call site.
Foo( Buf b ) { b_.swap( b ); };
private:
// The stored buffer: default-constructed, then exchanged with the parameter.
Buf b_;
};
// Builds a 1024-element buffer, sets its first element to 1, and returns
// the buffer by value.
Buf Create()
{
Buf b( 1024 );
b[ 0 ] = 0x0001;
// Returning a named local by value: this is the spot where a compiler may
// construct the result directly in the caller's storage (NRVO).
return b;
}
// Test entry point: constructs a Foo from the temporary returned by Create().
// argc and argv are not used.
int _tmain( int argc, _TCHAR* argv[] )
{
// The temporary returned by Create() initializes Foo's by-value parameter;
// this is the hand-off the question wants to inspect for copies.
Foo f( Create() );
return 0;
}
我想了解 Create
返回的缓冲区在传递给 Foo
构造函数时是否被复制,还是编译器能够将该副本优化掉。在开启优化的 Release 版本中,这会生成如下所示的汇编代码:
class Foo
{
public:
Foo( Buf b ) { b_.swap( b ); };
0001112C stmdb sp!, {r4 - r7, lr}
00011130 mov r7, r0
00011134 mov r3, #0
00011138 str r3, this
0001113C str r3, [r7, #4]
00011140 str r3, [r7, #8]
00011144 ldr r3, this
00011148 ldr r2, this
0001114C mov r5, r7
00011150 mov r4, r1
00011154 str r3, this, #4
00011158 str r2, this, #4
0001115C mov r6, r1
00011160 ldr r2, this
00011164 ldr r3, this
00011168 mov lr, r7
0001116C str r3, this
00011170 str r2, this
00011174 ldr r2, [lr, #8]!
00011178 ldr r3, [r6, #8]!
0001117C str r3, this
00011180 str r2, this
00011184 ldr r3, this
00011188 movs r0, r3
0001118C beq |Foo::Foo + 0x84 ( 111b0h )|
00011190 ldr r3, [r1, #8]
00011194 sub r1, r3, r0
00011198 cmp r1, #0x80
0001119C bls |Foo::Foo + 0x80 ( 111ach )|
000111A0 bl 000112D4
000111A4 mov r0, r7
000111A8 ldmia sp!, {r4 - r7, pc}
000111AC bl |stlp_std::__node_alloc::_M_deallocate ( 11d2ch )|
000111B0 mov r0, r7
000111B4 ldmia sp!, {r4 - r7, pc}
--- ...\stlport\stl\_vector.h -----------------------------
// snip!
--- ...\asm_test.cpp
private:
Buf b_;
};
Buf Create()
{
00011240 stmdb sp!, {r4, lr}
00011244 mov r4, r0
Buf b( 1024 );
00011248 mov r1, #1, 22
0001124C bl |
b[ 0 ] = 0x0001;
00011250 ldr r3, [r4]
00011254 mov r2, #1
return b;
}
int _tmain( int argc, _TCHAR* argv[] )
{
00011264 str lr, [sp, #-4]!
00011268 sub sp, sp, #0x18
Foo f( Create() );
0001126C add r0, sp, #0xC
00011270 bl |Create ( 11240h )|
00011274 mov r1, r0
00011278 add r0, sp, #0
0001127C bl |Foo::Foo ( 1112ch )|
return 0;
00011280 ldr r0, argc
00011284 cmp r0, #0
00011288 beq |wmain + 0x44 ( 112a8h )|
0001128C ldr r3, [sp, #8]
00011290 sub r1, r3, r0
00011294 cmp r1, #0x80
00011298 bls |wmain + 0x40 ( 112a4h )|
0001129C bl 000112D4
000112A0 b |wmain + 0x44 ( 112a8h )|
000112A4 bl |stlp_std::__node_alloc::_M_deallocate ( 11d2ch )|
000112A8 mov r0, #0
}
我可以在汇编代码中查找哪些模式来了解 Buf
结构被复制到哪里?
I am using Visual Studio 2008 C++ for Windows Mobile 6 ARMV4I and I'm trying to learn to read the ARM assembly code generated by VS to minimize unnecessary buffer copies within an application. So, I've created a test application that looks like this:
#include <vector>
typedef std::vector< BYTE > Buf;
// Wrapper that stores a byte buffer handed to it at construction time.
class Foo
{
public:
// Receives the buffer by value and swaps it into b_, so the constructor
// body only exchanges the two vectors' contents instead of copying the
// elements. Whether initializing the by-value parameter `b` involves a
// copy is decided at the call site.
Foo( Buf b ) { b_.swap( b ); };
private:
// The stored buffer: default-constructed, then exchanged with the parameter.
Buf b_;
};
// Builds a 1024-element buffer, sets its first element to 1, and returns
// the buffer by value.
Buf Create()
{
Buf b( 1024 );
b[ 0 ] = 0x0001;
// Returning a named local by value: this is the spot where a compiler may
// construct the result directly in the caller's storage (NRVO).
return b;
}
// Test entry point: constructs a Foo from the temporary returned by Create().
// argc and argv are not used.
int _tmain( int argc, _TCHAR* argv[] )
{
// The temporary returned by Create() initializes Foo's by-value parameter;
// this is the hand-off the question wants to inspect for copies.
Foo f( Create() );
return 0;
}
I'd like to understand if the buffer returned by Create
is copied when given to the Foo
constructor or if the compiler is able to optimize that copy away. In the Release build with optimizations turned on, this generates assembly like this:
class Foo
{
public:
Foo( Buf b ) { b_.swap( b ); };
0001112C stmdb sp!, {r4 - r7, lr}
00011130 mov r7, r0
00011134 mov r3, #0
00011138 str r3, this
0001113C str r3, [r7, #4]
00011140 str r3, [r7, #8]
00011144 ldr r3, this
00011148 ldr r2, this
0001114C mov r5, r7
00011150 mov r4, r1
00011154 str r3, this, #4
00011158 str r2, this, #4
0001115C mov r6, r1
00011160 ldr r2, this
00011164 ldr r3, this
00011168 mov lr, r7
0001116C str r3, this
00011170 str r2, this
00011174 ldr r2, [lr, #8]!
00011178 ldr r3, [r6, #8]!
0001117C str r3, this
00011180 str r2, this
00011184 ldr r3, this
00011188 movs r0, r3
0001118C beq |Foo::Foo + 0x84 ( 111b0h )|
00011190 ldr r3, [r1, #8]
00011194 sub r1, r3, r0
00011198 cmp r1, #0x80
0001119C bls |Foo::Foo + 0x80 ( 111ach )|
000111A0 bl 000112D4
000111A4 mov r0, r7
000111A8 ldmia sp!, {r4 - r7, pc}
000111AC bl |stlp_std::__node_alloc::_M_deallocate ( 11d2ch )|
000111B0 mov r0, r7
000111B4 ldmia sp!, {r4 - r7, pc}
--- ...\stlport\stl\_vector.h -----------------------------
// snip!
--- ...\asm_test.cpp
private:
Buf b_;
};
Buf Create()
{
00011240 stmdb sp!, {r4, lr}
00011244 mov r4, r0
Buf b( 1024 );
00011248 mov r1, #1, 22
0001124C bl |
b[ 0 ] = 0x0001;
00011250 ldr r3, [r4]
00011254 mov r2, #1
return b;
}
int _tmain( int argc, _TCHAR* argv[] )
{
00011264 str lr, [sp, #-4]!
00011268 sub sp, sp, #0x18
Foo f( Create() );
0001126C add r0, sp, #0xC
00011270 bl |Create ( 11240h )|
00011274 mov r1, r0
00011278 add r0, sp, #0
0001127C bl |Foo::Foo ( 1112ch )|
return 0;
00011280 ldr r0, argc
00011284 cmp r0, #0
00011288 beq |wmain + 0x44 ( 112a8h )|
0001128C ldr r3, [sp, #8]
00011290 sub r1, r3, r0
00011294 cmp r1, #0x80
00011298 bls |wmain + 0x40 ( 112a4h )|
0001129C bl 000112D4
000112A0 b |wmain + 0x44 ( 112a8h )|
000112A4 bl |stlp_std::__node_alloc::_M_deallocate ( 11d2ch )|
000112A8 mov r0, #0
}
What patterns can I look for in the assembly code to understand where the Buf
structure is being copied?
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(2)
分析
Create
相当简单,因为代码非常短。显然,这里应用了 NRVO,因为 return 语句没有生成任何指令,返回值是在 r0 中就地构造的。Foo::Foo
的按值传递参数本应发生的复制稍微难以分析,但在对 Create
和 Foo::Foo
的调用之间(也就是复制必须发生的位置)只有很少的代码,而且其中没有任何会对 std::vector
进行深层复制的指令。所以看起来那个副本也已经被消除了。另一种可能性是 Foo::Foo 使用了自定义调用约定,参数实际上通过引用传递并在函数内部复制。要排除这种可能性,需要比我更擅长 ARM 汇编分析的人。
Analyzing
Create
is fairly straightforward, because the code is so short. NRVO clearly has been applied here because the return statement generated no instructions; the return value is constructed in-place in r0.
The copy that would take place for
Foo::Foo
's pass-by-value parameter is slightly harder to analyze, but there's very little code between the calls to Create
and Foo::Foo
where the copy would have to take place, and nothing that would do a deep copy of a std::vector
. So it looks like that copy has been eliminated as well. The other possibility is a custom calling convention for Foo::Foo
where the argument is actually passed by reference and copied inside the function. You'd need someone capable of deeper ARM assembly analysis than I am to rule that out.
缓冲区将被复制;您正在使用 C++ 的按值传递语义;没有编译器会为你优化它。它的复制方式取决于 std::vector 的复制构造函数。
The buffer will be copied; you are using the pass-by-value semantics of C++; no compiler will optimize that for you. How it's copied will depend on the copy constructor of std::vector.