ebp + JIT 编译器中的 6 代替 +8

发布于 2024-09-05 17:58:50 字数 3046 浏览 4 评论 0原文

我正在虚拟机中实现一个简单的 JIT 编译器，我编写它是为了好玩（主要是为了了解更多有关语言设计的知识），并且我遇到了一些奇怪的行为，也许有人可以告诉我原因。

首先，我为 C 和 C++ 定义了一个 JIT“原型”：

#ifdef __cplusplus 
    typedef void* (*_JIT_METHOD) (...);
#else
    typedef (*_JIT_METHOD) ();
#endif

我有一个compile() 函数，它将把东西编译成 ASM 并将其粘贴在内存中的某个位置：

void* compile (void* something)
{
    // grab some memory
    unsigned char* buffer = (unsigned char*) malloc (1024);

    // xor eax, eax
    // inc eax
    // inc eax
    // inc eax
    // ret -> eax should be 3
    /* WORKS!
    buffer[0] = 0x67;
    buffer[1] = 0x31;
    buffer[2] = 0xC0;
    buffer[3] = 0x67;
    buffer[4] = 0x40;
    buffer[5] = 0x67;
    buffer[6] = 0x40;
    buffer[7] = 0x67;
    buffer[8] = 0x40;
    buffer[9] = 0xC3; */

    // xor eax, eax
    // mov eax, 9
    // ret 4 -> eax should be 9
    /* WORKS!
    buffer[0] = 0x67;
    buffer[1] = 0x31;
    buffer[2] = 0xC0;
    buffer[3] = 0x67;
    buffer[4] = 0xB8;
    buffer[5] = 0x09;
    buffer[6] = 0x00;
    buffer[7] = 0x00;
    buffer[8] = 0x00;
    buffer[9] = 0xC3; */


    // push ebp
    // mov ebp, esp
    // mov eax, [ebp + 6] ; wtf? shouldn't this be [ebp + 8]!?
    // mov esp, ebp
    // pop ebp
    // ret -> eax should be the first value sent to the function
    /* WORKS! */
    buffer[0] = 0x66;
    buffer[1] = 0x55;
    buffer[2] = 0x66;
    buffer[3] = 0x89;
    buffer[4] = 0xE5;
    buffer[5] = 0x66;
    buffer[6] = 0x66;
    buffer[7] = 0x8B;
    buffer[8] = 0x45;
    buffer[9] = 0x06;
    buffer[10] = 0x66;
    buffer[11] = 0x89;
    buffer[12] = 0xEC;
    buffer[13] = 0x66;
    buffer[14] = 0x5D;
    buffer[15] = 0xC3;

    // mov eax, 5
    // add eax, ecx
    // ret -> eax should be 50
    /* WORKS!
    buffer[0] = 0x67;
    buffer[1] = 0xB8;
    buffer[2] = 0x05;
    buffer[3] = 0x00;
    buffer[4] = 0x00;
    buffer[5] = 0x00;
    buffer[6] = 0x66;
    buffer[7] = 0x01;
    buffer[8] = 0xC8;
    buffer[9] = 0xC3; */

    return buffer;
}

最后，我有了主要的块程序：

int main (int argc, char **args)
{
    DWORD oldProtect = (DWORD) NULL;
    int i = 667, j = 1, k = 5, l = 0;

    // generate some arbitrary function
    _JIT_METHOD someFunc = (_JIT_METHOD) compile(NULL);

    // windows only
#if defined _WIN64 || defined _WIN32
    // set memory permissions and flush CPU code cache
    VirtualProtect(someFunc,1024,PAGE_EXECUTE_READWRITE, &oldProtect);  
    FlushInstructionCache(GetCurrentProcess(), someFunc, 1024);
#endif

    // this asm just for some debugging/testing purposes
    __asm mov ecx, i

    // run compiled function (from wherever *someFunc is pointing to)
    l = (int)someFunc(i, k);

    // did it work?
    printf("result: %d", l);

    free (someFunc);
    _getch();

    return 0;
}

如您所见，我对compile()函数进行了一些测试，以确保获得预期的结果，几乎一切正常，但我有一个问题......

在大多数教程中或文档资源，要获取传递的函数的第一个值（在整数的情况下），您需要执行 [ebp+8]、第二个 [ebp+12] 和等等。由于某种原因，我必须执行 [ebp+6] 然后 [ebp+10] 等等。谁能告诉我为什么？

原文

I'm implementing a simplistic JIT compiler in a VM I'm writing for fun (mostly to learn more about language design) and I'm getting some weird behavior, maybe someone can tell me why.

First I define a JIT "prototype" both for C and C++:

#ifdef __cplusplus 
    typedef void* (*_JIT_METHOD) (...);
#else
    typedef (*_JIT_METHOD) ();
#endif

I have a compile() function that will compile stuff into ASM and stick it somewhere in memory:

void* compile (void* something)
{
    // grab some memory
    unsigned char* buffer = (unsigned char*) malloc (1024);

    // xor eax, eax
    // inc eax
    // inc eax
    // inc eax
    // ret -> eax should be 3
    /* WORKS!
    buffer[0] = 0x67;
    buffer[1] = 0x31;
    buffer[2] = 0xC0;
    buffer[3] = 0x67;
    buffer[4] = 0x40;
    buffer[5] = 0x67;
    buffer[6] = 0x40;
    buffer[7] = 0x67;
    buffer[8] = 0x40;
    buffer[9] = 0xC3; */

    // xor eax, eax
    // mov eax, 9
    // ret 4 -> eax should be 9
    /* WORKS!
    buffer[0] = 0x67;
    buffer[1] = 0x31;
    buffer[2] = 0xC0;
    buffer[3] = 0x67;
    buffer[4] = 0xB8;
    buffer[5] = 0x09;
    buffer[6] = 0x00;
    buffer[7] = 0x00;
    buffer[8] = 0x00;
    buffer[9] = 0xC3; */


    // push ebp
    // mov ebp, esp
    // mov eax, [ebp + 6] ; wtf? shouldn't this be [ebp + 8]!?
    // mov esp, ebp
    // pop ebp
    // ret -> eax should be the first value sent to the function
    /* WORKS! */
    buffer[0] = 0x66;
    buffer[1] = 0x55;
    buffer[2] = 0x66;
    buffer[3] = 0x89;
    buffer[4] = 0xE5;
    buffer[5] = 0x66;
    buffer[6] = 0x66;
    buffer[7] = 0x8B;
    buffer[8] = 0x45;
    buffer[9] = 0x06;
    buffer[10] = 0x66;
    buffer[11] = 0x89;
    buffer[12] = 0xEC;
    buffer[13] = 0x66;
    buffer[14] = 0x5D;
    buffer[15] = 0xC3;

    // mov eax, 5
    // add eax, ecx
    // ret -> eax should be 50
    /* WORKS!
    buffer[0] = 0x67;
    buffer[1] = 0xB8;
    buffer[2] = 0x05;
    buffer[3] = 0x00;
    buffer[4] = 0x00;
    buffer[5] = 0x00;
    buffer[6] = 0x66;
    buffer[7] = 0x01;
    buffer[8] = 0xC8;
    buffer[9] = 0xC3; */

    return buffer;
}

And finally I have the main chunk of the program:

int main (int argc, char **args)
{
    DWORD oldProtect = (DWORD) NULL;
    int i = 667, j = 1, k = 5, l = 0;

    // generate some arbitrary function
    _JIT_METHOD someFunc = (_JIT_METHOD) compile(NULL);

    // windows only
#if defined _WIN64 || defined _WIN32
    // set memory permissions and flush CPU code cache
    VirtualProtect(someFunc,1024,PAGE_EXECUTE_READWRITE, &oldProtect);  
    FlushInstructionCache(GetCurrentProcess(), someFunc, 1024);
#endif

    // this asm just for some debugging/testing purposes
    __asm mov ecx, i

    // run compiled function (from wherever *someFunc is pointing to)
    l = (int)someFunc(i, k);

    // did it work?
    printf("result: %d", l);

    free (someFunc);
    _getch();

    return 0;
}

As you can see, the compile() function has a couple of tests I ran to make sure I get expected results, and pretty much everything works but I have a question...

On most tutorials or documentation resources, to get the first value of a function passed (in the case of ints) you do [ebp+8], the second [ebp+12] and so forth. For some reason, I have to do [ebp+6] then [ebp+10] and so forth. Could anyone tell me why?

分享到QQ

分享到微博

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

需要登录才能够评论，你可以免费注册一个本站的账号。

太阳公公是暖光 2024-09-12 17:58:50

您的操作码看起来很可疑：它们充满了 0x66 和 0x67 地址/数据大小覆盖前缀，这些前缀（在 32 位代码段中）将转换为 32 位操作转换成16位的。例如

buffer[0] = 0x66;
buffer[1] = 0x55;
buffer[2] = 0x66;
buffer[3] = 0x89;
buffer[4] = 0xE5;
...

push bp
mov  bp, sp

而不是

push ebp
mov  ebp, esp

（这似乎解释了观察到的行为：按下 bp 会使堆栈指针减少 2 而不是 4）。

Your opcodes look suspicious: they're full of 0x66 and 0x67 address/data size override prefixes, which (in a 32-bit code segment) will turn 32-bit operations into 16-bit ones. e.g.

buffer[0] = 0x66;
buffer[1] = 0x55;
buffer[2] = 0x66;
buffer[3] = 0x89;
buffer[4] = 0xE5;
...

push bp
mov  bp, sp

rather than

push ebp
mov  ebp, esp

(which seems to explain the observed behaviour: pushing bp decrements the stack pointer by 2 instead of 4).

回复收藏 0 原文

探春 2024-09-12 17:58:50

您的问题是 66 和 67 字节——分别是操作数大小覆盖和地址大小覆盖。

由于您在 32 位模式下运行此代码，因此这些字节告诉处理器您需要 16 位操作数和地址，而不是 32 位操作数和地址。 66 55 反汇编为 PUSH BP，它只推送 2 个字节而不是 4 个字节，因此您的地址偏移了 2。

67 字节前两个示例也是不必要的，但因为您只访问寄存器而不是内存，所以它们没有任何效果并且不会破坏任何东西（还）。这些字节也应该被删除。

看起来您正在使用专为 16 位代码设计的框架，或者您可以通过某种方式告诉它您需要 32 位代码。

回复收藏 0 原文

~没有更多了~

关于作者

涙—继续流

暂无简介

0 文章

0 评论

23 人气

关注发私信

友情链接

文江博客

ebp + JIT 编译器中的 6 代替 +8

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

评论（2）

关于作者

相关话题

热门标签

推荐作者

留蓝

18790681156

zach7772

Wini

ayeshaaroy

初雪

友情链接

ebp + JIT 编译器中的 6 代替 +8

如果你对这篇内容有疑问，欢迎到本站社区发帖提问 参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。

发布评论

评论（2）

关于作者

相关话题

热门标签

推荐作者

留蓝

18790681156

zach7772

Wini

ayeshaaroy

初雪

友情链接

如果你对这篇内容有疑问，欢迎到本站社区发帖提问参与讨论，获取更多帮助，或者扫码二维码加入 Web 技术交流群。