用于捕获 C++ 组(group)/动态节的 GNU LD 脚本

发布于 2024-08-04 01:37:56 字数 6012 浏览 1 评论 0原文

我正在维护一个可以将 ELF32 可重定位文件转换为 RDOFF2 格式的工具。
为了使此过程正常工作,我需要先对输入文件进行预链接,目前使用的 ld 脚本如下所示:

/*
 * Pre-link script for elf32-i386 input. Merges the matched input sections
 * into three output sections (.text, .data, .bss) and drops
 * .note.GNU-stack and .gnu_debuglink. Each collected range is bracketed by
 * PROVIDE_HIDDEN start/end symbols set to the current location counter.
 */
OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
OUTPUT_ARCH(i386)
/* Assign space to common symbols even for relocatable output (same effect as ld -d). */
FORCE_COMMON_ALLOCATION

SECTIONS {
    /* Code, read-only data and the init/fini function-pointer arrays. */
    .text : {
            /* collect .init / .fini sections */

        /* KEEP protects the wrapped input sections from --gc-sections. */
        PROVIDE_HIDDEN(__init_start = .);
        KEEP (*(.init))
        PROVIDE_HIDDEN(__init_end = .);

        PROVIDE_HIDDEN(__fini_start = .);
        KEEP (*(.fini))
        PROVIDE_HIDDEN(__fini_end = .);

            /* .text and .rodata */

        /* read-only data is merged into the .text output section, after the code */
        *(.text .text.* .gnu.linkonce.t.*)
        *(.rodata .rodata.* .gnu.linkonce.r.*)
        *(.rodata1)


            /* .init- / .fini_arrays */

        PROVIDE_HIDDEN (__preinit_array_start = .);
        KEEP (*(.preinit_array))
        PROVIDE_HIDDEN (__preinit_array_end = .);

        /* name-sorted .init_array.* entries come before the plain .init_array */
        PROVIDE_HIDDEN (__init_array_start = .);
        KEEP (*(SORT(.init_array.*)))
        KEEP (*(.init_array))
        PROVIDE_HIDDEN (__init_array_end = .);

        /* same ordering for the .fini_array entries */
        PROVIDE_HIDDEN (__fini_array_start = .);
        KEEP (*(SORT(.fini_array.*)))
        KEEP (*(.fini_array))
        PROVIDE_HIDDEN (__fini_array_end = .);
    }
    /* Writable data plus exception tables, unwind info and ctor/dtor lists. */
    .data : {
        *(.data .data.* .gnu.linkonce.d.*)
        *(.data1)

        SORT(CONSTRUCTORS)

            /* c++ ctors / dtors and exception tables */

        /* exception tables and .eh_frame are placed in .data, not in sections of their own */
        PROVIDE_HIDDEN (__gcc_except_table_start = .);
        *(.gcc_except_table .gcc_except_table.*)
        PROVIDE_HIDDEN (__gcc_except_table_end = .);

        PROVIDE_HIDDEN (__eh_frame_start = .);
        *(.eh_frame_hdr)
        *(.eh_frame)
        PROVIDE_HIDDEN (__eh_frame_end = .);

        PROVIDE_HIDDEN (__ctors_array_start = .);
        KEEP (*(SORT(.ctors.*)))
        KEEP (*(.ctors))
        PROVIDE_HIDDEN (__ctors_array_end = .);

        PROVIDE_HIDDEN (__dtors_array_start = .);
        KEEP (*(SORT(.dtors.*)))
        KEEP (*(.dtors))
        PROVIDE_HIDDEN (__dtors_array_end = .);
    }
    /* Zero-initialised data and common symbols. */
    .bss  : {
        *(.dynbss)
        *(.bss .bss.* .gnu.linkonce.b.*)
        *(COMMON)

        /* round the location counter up to 4 bytes (32 / 8) unless it is still 0 */
        . = ALIGN(. != 0 ? 32 / 8 : 1);
    }
    /* Input sections listed here are dropped from the output. */
    /DISCARD/ : {
        *(.note.GNU-stack)
        *(.gnu_debuglink)
    }
}

目标是精简输入文件,使其仅包含 .text、.data、.bss、.strtab、.symtab 和 .shstrtab 这几个节。

虽然当前版本对 C 代码工作得很好,但在 C++ 中却出现了问题,因为 g++ / ld 似乎生成了以我的一些 C++ 符号命名的 SHT_DYNSYM 类型的节。

我的问题:如何修改提供的链接描述文件以捕获这些杂散符号?

这是我的示例源:

/* compile with g++ -c cxx_hello.cc */
/* generic sys write provided by syswrite_$arch.S */
void _syscall_write(int fd, const char *msg, unsigned len);

/* Thin wrapper: forwards fd, msg and len unchanged to _syscall_write(). */
void syscall_write(int fd, const char *msg, unsigned len)
{
    _syscall_write(fd, msg, len);
}

/*
 * Base class of the example. Every member function is defined inside the
 * class body.
 * NOTE(review): presumably these in-class definitions are what the
 * per-function GROUP sections in the objdump listing correspond to - confirm.
 */
class HelloBase
{
    public:
        /* Passes "::HelloBase()\n" (14 bytes) to syscall_write with fd 1, then sets i to 42. */
        HelloBase()  { syscall_write(1, "::HelloBase()\n", 14); i = 42; };
        /* Passes "::~HelloBase()\n" (15 bytes) to syscall_write with fd 1. */
        ~HelloBase() { syscall_write(1, "::~HelloBase()\n", 15); };
        /* Returns the stored value i. */
        int res(void) { return i; }
    protected:
        /* Passes "Hello" (5 bytes, no newline) to syscall_write with fd 1. */
        void sayHi(void) { syscall_write(1, "Hello", 5); };
    private:
        /* Assigned 42 in the constructor; read back by res(). */
        int i;
};

/* Derived class of the example; publicly inherits HelloBase. */
class HelloDeriv : public HelloBase
{
    public:
        /* Passes "::HelloDeriv()\n" (15 bytes) to syscall_write with fd 1. */
        HelloDeriv()  { syscall_write(1, "::HelloDeriv()\n", 15); }
        /* Passes "::~HelloDeriv()\n" (16 bytes) to syscall_write with fd 1. */
        ~HelloDeriv() { syscall_write(1, "::~HelloDeriv()\n", 16); }

        /* Calls the inherited sayHi(), then passes ", World!\n" (9 bytes) to syscall_write. */
        void greet(void) { this->sayHi(); syscall_write(1, ", World!\n", 9); }
}; 

/*
 * Entry point of the example (named _main, not main). Creates a local
 * HelloDeriv, calls greet() on it and returns the result of res().
 */
int
_main(void)
{
    HelloDeriv hello;

    hello.greet();
    /* res() returns HelloBase::i, which the HelloBase constructor sets to 42 */
    return hello.res();
}

objdump -h 的输出(仅有趣的部分):

cxx_hello.o:     file format elf32-i386

Sections:
Idx Name          Size      VMA       LMA       File off  Algn
0 _ZN9HelloBase3resEv 00000008  00000000  00000000  00000034  2**2
              CONTENTS, READONLY, EXCLUDE, GROUP, LINK_ONCE_DISCARD

1 _ZN9HelloBaseC2Ev 00000008  00000000  00000000  0000003c  2**2
              CONTENTS, READONLY, EXCLUDE, GROUP, LINK_ONCE_DISCARD

2 _ZN9HelloBaseD2Ev 00000008  00000000  00000000  00000044  2**2
              CONTENTS, READONLY, EXCLUDE, GROUP, LINK_ONCE_DISCARD

3 _ZN10HelloDerivC1Ev 00000008  00000000  00000000  0000004c  2**2
              CONTENTS, READONLY, EXCLUDE, GROUP, LINK_ONCE_DISCARD

4 _ZN9HelloBase5sayHiEv 00000008  00000000  00000000  00000054  2**2
              CONTENTS, READONLY, EXCLUDE, GROUP, LINK_ONCE_DISCARD

5 _ZN10HelloDeriv5greetEv 00000008  00000000  00000000  0000005c  2**2
              CONTENTS, READONLY, EXCLUDE, GROUP, LINK_ONCE_DISCARD

6 _ZN10HelloDerivD1Ev 00000008  00000000  00000000  00000064  2**2
              CONTENTS, READONLY, EXCLUDE, GROUP, LINK_ONCE_DISCARD

7 .text         00000000  00000000  00000000  0000006c  2**2
              CONTENTS, ALLOC, LOAD, READONLY, CODE

8 .data         00000000  00000000  00000000  0000006c  2**2
              CONTENTS, ALLOC, LOAD, DATA

9 .bss          00000000  00000000  00000000  0000006c  2**2
              ALLOC

readelf -S 相同的文件

There are 37 section headers, starting at offset 0x59c:

Section Headers:
[Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
[ 0]                   NULL            00000000 000000 000000 00      0   0  0
[ 1] .group            GROUP           00000000 000034 000008 04     35  26  4
[ 2] .group            GROUP           00000000 00003c 000008 04     35  30  4
[ 3] .group            GROUP           00000000 000044 000008 04     35  31  4
[ 4] .group            GROUP           00000000 00004c 000008 04     35  33  4
[ 5] .group            GROUP           00000000 000054 000008 04     35  34  4
[ 6] .group            GROUP           00000000 00005c 000008 04     35  35  4
[ 7] .group            GROUP           00000000 000064 000008 04     35  36  4
[ 8] .text             PROGBITS        00000000 00006c 000000 00  AX  0   0  4
[ 9] .data             PROGBITS        00000000 00006c 000000 00  WA  0   0  4
[10] .bss              NOBITS          00000000 00006c 000000 00  WA  0   0  4

I'm maintaining a tool which can convert ELF32 relocatables to RDOFF2 format.
For this process to work I need to pre-link the input files currently using the ld-script shown below:

/*
 * Pre-link script for elf32-i386 input. Merges the matched input sections
 * into three output sections (.text, .data, .bss) and drops
 * .note.GNU-stack and .gnu_debuglink. Each collected range is bracketed by
 * PROVIDE_HIDDEN start/end symbols set to the current location counter.
 */
OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
OUTPUT_ARCH(i386)
/* Assign space to common symbols even for relocatable output (same effect as ld -d). */
FORCE_COMMON_ALLOCATION

SECTIONS {
    /* Code, read-only data and the init/fini function-pointer arrays. */
    .text : {
            /* collect .init / .fini sections */

        /* KEEP protects the wrapped input sections from --gc-sections. */
        PROVIDE_HIDDEN(__init_start = .);
        KEEP (*(.init))
        PROVIDE_HIDDEN(__init_end = .);

        PROVIDE_HIDDEN(__fini_start = .);
        KEEP (*(.fini))
        PROVIDE_HIDDEN(__fini_end = .);

            /* .text and .rodata */

        /* read-only data is merged into the .text output section, after the code */
        *(.text .text.* .gnu.linkonce.t.*)
        *(.rodata .rodata.* .gnu.linkonce.r.*)
        *(.rodata1)


            /* .init- / .fini_arrays */

        PROVIDE_HIDDEN (__preinit_array_start = .);
        KEEP (*(.preinit_array))
        PROVIDE_HIDDEN (__preinit_array_end = .);

        /* name-sorted .init_array.* entries come before the plain .init_array */
        PROVIDE_HIDDEN (__init_array_start = .);
        KEEP (*(SORT(.init_array.*)))
        KEEP (*(.init_array))
        PROVIDE_HIDDEN (__init_array_end = .);

        /* same ordering for the .fini_array entries */
        PROVIDE_HIDDEN (__fini_array_start = .);
        KEEP (*(SORT(.fini_array.*)))
        KEEP (*(.fini_array))
        PROVIDE_HIDDEN (__fini_array_end = .);
    }
    /* Writable data plus exception tables, unwind info and ctor/dtor lists. */
    .data : {
        *(.data .data.* .gnu.linkonce.d.*)
        *(.data1)

        SORT(CONSTRUCTORS)

            /* c++ ctors / dtors and exception tables */

        /* exception tables and .eh_frame are placed in .data, not in sections of their own */
        PROVIDE_HIDDEN (__gcc_except_table_start = .);
        *(.gcc_except_table .gcc_except_table.*)
        PROVIDE_HIDDEN (__gcc_except_table_end = .);

        PROVIDE_HIDDEN (__eh_frame_start = .);
        *(.eh_frame_hdr)
        *(.eh_frame)
        PROVIDE_HIDDEN (__eh_frame_end = .);

        PROVIDE_HIDDEN (__ctors_array_start = .);
        KEEP (*(SORT(.ctors.*)))
        KEEP (*(.ctors))
        PROVIDE_HIDDEN (__ctors_array_end = .);

        PROVIDE_HIDDEN (__dtors_array_start = .);
        KEEP (*(SORT(.dtors.*)))
        KEEP (*(.dtors))
        PROVIDE_HIDDEN (__dtors_array_end = .);
    }
    /* Zero-initialised data and common symbols. */
    .bss  : {
        *(.dynbss)
        *(.bss .bss.* .gnu.linkonce.b.*)
        *(COMMON)

        /* round the location counter up to 4 bytes (32 / 8) unless it is still 0 */
        . = ALIGN(. != 0 ? 32 / 8 : 1);
    }
    /* Input sections listed here are dropped from the output. */
    /DISCARD/ : {
        *(.note.GNU-stack)
        *(.gnu_debuglink)
    }
}

The goal is to reduce the input file to contain only .text, .data, .bss, .strtab, .symtab and .shstrtab sections.

While the current version works fine with C code, it breaks for C++, as g++ / ld seem to generate sections of type SHT_DYNSYM named after some of my C++ symbols.

My question: How would one modify the provided linker script to catch those stray symbols?

Here is my example source:

/* compile with g++ -c cxx_hello.cc */
/* generic sys write provided by syswrite_$arch.S */
void _syscall_write(int fd, const char *msg, unsigned len);

/* Thin wrapper: forwards fd, msg and len unchanged to _syscall_write(). */
void syscall_write(int fd, const char *msg, unsigned len)
{
    _syscall_write(fd, msg, len);
}

/*
 * Base class of the example. Every member function is defined inside the
 * class body.
 * NOTE(review): presumably these in-class definitions are what the
 * per-function GROUP sections in the objdump listing correspond to - confirm.
 */
class HelloBase
{
    public:
        /* Passes "::HelloBase()\n" (14 bytes) to syscall_write with fd 1, then sets i to 42. */
        HelloBase()  { syscall_write(1, "::HelloBase()\n", 14); i = 42; };
        /* Passes "::~HelloBase()\n" (15 bytes) to syscall_write with fd 1. */
        ~HelloBase() { syscall_write(1, "::~HelloBase()\n", 15); };
        /* Returns the stored value i. */
        int res(void) { return i; }
    protected:
        /* Passes "Hello" (5 bytes, no newline) to syscall_write with fd 1. */
        void sayHi(void) { syscall_write(1, "Hello", 5); };
    private:
        /* Assigned 42 in the constructor; read back by res(). */
        int i;
};

/* Derived class of the example; publicly inherits HelloBase. */
class HelloDeriv : public HelloBase
{
    public:
        /* Passes "::HelloDeriv()\n" (15 bytes) to syscall_write with fd 1. */
        HelloDeriv()  { syscall_write(1, "::HelloDeriv()\n", 15); }
        /* Passes "::~HelloDeriv()\n" (16 bytes) to syscall_write with fd 1. */
        ~HelloDeriv() { syscall_write(1, "::~HelloDeriv()\n", 16); }

        /* Calls the inherited sayHi(), then passes ", World!\n" (9 bytes) to syscall_write. */
        void greet(void) { this->sayHi(); syscall_write(1, ", World!\n", 9); }
}; 

/*
 * Entry point of the example (named _main, not main). Creates a local
 * HelloDeriv, calls greet() on it and returns the result of res().
 */
int
_main(void)
{
    HelloDeriv hello;

    hello.greet();
    /* res() returns HelloBase::i, which the HelloBase constructor sets to 42 */
    return hello.res();
}

The output of objdump -h (only interesting sections):

cxx_hello.o:     file format elf32-i386

Sections:
Idx Name          Size      VMA       LMA       File off  Algn
0 _ZN9HelloBase3resEv 00000008  00000000  00000000  00000034  2**2
              CONTENTS, READONLY, EXCLUDE, GROUP, LINK_ONCE_DISCARD

1 _ZN9HelloBaseC2Ev 00000008  00000000  00000000  0000003c  2**2
              CONTENTS, READONLY, EXCLUDE, GROUP, LINK_ONCE_DISCARD

2 _ZN9HelloBaseD2Ev 00000008  00000000  00000000  00000044  2**2
              CONTENTS, READONLY, EXCLUDE, GROUP, LINK_ONCE_DISCARD

3 _ZN10HelloDerivC1Ev 00000008  00000000  00000000  0000004c  2**2
              CONTENTS, READONLY, EXCLUDE, GROUP, LINK_ONCE_DISCARD

4 _ZN9HelloBase5sayHiEv 00000008  00000000  00000000  00000054  2**2
              CONTENTS, READONLY, EXCLUDE, GROUP, LINK_ONCE_DISCARD

5 _ZN10HelloDeriv5greetEv 00000008  00000000  00000000  0000005c  2**2
              CONTENTS, READONLY, EXCLUDE, GROUP, LINK_ONCE_DISCARD

6 _ZN10HelloDerivD1Ev 00000008  00000000  00000000  00000064  2**2
              CONTENTS, READONLY, EXCLUDE, GROUP, LINK_ONCE_DISCARD

7 .text         00000000  00000000  00000000  0000006c  2**2
              CONTENTS, ALLOC, LOAD, READONLY, CODE

8 .data         00000000  00000000  00000000  0000006c  2**2
              CONTENTS, ALLOC, LOAD, DATA

9 .bss          00000000  00000000  00000000  0000006c  2**2
              ALLOC

The same file with readelf -S

There are 37 section headers, starting at offset 0x59c:

Section Headers:
[Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
[ 0]                   NULL            00000000 000000 000000 00      0   0  0
[ 1] .group            GROUP           00000000 000034 000008 04     35  26  4
[ 2] .group            GROUP           00000000 00003c 000008 04     35  30  4
[ 3] .group            GROUP           00000000 000044 000008 04     35  31  4
[ 4] .group            GROUP           00000000 00004c 000008 04     35  33  4
[ 5] .group            GROUP           00000000 000054 000008 04     35  34  4
[ 6] .group            GROUP           00000000 00005c 000008 04     35  35  4
[ 7] .group            GROUP           00000000 000064 000008 04     35  36  4
[ 8] .text             PROGBITS        00000000 00006c 000000 00  AX  0   0  4
[ 9] .data             PROGBITS        00000000 00006c 000000 00  WA  0   0  4
[10] .bss              NOBITS          00000000 00006c 000000 00  WA  0   0  4

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

寻找一个思念的角度 2024-08-11 01:37:56

好吧,我有一个非常简单的建议,如此简单可能行不通...

根据:

http://ftp.gnu.org/old-gnu/Manuals/ld-2.9.1/html_node/ld_19.html

您可以只指定文件名(.o),这样该文件的所有节都会被捕获。

并且您可以使用通配符。

难道只有一个 * 的行就可以捕获所有剩余的部分吗?像这样:

.bss  : {
    *(.dynbss)
    *(.bss .bss.* .gnu.linkonce.b.*)
    *(COMMON)
    *
}

Well, I have quite a simple suggestion, so simple it may not work...

According to:

http://ftp.gnu.org/old-gnu/Manuals/ld-2.9.1/html_node/ld_19.html

You can specify just the name of a file (.o) for all its sections to be captured.

And you can use wildcards.

Could it be that a line with just a * would capture all remaining sections? Like this:

.bss  : {
    *(.dynbss)
    *(.bss .bss.* .gnu.linkonce.b.*)
    *(COMMON)
    *
}
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文