如何从libopcodes中获取指令信息?
我正在编写一个工具,它在 x86-32 和 x86-64 Linux 中使用 libbfd 和 libopcodes 来执行反汇编。问题是,虽然我能够让 libopcodes 完成反汇编,但我无法获得任何指令信息。为了演示的目的,我做了一个最小的例子来重现我的问题。程序应该从入口点开始反汇编自身,直到遇到第一个 RET
/RETQ
。
这段代码因为使用了全局变量而写得有些粗糙,并且为了简洁省略了错误检查等,但应该能清楚地说明问题。
#include <bfd.h>
#include <dis-asm.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <libiberty.h>
/*
 * Program-wide state shared by the helpers below: the BFD handle for the
 * opened binary and the libopcodes disassemble_info that goes with it.
 */
bfd *abfd = NULL;
disassemble_info dinfo = { 0 };

/*
 * Stopgap flag: once set, the disassembly loop ends after the current
 * instruction.
 */
static bool stop_disassembling = FALSE;
/*
 * Gets path to currently running executable.
 *
 * Resolves the /proc/self/exe symlink into target_path, a caller-supplied
 * buffer of `size` bytes, and NUL-terminates the result.
 *
 * Returns true on success. Returns false if target_path is NULL, size is 0,
 * readlink() fails, or the buffer was filled completely (the path may have
 * been cut short); in the failure cases with a usable buffer, target_path
 * is left as an empty string.
 */
bool get_target_path(char * target_path, size_t size)
{
    ssize_t len;

    if(target_path == NULL || size == 0) {
        return false;
    }

    /* readlink() never writes a terminator, so keep one byte in reserve */
    len = readlink("/proc/self/exe", target_path, size - 1);

    /* -1 is an error; a completely filled buffer cannot be told apart from
     * a truncated path, so treat it as a failure too */
    if(len < 0 || (size_t)len >= size - 1) {
        target_path[0] = '\0';
        return false;
    }

    target_path[len] = '\0';
    return true;
}
/*
 * libopcodes appends spaces on the end of some instructions so for
 * comparisons, we want to strip those first.
 *
 * Scans at most `size` bytes of str and terminates the string at the first
 * character that is not a printable, non-space character (space, control
 * character or the existing NUL). "mov    " therefore becomes "mov". Note
 * that the cut happens at the FIRST such character, not only at the tail.
 * A NULL str is ignored.
 */
void strip_tail(char * str, unsigned int size)
{
    unsigned int i;

    if(str == NULL) {
        return;
    }

    for(i = 0; i < size; i++) {
        /* <ctype.h> functions are only defined for unsigned char values
         * (and EOF), so convert before classifying */
        if(!isgraph((unsigned char)str[i])) {
            str[i] = '\0';
            break;
        }
    }
}
/*
 * Checks whether the current instruction will cause the control flow to not
 * proceed to the linearly subsequent instruction.
 *
 * Only return mnemonics are recognised at the moment: str must be exactly
 * "ret" or "retq" (already stripped of trailing padding). Both spellings are
 * accepted regardless of the target's address size, so the check needs no
 * BFD state and also works when a 64-bit return is printed without the
 * size suffix.
 * NOTE(review): newer binutils releases appear to print plain "ret" for
 * 64-bit code - confirm against the installed version.
 *
 * Returns true for a return mnemonic, false otherwise (including NULL).
 */
bool breaks_control_flow(char * str)
{
    if(str == NULL) {
        return false;
    }
    return strcmp(str, "ret") == 0 || strcmp(str, "retq") == 0;
}
/*
 * fprintf-style callback handed to libopcodes in place of a real stream
 * writer. Each call formats its arguments into a local buffer, echoes that
 * text to stdout on its own line, checks whether the text is a return
 * mnemonic (raising the stop flag if so), and finally prints one line
 * describing dinfo.insn_type, or a notice when insn_info_valid is unset.
 *
 * stream is unused. Returns whatever vsnprintf() returned.
 */
int custom_fprintf(void * stream, const char * format, ...)
{
    /* silly amount */
    char text[128] = {0};
    const char * verdict;
    va_list ap;
    int written;

    va_start(ap, format);
    written = vsnprintf(text, ARRAY_SIZE(text) - 1, format, ap);
    va_end(ap);

    /* Echo first, then strip: the comparison below wants the bare token */
    puts(text);
    strip_tail(text, ARRAY_SIZE(text));
    if(breaks_control_flow(text)) {
        puts("Stopped disassembly");
        stop_disassembling = TRUE;
    }

    if(!dinfo.insn_info_valid) {
        verdict = "insn_info not valid\n";
    } else {
        switch(dinfo.insn_type) {
        case dis_noninsn:
            verdict = "not an instruction\n";
            break;
        case dis_nonbranch:
            verdict = "not a branch\n";
            break;
        case dis_branch:
            verdict = "is a branch\n";
            break;
        case dis_condbranch:
            verdict = "is a conditional branch\n";
            break;
        case dis_jsr:
            verdict = "jump to subroutine\n";
            break;
        case dis_condjsr:
            verdict = "conditional jump to subroutine\n";
            break;
        case dis_dref:
            verdict = "data reference in instruction\n";
            break;
        case dis_dref2:
            verdict = "two data references in instruction\n";
            break;
        default:
            verdict = "not enumerated\n";
            break;
        }
    }
    fputs(verdict, stdout);

    return written;
}
/*
 * Prepares *dinfo for the binary behind abfd and hands back the libopcodes
 * disassembler function selected for it.
 *
 * The abfd and dinfo parameters shadow the file-level globals of the same
 * names; only the arguments are used here.
 */
disassembler_ftype init_disasm(bfd * abfd, disassemble_info * dinfo)
{
    disassembler_ftype decode;

    /* Send all disassembler output through custom_fprintf; no stream handle */
    init_disassemble_info(dinfo, NULL, custom_fprintf);

    /* Copy the target description from the BFD handle */
    dinfo->arch = bfd_get_arch(abfd);
    dinfo->mach = bfd_get_mach(abfd);
    dinfo->flavour = bfd_get_flavour(abfd);
    dinfo->endian = abfd->xvec->byteorder;

    disassemble_init_for_target(dinfo);

    decode = disassembler(abfd);
    return decode;
}
/*
* Method of locating section from VMA taken from opdis.
*
* Request/result pair passed to vma_in_section() through
* bfd_map_over_sections(): the caller fills in vma, the callback fills in sec.
*/
typedef struct {
/* Address to look up. */
bfd_vma vma;
/* Section found to contain vma; stays NULL when no section matched. */
asection * sec;
} BFD_VMA_SECTION;
/*
 * Loads section and fills in dinfo accordingly. Since this function allocates
 * memory in dinfo->buffer, callers need to call free once they are finished.
 *
 * The contents of s are read through s->owner; the abfd argument is not
 * consulted. On success dinfo->section, buffer, buffer_length and buffer_vma
 * describe the loaded section and true is returned. If the contents cannot
 * be read, the buffer is released, dinfo is left untouched and false is
 * returned.
 */
bool load_section(bfd * abfd, disassemble_info * dinfo, asection * s)
{
    /* Keep the size in BFD's own type: an int could truncate or go negative */
    bfd_size_type size = bfd_section_size(s->owner, s);
    unsigned char * buf = xmalloc(size);

    if(!bfd_get_section_contents(s->owner, s, buf, 0, size)) {
        free(buf);
        return false;
    }

    dinfo->section = s;
    dinfo->buffer = buf;
    dinfo->buffer_length = size;
    dinfo->buffer_vma = bfd_section_vma(s->owner, s);

    /* The width of bfd_size_type/bfd_vma depends on how BFD was configured,
     * so cast explicitly to match the conversion specifiers. The newline
     * sits before the address on purpose-or-not: the address is printed at
     * the start of the next line with no newline after it. */
    printf("Allocated %lu bytes for %s section\n: 0x%lX", (unsigned long)size,
           s->name, (unsigned long)dinfo->buffer_vma);
    return true;
}
/*
 * bfd_map_over_sections() callback: records s in the request when the
 * requested address lies inside [s->vma, s->vma + section size).
 *
 * data must point to a BFD_VMA_SECTION (NULL is tolerated and ignored).
 * A later matching section overwrites an earlier one.
 */
void vma_in_section(bfd * abfd, asection * s, void * data)
{
    BFD_VMA_SECTION * query = data;

    if(!query) {
        return;
    }
    /* Address lies below the start of this section */
    if(query->vma < s->vma) {
        return;
    }
    /* Address lies at or beyond the end of this section */
    if(query->vma >= (s->vma + bfd_section_size(abfd, s))) {
        return;
    }
    query->sec = s;
}
/*
 * Finds the section of abfd that contains vma and loads it into dinfo.
 *
 * Returns FALSE when no section contains the address; otherwise returns
 * the result of load_section() for the section found.
 */
bool load_section_for_vma(bfd * abfd, disassemble_info * dinfo,
                          bfd_vma vma)
{
    BFD_VMA_SECTION lookup;

    lookup.vma = vma;
    lookup.sec = NULL;

    /* vma_in_section fills in lookup.sec if any section covers the address */
    bfd_map_over_sections(abfd, vma_in_section, &lookup);

    return lookup.sec ? load_section(abfd, dinfo, lookup.sec) : FALSE;
}
/*
 * Start disassembling from entry point.
 *
 * Loads the section containing abfd's start address into dinfo, then calls
 * `disassembler` on consecutive addresses, printing the byte count of each
 * step, until the disassembler reports no progress (size <= 0) or the stop
 * flag is raised. The section buffer is released before returning.
 *
 * Returns true if the section could be loaded (whatever the loop outcome),
 * false otherwise.
 */
bool disassemble_entry(bfd * abfd, disassemble_info * dinfo,
                       disassembler_ftype disassembler)
{
    bfd_vma vma = bfd_get_start_address(abfd);

    /* First locate and load the section containing the vma */
    if(!load_section_for_vma(abfd, dinfo, vma)) {
        return false;
    }

    /* Drop any stop request left over from an earlier run */
    stop_disassembling = false;

    /* Keep disassembling until signalled otherwise or error */
    while(true) {
        int size;

        dinfo->insn_info_valid = 0;
        size = disassembler(vma, dinfo);
        printf("Disassembled %d bytes at 0x%lX\n", size, (unsigned long)vma);

        /* Any non-positive size means no forward progress; never step back */
        if(size <= 0 || stop_disassembling) {
            break;
        }
        vma += size;
    }

    /* Release the section and make sure dinfo no longer points at it */
    free(dinfo->buffer);
    dinfo->buffer = NULL;
    dinfo->buffer_length = 0;
    return true;
}
/*
 * Opens the running executable with BFD and disassembles it from its entry
 * point until the first return instruction.
 *
 * Returns EXIT_SUCCESS when the disassembly ran, EXIT_FAILURE when the
 * executable path could not be resolved, the file could not be opened, it
 * is not an object file, or its entry section could not be loaded.
 */
int main(void)
{
    char target_path[PATH_MAX] = {0};
    int status = EXIT_FAILURE;

    bfd_init();

    /* Get path for the running instance of this program */
    if(!get_target_path(target_path, ARRAY_SIZE(target_path))) {
        fprintf(stderr, "could not resolve path of running executable\n");
        return EXIT_FAILURE;
    }

    abfd = bfd_openr(target_path, NULL);
    if(abfd == NULL) {
        fprintf(stderr, "could not open %s\n", target_path);
        return EXIT_FAILURE;
    }

    if(bfd_check_format(abfd, bfd_object)) {
        disassembler_ftype disassembler = init_disasm(abfd, &dinfo);
        if(disassemble_entry(abfd, &dinfo, disassembler)) {
            status = EXIT_SUCCESS;
        }
    } else {
        fprintf(stderr, "%s is not a recognised object file\n", target_path);
    }

    /* Close the handle on every path once it has been opened */
    bfd_close(abfd);
    abfd = NULL;
    return status;
}
可以使用以下 makefile
构建该源代码。要执行成功的链接,需要在本地计算机上安装 binutils-dev 软件包:
# Builds the disasm demo from disasm.c, linking against libbfd and libopcodes.
# NOTE: in an actual Makefile every recipe line must start with a tab.
all:
gcc -Wall disasm.c -o disasm -lbfd -lopcodes
# Removes the built binary.
clean:
rm -f disasm
运行时,输出如下:
Allocated 2216 bytes for .text section
: 0x400BF0xor
insn_info not valid
%ebp
insn_info not valid
,
insn_info not valid
%ebp
insn_info not valid
Disassembled 2 bytes at 0x400BF0
mov
insn_info not valid
%rdx
insn_info not valid
,
insn_info not valid
%r9
insn_info not valid
Disassembled 3 bytes at 0x400BF2
pop
insn_info not valid
%rsi
insn_info not valid
Disassembled 1 bytes at 0x400BF5
mov
insn_info not valid
%rsp
insn_info not valid
,
insn_info not valid
%rdx
insn_info not valid
Disassembled 3 bytes at 0x400BF6
and
insn_info not valid
$0xfffffffffffffff0
insn_info not valid
,
insn_info not valid
%rsp
insn_info not valid
Disassembled 4 bytes at 0x400BF9
push
insn_info not valid
%rax
insn_info not valid
Disassembled 1 bytes at 0x400BFD
push
insn_info not valid
%rsp
insn_info not valid
Disassembled 1 bytes at 0x400BFE
mov
insn_info not valid
$0x401450
insn_info not valid
,
insn_info not valid
%r8
insn_info not valid
Disassembled 7 bytes at 0x400BFF
mov
insn_info not valid
$0x4013c0
insn_info not valid
,
insn_info not valid
%rcx
insn_info not valid
Disassembled 7 bytes at 0x400C06
mov
insn_info not valid
$0x4012ce
insn_info not valid
,
insn_info not valid
%rdi
insn_info not valid
Disassembled 7 bytes at 0x400C0D
callq
insn_info not valid
0x0000000000400ad8
insn_info not valid
Disassembled 5 bytes at 0x400C14
hlt
insn_info not valid
Disassembled 1 bytes at 0x400C19
nop
insn_info not valid
Disassembled 1 bytes at 0x400C1A
nop
insn_info not valid
Disassembled 1 bytes at 0x400C1B
sub
insn_info not valid
$0x8
insn_info not valid
,
insn_info not valid
%rsp
insn_info not valid
Disassembled 4 bytes at 0x400C1C
mov
insn_info not valid
0x2013b9(%rip)
insn_info not valid
,
insn_info not valid
%rax
insn_info not valid
#
insn_info not valid
0x0000000000601fe0
insn_info not valid
Disassembled 7 bytes at 0x400C20
test
insn_info not valid
%rax
insn_info not valid
,
insn_info not valid
%rax
insn_info not valid
Disassembled 3 bytes at 0x400C27
je
insn_info not valid
0x0000000000400c2e
insn_info not valid
Disassembled 2 bytes at 0x400C2A
callq
insn_info not valid
*%rax
insn_info not valid
Disassembled 2 bytes at 0x400C2C
add
insn_info not valid
$0x8
insn_info not valid
,
insn_info not valid
%rsp
insn_info not valid
Disassembled 4 bytes at 0x400C2E
retq
Stopped disassembly
insn_info not valid
Disassembled 1 bytes at 0x400C32
我期望的是能够读取每条指令的指令信息通过 dinfo->insn_type
、target
等。该行为在 x86-32 和 x86-64 上都表现出来。如果我至少可以确认这在这两种架构上未实现,那么我可以自己填写这些信息。
I am writing a tool which uses libbfd
and libopcodes
in x86-32 and x86-64 Linux to perform disassembly. The problem is that whilst I am able to get libopcodes to disassemble, I am unable to get any instruction information. For the purposes of demonstration, I have made a minimal example which reproduces my issue. The program should disassemble itself from entry point to the first RET
/RETQ
.
The code is a bit hacked up with globals and error checking has been omitted for brevity, etc. but should illustrate the issue clearly.
#include <bfd.h>
#include <dis-asm.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>
#include <libiberty.h>
/*
 * Program-wide state shared by the helpers below: the BFD handle for the
 * opened binary and the libopcodes disassemble_info that goes with it.
 */
bfd *abfd = NULL;
disassemble_info dinfo = { 0 };

/*
 * Stopgap flag: once set, the disassembly loop ends after the current
 * instruction.
 */
static bool stop_disassembling = FALSE;
/*
 * Gets path to currently running executable.
 *
 * Resolves the /proc/self/exe symlink into target_path, a caller-supplied
 * buffer of `size` bytes, and NUL-terminates the result.
 *
 * Returns true on success. Returns false if target_path is NULL, size is 0,
 * readlink() fails, or the buffer was filled completely (the path may have
 * been cut short); in the failure cases with a usable buffer, target_path
 * is left as an empty string.
 */
bool get_target_path(char * target_path, size_t size)
{
    ssize_t len;

    if(target_path == NULL || size == 0) {
        return false;
    }

    /* readlink() never writes a terminator, so keep one byte in reserve */
    len = readlink("/proc/self/exe", target_path, size - 1);

    /* -1 is an error; a completely filled buffer cannot be told apart from
     * a truncated path, so treat it as a failure too */
    if(len < 0 || (size_t)len >= size - 1) {
        target_path[0] = '\0';
        return false;
    }

    target_path[len] = '\0';
    return true;
}
/*
 * libopcodes appends spaces on the end of some instructions so for
 * comparisons, we want to strip those first.
 *
 * Scans at most `size` bytes of str and terminates the string at the first
 * character that is not a printable, non-space character (space, control
 * character or the existing NUL). "mov    " therefore becomes "mov". Note
 * that the cut happens at the FIRST such character, not only at the tail.
 * A NULL str is ignored.
 */
void strip_tail(char * str, unsigned int size)
{
    unsigned int i;

    if(str == NULL) {
        return;
    }

    for(i = 0; i < size; i++) {
        /* <ctype.h> functions are only defined for unsigned char values
         * (and EOF), so convert before classifying */
        if(!isgraph((unsigned char)str[i])) {
            str[i] = '\0';
            break;
        }
    }
}
/*
 * Checks whether the current instruction will cause the control flow to not
 * proceed to the linearly subsequent instruction.
 *
 * Only return mnemonics are recognised at the moment: str must be exactly
 * "ret" or "retq" (already stripped of trailing padding). Both spellings are
 * accepted regardless of the target's address size, so the check needs no
 * BFD state and also works when a 64-bit return is printed without the
 * size suffix.
 * NOTE(review): newer binutils releases appear to print plain "ret" for
 * 64-bit code - confirm against the installed version.
 *
 * Returns true for a return mnemonic, false otherwise (including NULL).
 */
bool breaks_control_flow(char * str)
{
    if(str == NULL) {
        return false;
    }
    return strcmp(str, "ret") == 0 || strcmp(str, "retq") == 0;
}
/*
 * fprintf-style callback handed to libopcodes in place of a real stream
 * writer. Each call formats its arguments into a local buffer, echoes that
 * text to stdout on its own line, checks whether the text is a return
 * mnemonic (raising the stop flag if so), and finally prints one line
 * describing dinfo.insn_type, or a notice when insn_info_valid is unset.
 *
 * stream is unused. Returns whatever vsnprintf() returned.
 */
int custom_fprintf(void * stream, const char * format, ...)
{
    /* silly amount */
    char text[128] = {0};
    const char * verdict;
    va_list ap;
    int written;

    va_start(ap, format);
    written = vsnprintf(text, ARRAY_SIZE(text) - 1, format, ap);
    va_end(ap);

    /* Echo first, then strip: the comparison below wants the bare token */
    puts(text);
    strip_tail(text, ARRAY_SIZE(text));
    if(breaks_control_flow(text)) {
        puts("Stopped disassembly");
        stop_disassembling = TRUE;
    }

    if(!dinfo.insn_info_valid) {
        verdict = "insn_info not valid\n";
    } else {
        switch(dinfo.insn_type) {
        case dis_noninsn:
            verdict = "not an instruction\n";
            break;
        case dis_nonbranch:
            verdict = "not a branch\n";
            break;
        case dis_branch:
            verdict = "is a branch\n";
            break;
        case dis_condbranch:
            verdict = "is a conditional branch\n";
            break;
        case dis_jsr:
            verdict = "jump to subroutine\n";
            break;
        case dis_condjsr:
            verdict = "conditional jump to subroutine\n";
            break;
        case dis_dref:
            verdict = "data reference in instruction\n";
            break;
        case dis_dref2:
            verdict = "two data references in instruction\n";
            break;
        default:
            verdict = "not enumerated\n";
            break;
        }
    }
    fputs(verdict, stdout);

    return written;
}
/*
 * Prepares *dinfo for the binary behind abfd and hands back the libopcodes
 * disassembler function selected for it.
 *
 * The abfd and dinfo parameters shadow the file-level globals of the same
 * names; only the arguments are used here.
 */
disassembler_ftype init_disasm(bfd * abfd, disassemble_info * dinfo)
{
    disassembler_ftype decode;

    /* Send all disassembler output through custom_fprintf; no stream handle */
    init_disassemble_info(dinfo, NULL, custom_fprintf);

    /* Copy the target description from the BFD handle */
    dinfo->arch = bfd_get_arch(abfd);
    dinfo->mach = bfd_get_mach(abfd);
    dinfo->flavour = bfd_get_flavour(abfd);
    dinfo->endian = abfd->xvec->byteorder;

    disassemble_init_for_target(dinfo);

    decode = disassembler(abfd);
    return decode;
}
/*
* Method of locating section from VMA taken from opdis.
*
* Request/result pair passed to vma_in_section() through
* bfd_map_over_sections(): the caller fills in vma, the callback fills in sec.
*/
typedef struct {
/* Address to look up. */
bfd_vma vma;
/* Section found to contain vma; stays NULL when no section matched. */
asection * sec;
} BFD_VMA_SECTION;
/*
 * Loads section and fills in dinfo accordingly. Since this function allocates
 * memory in dinfo->buffer, callers need to call free once they are finished.
 *
 * The contents of s are read through s->owner; the abfd argument is not
 * consulted. On success dinfo->section, buffer, buffer_length and buffer_vma
 * describe the loaded section and true is returned. If the contents cannot
 * be read, the buffer is released, dinfo is left untouched and false is
 * returned.
 */
bool load_section(bfd * abfd, disassemble_info * dinfo, asection * s)
{
    /* Keep the size in BFD's own type: an int could truncate or go negative */
    bfd_size_type size = bfd_section_size(s->owner, s);
    unsigned char * buf = xmalloc(size);

    if(!bfd_get_section_contents(s->owner, s, buf, 0, size)) {
        free(buf);
        return false;
    }

    dinfo->section = s;
    dinfo->buffer = buf;
    dinfo->buffer_length = size;
    dinfo->buffer_vma = bfd_section_vma(s->owner, s);

    /* The width of bfd_size_type/bfd_vma depends on how BFD was configured,
     * so cast explicitly to match the conversion specifiers. The newline
     * sits before the address on purpose-or-not: the address is printed at
     * the start of the next line with no newline after it. */
    printf("Allocated %lu bytes for %s section\n: 0x%lX", (unsigned long)size,
           s->name, (unsigned long)dinfo->buffer_vma);
    return true;
}
/*
 * bfd_map_over_sections() callback: records s in the request when the
 * requested address lies inside [s->vma, s->vma + section size).
 *
 * data must point to a BFD_VMA_SECTION (NULL is tolerated and ignored).
 * A later matching section overwrites an earlier one.
 */
void vma_in_section(bfd * abfd, asection * s, void * data)
{
    BFD_VMA_SECTION * query = data;

    if(!query) {
        return;
    }
    /* Address lies below the start of this section */
    if(query->vma < s->vma) {
        return;
    }
    /* Address lies at or beyond the end of this section */
    if(query->vma >= (s->vma + bfd_section_size(abfd, s))) {
        return;
    }
    query->sec = s;
}
/*
 * Finds the section of abfd that contains vma and loads it into dinfo.
 *
 * Returns FALSE when no section contains the address; otherwise returns
 * the result of load_section() for the section found.
 */
bool load_section_for_vma(bfd * abfd, disassemble_info * dinfo,
                          bfd_vma vma)
{
    BFD_VMA_SECTION lookup;

    lookup.vma = vma;
    lookup.sec = NULL;

    /* vma_in_section fills in lookup.sec if any section covers the address */
    bfd_map_over_sections(abfd, vma_in_section, &lookup);

    return lookup.sec ? load_section(abfd, dinfo, lookup.sec) : FALSE;
}
/*
 * Start disassembling from entry point.
 *
 * Loads the section containing abfd's start address into dinfo, then calls
 * `disassembler` on consecutive addresses, printing the byte count of each
 * step, until the disassembler reports no progress (size <= 0) or the stop
 * flag is raised. The section buffer is released before returning.
 *
 * Returns true if the section could be loaded (whatever the loop outcome),
 * false otherwise.
 */
bool disassemble_entry(bfd * abfd, disassemble_info * dinfo,
                       disassembler_ftype disassembler)
{
    bfd_vma vma = bfd_get_start_address(abfd);

    /* First locate and load the section containing the vma */
    if(!load_section_for_vma(abfd, dinfo, vma)) {
        return false;
    }

    /* Drop any stop request left over from an earlier run */
    stop_disassembling = false;

    /* Keep disassembling until signalled otherwise or error */
    while(true) {
        int size;

        dinfo->insn_info_valid = 0;
        size = disassembler(vma, dinfo);
        printf("Disassembled %d bytes at 0x%lX\n", size, (unsigned long)vma);

        /* Any non-positive size means no forward progress; never step back */
        if(size <= 0 || stop_disassembling) {
            break;
        }
        vma += size;
    }

    /* Release the section and make sure dinfo no longer points at it */
    free(dinfo->buffer);
    dinfo->buffer = NULL;
    dinfo->buffer_length = 0;
    return true;
}
/*
 * Opens the running executable with BFD and disassembles it from its entry
 * point until the first return instruction.
 *
 * Returns EXIT_SUCCESS when the disassembly ran, EXIT_FAILURE when the
 * executable path could not be resolved, the file could not be opened, it
 * is not an object file, or its entry section could not be loaded.
 */
int main(void)
{
    char target_path[PATH_MAX] = {0};
    int status = EXIT_FAILURE;

    bfd_init();

    /* Get path for the running instance of this program */
    if(!get_target_path(target_path, ARRAY_SIZE(target_path))) {
        fprintf(stderr, "could not resolve path of running executable\n");
        return EXIT_FAILURE;
    }

    abfd = bfd_openr(target_path, NULL);
    if(abfd == NULL) {
        fprintf(stderr, "could not open %s\n", target_path);
        return EXIT_FAILURE;
    }

    if(bfd_check_format(abfd, bfd_object)) {
        disassembler_ftype disassembler = init_disasm(abfd, &dinfo);
        if(disassemble_entry(abfd, &dinfo, disassembler)) {
            status = EXIT_SUCCESS;
        }
    } else {
        fprintf(stderr, "%s is not a recognised object file\n", target_path);
    }

    /* Close the handle on every path once it has been opened */
    bfd_close(abfd);
    abfd = NULL;
    return status;
}
This source can be built with the following makefile
. To perform a successful link, the binutils-dev
package needs to be installed on the local machine:
# Builds the disasm demo from disasm.c, linking against libbfd and libopcodes.
# NOTE: in an actual Makefile every recipe line must start with a tab.
all:
gcc -Wall disasm.c -o disasm -lbfd -lopcodes
# Removes the built binary.
clean:
rm -f disasm
When run, the output is this:
Allocated 2216 bytes for .text section
: 0x400BF0xor
insn_info not valid
%ebp
insn_info not valid
,
insn_info not valid
%ebp
insn_info not valid
Disassembled 2 bytes at 0x400BF0
mov
insn_info not valid
%rdx
insn_info not valid
,
insn_info not valid
%r9
insn_info not valid
Disassembled 3 bytes at 0x400BF2
pop
insn_info not valid
%rsi
insn_info not valid
Disassembled 1 bytes at 0x400BF5
mov
insn_info not valid
%rsp
insn_info not valid
,
insn_info not valid
%rdx
insn_info not valid
Disassembled 3 bytes at 0x400BF6
and
insn_info not valid
$0xfffffffffffffff0
insn_info not valid
,
insn_info not valid
%rsp
insn_info not valid
Disassembled 4 bytes at 0x400BF9
push
insn_info not valid
%rax
insn_info not valid
Disassembled 1 bytes at 0x400BFD
push
insn_info not valid
%rsp
insn_info not valid
Disassembled 1 bytes at 0x400BFE
mov
insn_info not valid
$0x401450
insn_info not valid
,
insn_info not valid
%r8
insn_info not valid
Disassembled 7 bytes at 0x400BFF
mov
insn_info not valid
$0x4013c0
insn_info not valid
,
insn_info not valid
%rcx
insn_info not valid
Disassembled 7 bytes at 0x400C06
mov
insn_info not valid
$0x4012ce
insn_info not valid
,
insn_info not valid
%rdi
insn_info not valid
Disassembled 7 bytes at 0x400C0D
callq
insn_info not valid
0x0000000000400ad8
insn_info not valid
Disassembled 5 bytes at 0x400C14
hlt
insn_info not valid
Disassembled 1 bytes at 0x400C19
nop
insn_info not valid
Disassembled 1 bytes at 0x400C1A
nop
insn_info not valid
Disassembled 1 bytes at 0x400C1B
sub
insn_info not valid
$0x8
insn_info not valid
,
insn_info not valid
%rsp
insn_info not valid
Disassembled 4 bytes at 0x400C1C
mov
insn_info not valid
0x2013b9(%rip)
insn_info not valid
,
insn_info not valid
%rax
insn_info not valid
#
insn_info not valid
0x0000000000601fe0
insn_info not valid
Disassembled 7 bytes at 0x400C20
test
insn_info not valid
%rax
insn_info not valid
,
insn_info not valid
%rax
insn_info not valid
Disassembled 3 bytes at 0x400C27
je
insn_info not valid
0x0000000000400c2e
insn_info not valid
Disassembled 2 bytes at 0x400C2A
callq
insn_info not valid
*%rax
insn_info not valid
Disassembled 2 bytes at 0x400C2C
add
insn_info not valid
$0x8
insn_info not valid
,
insn_info not valid
%rsp
insn_info not valid
Disassembled 4 bytes at 0x400C2E
retq
Stopped disassembly
insn_info not valid
Disassembled 1 bytes at 0x400C32
What I am expecting is to be able to read instruction information for each instruction through the dinfo->insn_type
, target
, etc. The behaviour is exhibited on both x86-32 and x86-64. If I can at least get confirmation that this is unimplemented on these two architectures then I can go about filling in this information myself.
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(3)
不幸的是,从 binutils libopcodes 2.22 开始,在 i386 或 x86_64 上均未填充
insn_type
。唯一广泛支持的架构是 MIPS、Sparc 和 Cell 的 SPU。截至目前的 CVS HEAD 仍然如此。很难证明某些东西不存在,但例如,在 Sparc 反汇编程序源码中,您可以看到多次出现
insn_type
被设置,例如 info->insn_type = dis_branch
,而在 i386 反汇编程序源码中既没有出现 insn_type
,也没有出现它预期会取的任何值(dis_branch
、dis_nonbranch
等)。检查所有支持
insn_type
的 libopcodes 文件,您将得到:
opcodes/mips-dis.c
opcodes/spu-dis.c
opcodes/microblaze-dis.c
opcodes/cris-dis.c
opcodes/sparc-dis.c
opcodes/mmix-dis.c
Unfortunately, as of binutils libopcodes 2.22,
insn_type
is not filled in on either i386 or x86_64. The only widespread supported architectures are MIPS, Sparc, and the Cell’s SPU. This is still true as of current CVS HEAD. It's hard to prove that something does not exist, but for instance, in the Sparc disassembler source you can see several occurrences of
insn_type
being set, for instance info->insn_type = dis_branch
, whereas in the i386 disassembler source there are no occurrences of insn_type
nor any of the values it would be expected to have (dis_branch
, dis_nonbranch
etc.). Checking for all the libopcodes files that support
insn_type
you get:
opcodes/mips-dis.c
opcodes/spu-dis.c
opcodes/microblaze-dis.c
opcodes/cris-dis.c
opcodes/sparc-dis.c
opcodes/mmix-dis.c
仅使用这些库来完成此操作将是一个极其痛苦和艰巨的过程。我认为你应该听 Necrolis 并使用一个已经做到这一点的库。我过去使用过 Dyninst (即 InstructionAPI + 解析API)。它们有很好的文档记录,并且将完全完成您想要做的事情。至少,花一个小时使用这个库并编译手册中的示例将为您提供一个应用程序,让您检查每条指令的操作码、每条指令的长度、每条指令的参数数量等。这些是 libopcodes 不会告诉您或处理的事情(它一次解码地址,不能保证是指令)。
以下是我从 Opdis 开发人员的手册中摘录的片段(如果你还没有读过,我建议你阅读一下,里面有很多关于 libopcodes 的好东西):
除此之外,我认为您可能是被该列表中的第二项坑到了。也就是说,大多数(所有?)操作码都能放进单个地址,这与观察到的输出一致(例如,您得到了
mov
和 pop
以及一些寄存器参数)。但是,对于诸如可变长度指令或未在 4 字节边界上精确对齐的指令之类的棘手情况怎么办?你没有做任何事情来处理这些。我猜 Opdis 比你的程序更聪明——它知道如何在流中查找以及查找什么。也许有时它知道在反汇编之前需要读取两个地址而不是一个。从您的代码和 libopcodes 的描述来看,两者都没有这样做。
祝你好运!请记住阅读该手册,也许可以考虑使用 libopdis 来代替!
Doing this with just those libraries is going to be an extremely painful and arduous process. I think you should listen to Necrolis and use a library that already does this. I've used the Dyninst in the past (namely, the InstructionAPI + ParseAPI). They're very well documented, and will do exactly what you're trying to do. At the very least, spending an hour with this library and compiling their examples in the manuals will give you an application that will let you examine things like the opcodes of each instruction, length of each instruction, number of arguments to each instruction, etc. These are things that libopcodes does not tell you nor handle (it decodes addresses at a time, which aren't guaranteed to be instructions).
Here's a snippet from the developers of Opdis that I took from their manual (which I would suggest reading if you haven't, lots of good stuff in there about
libopcodes
): Among other things, I think you might be getting stung by the second item in that list. Namely, the fact that most (all?) opcodes would fit into a single address and would agree with the observed output (e.g., you're getting the
mov
and pop
and some register arguments). But what about tricky things like variable length instructions or instructions that aren't lining up exactly at the 4-byte boundaries? You're not doing anything to handle those. I'm guessing that Opdis is smarter than your program -- it knows how and what to look for in the stream. Perhaps sometimes it knows that it needs to read two addresses instead of just one before disassembling. From your code, and the description of libopcodes, neither is doing this.
Good luck! Remember to read that manual, and perhaps consider using
libopdis
instead!
Libopcodes 将反汇编出的指令打印到一个流中,而这个流被您的 custom_fprintf 函数拦截。您的错误在于假设每反汇编一条指令 custom_fprintf 只会被调用一次;实际上它被调用得更频繁,具体来说,每打印一个助记符、操作数、地址或分隔符都会调用一次。
因此,二进制文件的反汇编结果是
Libopcodes prints disassembled instructions into the stream which is intercepted by your custom_fprintf function. Your mistake is that you assume that custom_fprintf is called once each time a single instruction is disassembled; however, it is called more often, in particular to print each mnemonic, operand, address or separator.
So, resulting disassembly of your binary is