char* str="..." 与 char str[]="..." 相比的奇怪行为

发布于 2024-12-02 11:05:27 字数 3619 浏览 1 评论 0 原文

可能的重复:
尝试设置字符数组中的某个字符时程序崩溃

我有一个按预期工作的示例代码:

/* strtok example */
#include <stdio.h>
#include <string.h>

/* Working variant: prints the input string, then asks strtok for its
   first token. The tokenizing loop is left commented out. */
int main ()
{
  /* Array initialized from the literal, so str is this function's own
     buffer holding a copy of the characters. */
  char str[] ="- This, a sample string.";
  /* Receives the token pointer returned by strtok. */
  char * pch;
  printf ("Splitting string \"%s\" into tokens:\n",str);
  /* Delimiter set: space, comma, period and hyphen. */
  pch = strtok (str," ,.-");
/*
  while (pch != NULL)
  {
    printf ("%s\n",pch);
    pch = strtok (NULL, " ,.-");
  }
*/
  return 0;
}

……除非我把 char str[] 改成 char* str,而这本不应该在语义上产生任何差异:

/* strtok example */
#include <stdio.h>
#include <string.h>

/* Crashing variant: identical to the array version except that str is a
   pointer to the string literal itself. */
int main ()
{
  /* str points directly at the string literal; no separate copy of the
     characters is made. */
  char * str ="- This, a sample string.";
  /* Receives the token pointer returned by strtok. */
  char * pch;
  printf ("Splitting string \"%s\" into tokens:\n",str);
  /* strtok writes into its first argument (it replaces a delimiter with a
     null byte), so this call tries to modify the literal: undefined
     behavior, observed here as a segmentation fault. */
  pch = strtok (str," ,.-");
/*
  while (pch != NULL)
  {
    printf ("%s\n",pch);
    pch = strtok (NULL, " ,.-");
  }
*/
  return 0;
}

这是意外的结果:

Splitting string "- This, a sample string." into tokens:
Segmentation fault

我用以下命令编译了这两个例子:

gcc -O0 main.c
gcc -O3 main.c
g++ -O0 main.c
g++ -O3 main.c

甚至还查看了汇编代码……但我还是弄不清楚第二个版本到底哪里有问题。

下面是能正常工作的版本的 O1 汇编:

    # GCC -O1 output for the working char str[] variant (Intel syntax).
    .file   "main.c"
    .intel_syntax noprefix
    # Only the printf format string (.LC0) and the delimiter set (.LC1) are
    # emitted as .rodata constants; the tokenized string is built on the
    # stack inside main instead.
    .section    .rodata.str1.8,"aMS",@progbits,1
    .align 8
.LC0:
    .string "Splitting string \"%s\" into tokens:\n"
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC1:
    .string " ,.-"
    .text
.globl main
    .type   main, @function
main:
.LFB58:
    .cfi_startproc
    push    rbx
    .cfi_def_cfa_offset 16
    sub rsp, 48
    .cfi_def_cfa_offset 64
    # Stack-protector canary: load it from fs:40 and keep a copy at [rsp+40].
    mov rax, QWORD PTR fs:40
    mov QWORD PTR [rsp+40], rax
    xor eax, eax
    # Initialize str[] in place at [rsp]: six 4-byte stores write the 24
    # characters of "- This, a sample string." (1750343725 = 0x6854202D, the
    # bytes "- Th" in little-endian order), then one byte store writes the
    # terminating NUL.
    mov DWORD PTR [rsp], 1750343725
    mov DWORD PTR [rsp+4], 539784041
    mov DWORD PTR [rsp+8], 1634934881
    mov DWORD PTR [rsp+12], 1701605485
    mov DWORD PTR [rsp+16], 1920234272
    mov DWORD PTR [rsp+20], 778530409
    mov BYTE PTR [rsp+24], 0
    # __printf_chk arguments: edi = 1, esi = format string, rdx = address of
    # the stack copy of the string.
    mov rdx, rsp
    mov esi, OFFSET FLAT:.LC0
    mov edi, 1
    .cfi_offset 3, -16
    call    __printf_chk
    # strtok arguments: rdi = the stack copy (writable), esi = delimiters.
    mov esi, OFFSET FLAT:.LC1
    mov rdi, rsp
    call    strtok
    # Return value 0; then compare the saved canary with fs:40 and call
    # __stack_chk_fail if they differ.
    mov eax, 0
    mov rdx, QWORD PTR [rsp+40]
    xor rdx, QWORD PTR fs:40
    je  .L3
    call    __stack_chk_fail
.L3:
    add rsp, 48
    pop rbx
    .p2align 4,,1
    ret
    .cfi_endproc
.LFE58:
    .size   main, .-main
    .ident  "GCC: (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5"
    .section    .note.GNU-stack,"",@progbits

以及出问题的版本的 O1 汇编:

    # GCC -O1 output for the crashing char *str variant (Intel syntax).
    .file   "main.c"
    .intel_syntax noprefix
    # Here the tokenized string itself (.LC0) is emitted as a constant in a
    # .rodata section; the section flags "aMS" contain no "w", i.e. the
    # section is not marked writable.
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC0:
    .string "- This, a sample string."
    .section    .rodata.str1.8,"aMS",@progbits,1
    .align 8
.LC1:
    .string "Splitting string \"%s\" into tokens:\n"
    .section    .rodata.str1.1
.LC2:
    .string " ,.-"
    .text
.globl main
    .type   main, @function
main:
.LFB58:
    .cfi_startproc
    sub rsp, 8
    .cfi_def_cfa_offset 16
    # __printf_chk arguments: edi = 1, esi = format string, edx = address of
    # the literal .LC0.
    mov edx, OFFSET FLAT:.LC0
    mov esi, OFFSET FLAT:.LC1
    mov edi, 1
    mov eax, 0
    call    __printf_chk
    # strtok arguments: edi = address of the literal .LC0 itself, esi =
    # delimiters. No copy is made, so strtok's writes target .rodata.
    mov esi, OFFSET FLAT:.LC2
    mov edi, OFFSET FLAT:.LC0
    call    strtok
    mov eax, 0
    add rsp, 8
    ret
    .cfi_endproc
.LFE58:
    .size   main, .-main
    .ident  "GCC: (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5"
    .section    .note.GNU-stack,"",@progbits

我能看到的唯一明显的区别是,在工作版本中,GCC 直接在代码中用 MOV 替换字符串常量。

非常感谢帮助

编辑 gcc (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5,

祝一切顺利, 托马斯

Possible Duplicate:
Program crashes when trying to set a character of a char array

I have a sample code which works as expected:

/* strtok example */
#include <stdio.h>
#include <string.h>

/* Working variant: prints the input string, then asks strtok for its
   first token. The tokenizing loop is left commented out. */
int main ()
{
  /* Array initialized from the literal, so str is this function's own
     buffer holding a copy of the characters. */
  char str[] ="- This, a sample string.";
  /* Receives the token pointer returned by strtok. */
  char * pch;
  printf ("Splitting string \"%s\" into tokens:\n",str);
  /* Delimiter set: space, comma, period and hyphen. */
  pch = strtok (str," ,.-");
/*
  while (pch != NULL)
  {
    printf ("%s\n",pch);
    pch = strtok (NULL, " ,.-");
  }
*/
  return 0;
}

... unless I change char str[] to char* str, which shouldn't make any difference in semantics:

/* strtok example */
#include <stdio.h>
#include <string.h>

/* Crashing variant: identical to the array version except that str is a
   pointer to the string literal itself. */
int main ()
{
  /* str points directly at the string literal; no separate copy of the
     characters is made. */
  char * str ="- This, a sample string.";
  /* Receives the token pointer returned by strtok. */
  char * pch;
  printf ("Splitting string \"%s\" into tokens:\n",str);
  /* strtok writes into its first argument (it replaces a delimiter with a
     null byte), so this call tries to modify the literal: undefined
     behavior, observed here as a segmentation fault. */
  pch = strtok (str," ,.-");
/*
  while (pch != NULL)
  {
    printf ("%s\n",pch);
    pch = strtok (NULL, " ,.-");
  }
*/
  return 0;
}

This is the unexpected result:

Splitting string "- This, a sample string." into tokens:
Segmentation fault

I compiled both examples with:

gcc -O0 main.c
gcc -O3 main.c
g++ -O0 main.c
g++ -O3 main.c

and even looked at the assembly ... But I can't figure out what's wrong with the second version.

Here is the working O1 assembly:

    # GCC -O1 output for the working char str[] variant (Intel syntax).
    .file   "main.c"
    .intel_syntax noprefix
    # Only the printf format string (.LC0) and the delimiter set (.LC1) are
    # emitted as .rodata constants; the tokenized string is built on the
    # stack inside main instead.
    .section    .rodata.str1.8,"aMS",@progbits,1
    .align 8
.LC0:
    .string "Splitting string \"%s\" into tokens:\n"
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC1:
    .string " ,.-"
    .text
.globl main
    .type   main, @function
main:
.LFB58:
    .cfi_startproc
    push    rbx
    .cfi_def_cfa_offset 16
    sub rsp, 48
    .cfi_def_cfa_offset 64
    # Stack-protector canary: load it from fs:40 and keep a copy at [rsp+40].
    mov rax, QWORD PTR fs:40
    mov QWORD PTR [rsp+40], rax
    xor eax, eax
    # Initialize str[] in place at [rsp]: six 4-byte stores write the 24
    # characters of "- This, a sample string." (1750343725 = 0x6854202D, the
    # bytes "- Th" in little-endian order), then one byte store writes the
    # terminating NUL.
    mov DWORD PTR [rsp], 1750343725
    mov DWORD PTR [rsp+4], 539784041
    mov DWORD PTR [rsp+8], 1634934881
    mov DWORD PTR [rsp+12], 1701605485
    mov DWORD PTR [rsp+16], 1920234272
    mov DWORD PTR [rsp+20], 778530409
    mov BYTE PTR [rsp+24], 0
    # __printf_chk arguments: edi = 1, esi = format string, rdx = address of
    # the stack copy of the string.
    mov rdx, rsp
    mov esi, OFFSET FLAT:.LC0
    mov edi, 1
    .cfi_offset 3, -16
    call    __printf_chk
    # strtok arguments: rdi = the stack copy (writable), esi = delimiters.
    mov esi, OFFSET FLAT:.LC1
    mov rdi, rsp
    call    strtok
    # Return value 0; then compare the saved canary with fs:40 and call
    # __stack_chk_fail if they differ.
    mov eax, 0
    mov rdx, QWORD PTR [rsp+40]
    xor rdx, QWORD PTR fs:40
    je  .L3
    call    __stack_chk_fail
.L3:
    add rsp, 48
    pop rbx
    .p2align 4,,1
    ret
    .cfi_endproc
.LFE58:
    .size   main, .-main
    .ident  "GCC: (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5"
    .section    .note.GNU-stack,"",@progbits

and the broken one:

    # GCC -O1 output for the crashing char *str variant (Intel syntax).
    .file   "main.c"
    .intel_syntax noprefix
    # Here the tokenized string itself (.LC0) is emitted as a constant in a
    # .rodata section; the section flags "aMS" contain no "w", i.e. the
    # section is not marked writable.
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC0:
    .string "- This, a sample string."
    .section    .rodata.str1.8,"aMS",@progbits,1
    .align 8
.LC1:
    .string "Splitting string \"%s\" into tokens:\n"
    .section    .rodata.str1.1
.LC2:
    .string " ,.-"
    .text
.globl main
    .type   main, @function
main:
.LFB58:
    .cfi_startproc
    sub rsp, 8
    .cfi_def_cfa_offset 16
    # __printf_chk arguments: edi = 1, esi = format string, edx = address of
    # the literal .LC0.
    mov edx, OFFSET FLAT:.LC0
    mov esi, OFFSET FLAT:.LC1
    mov edi, 1
    mov eax, 0
    call    __printf_chk
    # strtok arguments: edi = address of the literal .LC0 itself, esi =
    # delimiters. No copy is made, so strtok's writes target .rodata.
    mov esi, OFFSET FLAT:.LC2
    mov edi, OFFSET FLAT:.LC0
    call    strtok
    mov eax, 0
    add rsp, 8
    ret
    .cfi_endproc
.LFE58:
    .size   main, .-main
    .ident  "GCC: (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5"
    .section    .note.GNU-stack,"",@progbits

The only obvious difference I can see is that in the working version GCC substitutes the string constant by MOVs directly in the code.

Help is much appreciated.

edit
gcc (Ubuntu/Linaro 4.4.4-14ubuntu5) 4.4.5,

All the best,
Thomas

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(4)

挽梦忆笙歌 2024-12-09 11:05:27

在第二种情况下,您将 str 指向内存中某个位置的静态对象,该对象无法更改。 strtok 手册页警告说它更改了其第一个参数并且不能用于常量字符串。因此出现了错误。

In the second case, you're pointing str at a static object somewhere in memory that can't be changed. The strtok man page warns that it changes its first argument and can't be used on a constant string. Hence the error.

围归者 2024-12-09 11:05:27

strtok() 需要一个可修改的缓冲区,因为它会用空字节替换分隔符。所以你不能写 char * str = "- This, a sample string.";,因为那实际上应该是 const char * str = "- This, a sample string."; 并且指向只读存储器。相反,您有多种选择:

char str[] = "- This, a sample string.";  // local array
char * pch = strtok (str," ,.-");


char * str = strdup("- This, a sample string.");  // malloc()ed
char * pch = strtok (str," ,.-");
/* ... */
free(str);

strtok() requires a modifiable buffer, because it replaces the delimiter by a null byte. So you cannot say char * str = "- This, a sample string.";, because that should really have been const char * str = "- This, a sample string."; and points to read-only memory. Instead, you have several options:

char str[] = "- This, a sample string.";  // local array
char * pch = strtok (str," ,.-");


char * str = strdup("- This, a sample string.");  // malloc()ed
char * pch = strtok (str," ,.-");
/* ... */
free(str);
梦初启 2024-12-09 11:05:27

char * str 为一个指针分配空间,该指针指向的字符串恰好是一个字符串字面量常量(即不可写)。

char str[] 为数组分配空间,数组的大小由指定的文字指定。该数组是可写的。

strtok() 修改它所处理的字符串。 str[] 允许这样做,但 *str 不允许这样做。

char * str allocates room for a pointer to a string that happens to be a constant literal (i.e., not writable).

char str[] allocates room for an array whose size is specified by the assigned literal. The array is writable.

strtok() modifies the string it works on. This is allowed with str[] but not with *str.

葬花如无物 2024-12-09 11:05:27

当您使用 char p[] = "literal" 时,许多编译器会分配一个长度合适的字符数组,然后把字符串从存放字符串常量的位置复制到该数组中,因此您最终得到的是该字符串的一份可修改副本。

当您使用 char* p = "literal" 时,您得到的是一个指向该字符串不可修改副本的指针。当您尝试修改它时,行为是未定义的。事实上,从某个版本开始,当您写 char *p = "literal" 时 g++ 会发出警告,因为正确的写法是 const char* p = "literal",它是一个指向常量字符串的指针。

When you use char p[] = "literal", many compilers will allocate a character array of the appropriate length and then copy the string from wherever string constants are kept into the array, so you end up with a modifiable copy of the string.

When you use char* p = "literal", you have a pointer that points to that unmodifiable copy of the string. When you attempt to modify it, the behavior is undefined. In fact, at some point g++ started issuing a warning when you do char *p = "literal", because the correct way to specify it is const char* p = "literal", since it is a pointer to a constant string.

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文