返回结构时出现分段错误

发布于 2024-09-11 22:43:48 字数 2020 浏览 3 评论 0原文

我正在尝试做一件非常简单的事情 - 读取一个文件,然后按行分割,把它转换为 char**。但是,当我返回包含 char** 和大小的结构时,我收到分段错误。我在这里读到(“C segmentation fault before/during return statement”),这可能是“损坏的堆栈”造成的。然而我仍然不知道我做了什么破坏了它。这是我的代码:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include "comp_words.h"
#define BLOCK 4096

/*
 * A character buffer paired with a length.
 * As filled in by readfile(): `str` is the calloc'd buffer and `size` is
 * the number of bytes allocated for it (file length + 1).
 */
struct sized_str {
    char* str;
    long size;
};

/*
 * An array of strings paired with an element count.
 * As filled in by read_dict(): `content` is the malloc'd array of line
 * pointers and `size` is the number of slots allocated for it.
 */
struct sized_arr {
    char** content;
    int size;
};

/*
 * Read the whole file `name` into a freshly calloc'd buffer.
 *
 * Returns a sized_str whose `str` is the buffer and whose `size` is the
 * length reported by ftell() plus one. The buffer is not freed here, so
 * the caller is responsible for it.
 *
 * NOTE(review): none of the return values (fopen, fseek, ftell, calloc)
 * are checked; if fopen fails, a NULL FILE* is passed to fseek. The
 * stream is also never fclose()d.
 */
struct sized_str readfile(char* name) {
    FILE *f;
    long filesize;
    char *buf;
    struct sized_str res;
    /* r: count returned by the last fread; p: total bytes stored so far. */
    int r, p = 0;

    f = fopen(name, "r");
    /* Seek to the end to learn the file length, then go back to the start. */
    fseek(f, 0, SEEK_END);
    filesize = ftell(f);
    rewind(f);
    /* calloc zero-fills, so the one extra byte stays 0 and terminates the
     * text as long as no more than filesize bytes are read. */
    buf = calloc(filesize + 1, sizeof(char));
    /* Append to buf in requests of BLOCK bytes until fread returns 0.
     * NOTE(review): each request asks for up to BLOCK bytes regardless of
     * the space left in buf; this relies on the stream yielding no more
     * than filesize bytes in total. */
    while ((r = fread(buf + p, sizeof(char), BLOCK, f))) {
        p += r;
    }
    res.str = buf;
    res.size = filesize + 1;

    return res;
}

/*
 * Load /var/tmp/twl06.txt and split it into lower-cased lines.
 *
 * Returns a sized_arr whose `content` is a malloc'd array with one char*
 * slot per "\n\r"-delimited token counted in the file, and whose `size`
 * is that token count.
 *
 * NOTE(review): defects in this function as written:
 *   - each line buffer is allocated with strlen(line) bytes, but the copy
 *     loop also stores the terminating NUL, i.e. one byte past the end;
 *   - `i` is never incremented, so every line is stored in res[0] (the
 *     previous allocation is lost each time) and the remaining slots are
 *     left uninitialized;
 *   - the buffer returned by readfile() is never freed.
 */
struct sized_arr read_dict() {
    struct sized_str file_content;
    struct sized_arr result;
    char *buf, *buf_cpy, *buf_cpy_point, *line, **res;
    int i = 0, j, line_count = 0;

    file_content = readfile("/var/tmp/twl06.txt");
    buf = file_content.str;
    /* strtok modifies its input, so the counting pass runs on a scratch copy. */
    buf_cpy = (char*)malloc(file_content.size * sizeof(char));
    strcpy(buf_cpy, buf);
    buf_cpy_point = buf_cpy;

    /* Pass 1: count tokens. The first call gets the buffer, later calls
     * get NULL so strtok continues where it left off. */
    while (strtok(buf_cpy_point, "\n\r")) {
        line_count++;
        buf_cpy_point = NULL;
    }

    res = (char**)malloc(sizeof(char*) * line_count);

    /* Pass 2: tokenize the original buffer and copy each line lower-cased. */
    while ((line = strtok(buf, "\n\r"))) {
        /* NOTE(review): no room is reserved for the terminating NUL. */
        res[i] = (char*)malloc(sizeof(char) * strlen(line));

        j = 0;
        /* Copies characters up to and including the NUL; the loop stops
         * once the stored character is 0. */
        while ((res[i][j] = tolower(line[j]))) {
            j++;
        }
        /* Same strtok continuation trick as in pass 1.
         * NOTE(review): `i` is not advanced here. */
        buf = NULL;
    }
    free(buf_cpy);
    result.size = line_count;
    result.content = res;

    return result;
}

// ...

int main (int argc, char** argv) {
    struct sized_str input;
    struct sized_arr dict;

    dict = read_dict();

    // ...
    return 0;

从 read_dict 函数返回时代码出现段错误。

I am trying to do a pretty simple thing - reading a file and then turning it into a char** by splitting it into lines. However, when I return a struct containing the char** and its size, I get a segmentation fault. I read here: "C segmentation fault before/during return statement" that it's probably a "mangled stack". I still, however, don't know what I did to mangle it. This is my code:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include "comp_words.h"
#define BLOCK 4096

/*
 * A character buffer paired with a length.
 * As filled in by readfile(): `str` is the calloc'd buffer and `size` is
 * the number of bytes allocated for it (file length + 1).
 */
struct sized_str {
    char* str;
    long size;
};

/*
 * An array of strings paired with an element count.
 * As filled in by read_dict(): `content` is the malloc'd array of line
 * pointers and `size` is the number of slots allocated for it.
 */
struct sized_arr {
    char** content;
    int size;
};

/*
 * Read the whole file `name` into a freshly calloc'd buffer.
 *
 * Returns a sized_str whose `str` is the buffer and whose `size` is the
 * length reported by ftell() plus one. The buffer is not freed here, so
 * the caller is responsible for it.
 *
 * NOTE(review): none of the return values (fopen, fseek, ftell, calloc)
 * are checked; if fopen fails, a NULL FILE* is passed to fseek. The
 * stream is also never fclose()d.
 */
struct sized_str readfile(char* name) {
    FILE *f;
    long filesize;
    char *buf;
    struct sized_str res;
    /* r: count returned by the last fread; p: total bytes stored so far. */
    int r, p = 0;

    f = fopen(name, "r");
    /* Seek to the end to learn the file length, then go back to the start. */
    fseek(f, 0, SEEK_END);
    filesize = ftell(f);
    rewind(f);
    /* calloc zero-fills, so the one extra byte stays 0 and terminates the
     * text as long as no more than filesize bytes are read. */
    buf = calloc(filesize + 1, sizeof(char));
    /* Append to buf in requests of BLOCK bytes until fread returns 0.
     * NOTE(review): each request asks for up to BLOCK bytes regardless of
     * the space left in buf; this relies on the stream yielding no more
     * than filesize bytes in total. */
    while ((r = fread(buf + p, sizeof(char), BLOCK, f))) {
        p += r;
    }
    res.str = buf;
    res.size = filesize + 1;

    return res;
}

/*
 * Load /var/tmp/twl06.txt and split it into lower-cased lines.
 *
 * Returns a sized_arr whose `content` is a malloc'd array with one char*
 * slot per "\n\r"-delimited token counted in the file, and whose `size`
 * is that token count.
 *
 * NOTE(review): defects in this function as written:
 *   - each line buffer is allocated with strlen(line) bytes, but the copy
 *     loop also stores the terminating NUL, i.e. one byte past the end;
 *   - `i` is never incremented, so every line is stored in res[0] (the
 *     previous allocation is lost each time) and the remaining slots are
 *     left uninitialized;
 *   - the buffer returned by readfile() is never freed.
 */
struct sized_arr read_dict() {
    struct sized_str file_content;
    struct sized_arr result;
    char *buf, *buf_cpy, *buf_cpy_point, *line, **res;
    int i = 0, j, line_count = 0;

    file_content = readfile("/var/tmp/twl06.txt");
    buf = file_content.str;
    /* strtok modifies its input, so the counting pass runs on a scratch copy. */
    buf_cpy = (char*)malloc(file_content.size * sizeof(char));
    strcpy(buf_cpy, buf);
    buf_cpy_point = buf_cpy;

    /* Pass 1: count tokens. The first call gets the buffer, later calls
     * get NULL so strtok continues where it left off. */
    while (strtok(buf_cpy_point, "\n\r")) {
        line_count++;
        buf_cpy_point = NULL;
    }

    res = (char**)malloc(sizeof(char*) * line_count);

    /* Pass 2: tokenize the original buffer and copy each line lower-cased. */
    while ((line = strtok(buf, "\n\r"))) {
        /* NOTE(review): no room is reserved for the terminating NUL. */
        res[i] = (char*)malloc(sizeof(char) * strlen(line));

        j = 0;
        /* Copies characters up to and including the NUL; the loop stops
         * once the stored character is 0. */
        while ((res[i][j] = tolower(line[j]))) {
            j++;
        }
        /* Same strtok continuation trick as in pass 1.
         * NOTE(review): `i` is not advanced here. */
        buf = NULL;
    }
    free(buf_cpy);
    result.size = line_count;
    result.content = res;

    return result;
}

// ...

int main (int argc, char** argv) {
    struct sized_str input;
    struct sized_arr dict;

    dict = read_dict();

    // ...
    return 0;

The code segfaults while returning from read_dict function.

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(2)

葬花如无物 2024-09-18 22:43:48

至少乍一看,这似乎有几个问题。首先:

while ((line = strtok(buf, "\n\r"))) {

要使用 strtok,您通常首先传递缓冲区,然后进行后续调用,为第一个参数传递“NULL”,直到 strtok返回 NULL(表明已到达缓冲区末尾)。 [编辑:经过进一步检查,很明显这并不是一个真正的错误 - 正如 @Casablanca 所指出的,他在循环中将 buf 设置为 NULL,因此第二次和后续迭代实际上确实为第一个参数传递 NULL —— 所以当前的代码有点难以理解并且(至少可以说)有些脆弱,但实际上并没有错误。]

其次,当你分配空间时,看起来你没有为终止 NUL 分配空间:

res[i] = (char*)malloc(sizeof(char) * strlen(line));

至少乍一看,它看起来应该是:

res[i] = malloc(strlen(line)+1);

[顺便说一句,sizeof(char)==1;另外,对 malloc 的返回值做强制转换可能会掩盖忘记 #include <stdlib.h>(因而作用域内没有正确原型)的错误。]

你的其他一些代码并不完全错误,但在我看来可读性不够理想。例如:

j = 0;
while ((res[i][j] = tolower(line[j]))) {
    j++;
}

这似乎是一种相当混乱的书写方式:

for (j=0; line[j] != '\0'; j++)
    res[i][j] = tolower((unsigned char)line[j]);

另请注意,当您调用tolower时,您通常需要/想要将参数强制转换为unsigned char(传递负值会产生未定义的行为,并且在 char 带符号的典型情况下,相当多带有重音符号、变音符号等的字符通常会显示为负值)。

您似乎还存在内存泄漏 - read_dict 调用 readfile,它分配一个缓冲区(使用 calloc - 为什么不 malloc?)并返回一个指向结构中该内存的指针。 read_dict 接收该结构,但除非我错过了某些内容,否则该结构会超出范围,而无需释放它指向的内存。

我的第一反应是重新开始,而不是试图找到并解决您所看到的问题。在我看来,你把问题弄得比实际情况复杂得多。如果由我来做,我可能会先写一个函数,分配空间并将一行读入其中,大致如下:

// Warning: Untested code.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Read one line of arbitrary length from `file`.
 *
 * Returns a malloc'd, NUL-terminated string with the trailing '\n'
 * removed (an empty line yields ""). Returns NULL when nothing could be
 * read (end of file or read error before any character) or when memory
 * cannot be allocated. The caller owns the returned string and must
 * free() it.
 */
char *readline(FILE *file) {
    char *buffer = NULL;
    size_t length = 0;           /* characters stored so far, excluding the NUL */
    char *temp;
    const int block_size = 256;

    for (;;) {
        /* Grow so fgets can append up to block_size-1 characters plus NUL. */
        temp = realloc(buffer, length + block_size);
        if (temp == NULL) {
            /* Out of memory: release what we have instead of using a
             * possibly-NULL buffer below. */
            free(buffer);
            return NULL;
        }
        buffer = temp;
        buffer[length] = '\0';

        if (fgets(buffer + length, block_size, file) == NULL) {
            if (length == 0) {
                /* Nothing read at all: do not leak the scratch buffer. */
                free(buffer);
                return NULL;
            }
            break;               /* last line had no trailing newline */
        }

        length += strlen(buffer + length);
        if (length > 0 && buffer[length - 1] == '\n') {
            /* Strip the newline directly; strtok would skip a leading
             * delimiter and leave an empty line as "\n". */
            buffer[--length] = '\0';
            break;
        }
    }

    /* Shrink to fit; keep the larger buffer if shrinking fails. */
    temp = realloc(buffer, length + 1);
    if (temp != NULL)
        buffer = temp;
    return buffer;
}

一旦这部分工作正常,读取文件中的所有行并将它们转换为大写,大致就是这样:

// Warning: more untested code.
// Store each line returned by readline() in res[i], upper-casing it in
// place, until readline() returns NULL.
while ((res[i] = readline(file)) != NULL) {
    for (char *p = res[i]; *p != '\0'; p++) {
        *p = toupper((unsigned char)*p);
    }
    i++;
}

At least at first glance, this seems to have a couple of problems. First:

while ((line = strtok(buf, "\n\r"))) {

To use strtok you normally pass the buffer on the first call, then make subsequent calls passing "NULL" for the first parameter until strtok returns a NULL (indicating that it's reached the end of the buffer). [Edit: upon further examination, it's apparent this isn't really a bug -- as pointed out by @Casablanca, he sets buf to NULL in the loop so the second and subsequent iterations actually do pass NULL for the first parameter -- so the current code is a bit hard to understand and (at least arguably) somewhat fragile, but not actually wrong.]

Second, when you allocate your space, it looks like you're not allocating space for the terminating NUL:

res[i] = (char*)malloc(sizeof(char) * strlen(line));

At least at first glance, it looks like this should be:

res[i] = malloc(strlen(line)+1);

[As an aside, sizeof(char)==1 and casting the return from malloc can mask the bug of failing to #include <stdlib.h> to get a proper prototype in scope.]

Some of your other code isn't exactly wrong, but strikes me as less readable than ideal. For example:

j = 0;
while ((res[i][j] = tolower(line[j]))) {
    j++;
}

This appears to be a rather obfuscated way of writing:

for (j=0; line[j] != '\0'; j++)
    res[i][j] = tolower((unsigned char)line[j]);

Also note that when you call tolower, you generally need/want to cast the parameter to unsigned char (passing a negative value gives undefined behavior, and quite a few characters with accents, umlauts, etc., will normally show up as negative in the typical case that char is signed).

You also seem to have a memory leak -- read_dict calls readfile, which allocates a buffer (with calloc -- why not malloc?) and returns a pointer to that memory in a structure. read_dict receives the structure, but unless I've missed something, the struct goes out of scope without your ever freeing the memory it pointed to.

Rather than try to find and fix the problem you've seen, my immediate reaction would be to start over. It seems to me that you've made the problem considerably more complex than it really is. If I were doing it, I'd probably start with a function to allocate space and read a line into the space, something on this order:

// Warning: Untested code.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Read one line of arbitrary length from `file`.
 *
 * Returns a malloc'd, NUL-terminated string with the trailing '\n'
 * removed (an empty line yields ""). Returns NULL when nothing could be
 * read (end of file or read error before any character) or when memory
 * cannot be allocated. The caller owns the returned string and must
 * free() it.
 */
char *readline(FILE *file) {
    char *buffer = NULL;
    size_t length = 0;           /* characters stored so far, excluding the NUL */
    char *temp;
    const int block_size = 256;

    for (;;) {
        /* Grow so fgets can append up to block_size-1 characters plus NUL. */
        temp = realloc(buffer, length + block_size);
        if (temp == NULL) {
            /* Out of memory: release what we have instead of using a
             * possibly-NULL buffer below. */
            free(buffer);
            return NULL;
        }
        buffer = temp;
        buffer[length] = '\0';

        if (fgets(buffer + length, block_size, file) == NULL) {
            if (length == 0) {
                /* Nothing read at all: do not leak the scratch buffer. */
                free(buffer);
                return NULL;
            }
            break;               /* last line had no trailing newline */
        }

        length += strlen(buffer + length);
        if (length > 0 && buffer[length - 1] == '\n') {
            /* Strip the newline directly; strtok would skip a leading
             * delimiter and leave an empty line as "\n". */
            buffer[--length] = '\0';
            break;
        }
    }

    /* Shrink to fit; keep the larger buffer if shrinking fails. */
    temp = realloc(buffer, length + 1);
    if (temp != NULL)
        buffer = temp;
    return buffer;
}

Once that's working, reading all the lines in the file and converting them to upper-case comes out something like:

// Warning: more untested code.
// Store each line returned by readline() in res[i], upper-casing it in
// place, until readline() returns NULL.
while ((res[i] = readline(file)) != NULL) {
    for (char *p = res[i]; *p != '\0'; p++) {
        *p = toupper((unsigned char)*p);
    }
    i++;
}
春庭雪 2024-09-18 22:43:48

看起来您忘记在将每一行存储到结果数组中后增加 i,因此最终将所有行存储到 res[0] 中。但您仍然在最后设置 result.size = line_count,因此第一个之外的所有数组元素都是未定义的。在 while ((line = strtok(buf, "\n\r"))) 这个循环的末尾加上 i++ 应该可以修复它。

It looks like you forgot to increment i after storing each line into the result array, so you end up storing all lines into res[0]. But you still set result.size = line_count at the end, so all array elements beyond the first are undefined. An i++ at the end of this loop: while ((line = strtok(buf, "\n\r"))) should fix it.

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文