strtok 和内存泄漏

发布于 08-06 19:15 字数 2111 浏览 8 评论 0原文

我使用 strtok() 编写了一个简单的 url 解析器。这是代码

#include <stdio.h>
#include <stdlib.h>

typedef struct {
    char *protocol;
    char *host;
    int port;
    char *path;
} aUrl;


/*
 * Split a URL into protocol, host, port and path and store them in *ret.
 *
 * url: NUL-terminated URL. It is only read here; all tokenizing is done on
 *      copies made with _strdup().
 * ret: output. protocol and host end up pointing into the first copy, path
 *      into a second copy. port is taken from a ":port" suffix of the host
 *      when there is one, otherwise it is 80 for "http" and 443 for "https".
 *
 * The order of the strtok() calls matters: strtok() keeps internal state, so
 * every strtok(NULL, ...) continues the most recent non-NULL call.
 *
 * NOTE(review): neither copy is freed here and the addresses returned by
 *   _strdup() are not handed back explicitly, so a caller has no reliable
 *   pointer to pass to free().
 * NOTE(review): the results of _strdup() and strtok() are used without NULL
 *   checks (strlen/strcmp on NULL if the URL lacks a host).
 * NOTE(review): ret->port is not assigned when there is no explicit port and
 *   the protocol is neither "http" nor "https".
 * NOTE(review): strtok/strlen/strcmp are declared in <string.h>, which is
 *   not among the includes shown with this snippet.
 */
void parse_url(char *url, aUrl *ret) {

    printf("Parsing %s\n", url);
    /* Working copy: strtok() overwrites delimiters, so url itself is left alone. */
    char *tmp = (char *)_strdup(url);
    //char *protocol, *host, *port, *path;
    int len = 0;

    // protocol is now e.g. "http:" or "https:"
    ret->protocol = (char *) strtok(tmp, "/");
    /* presumably the +2 stands for the "//" that follows the scheme -- confirm */
    len = strlen(ret->protocol) + 2;

    /* Next token of the same copy: the host, possibly with ":port". */
    ret->host = (char *) strtok(NULL, "/");


    len += strlen(ret->host);

    //printf("char at %d => %c", len, url[len]);

    /* Second heap copy, starting at offset len of the original URL. */
    ret->path = (char *)_strdup(&url[len]);

    /* Cut the path at the first '#'. This starts a new strtok() scan.
     * NOTE(review): ret->path is overwritten with strtok()'s result, so the
     * address returned by _strdup() is lost if that result differs from it. */
    ret->path = (char *) strtok(ret->path, "#");

    /* Drop the trailing ':' from the protocol, in place. */
    ret->protocol = (char *) strtok(ret->protocol, ":");

    // host is now e.g. address.com:8080
    //tmp = (char *)_strdup(host);
    //strtok(tmp, ":");
    ret->host = (char *) strtok(ret->host, ":");
    /* tmp is reused for the port text (NULL when the host had no ":port"). */
    tmp = (char *) strtok(NULL, ":");

    if(tmp == NULL) {
        /* No explicit port: use the default for the protocol. */
        if(strcmp(ret->protocol, "http") == 0) {
            ret->port = 80;
        } else if(strcmp(ret->protocol, "https") == 0) {
            ret->port = 443;
        }
    } else {
        ret->port = atoi(tmp);
    }


    //host = (char *) strtok(NULL, "/");




}

/*
 * Demo driver: parses one hard-coded URL and prints its components.
 */
int main(int argc, char **argv)
{
    aUrl parsed;

    printf("hello moto\n");
    parse_url("http://teste.com/Teste/asdf#coisa", &parsed);

    printf("protocol is %s\nhost is %s\nport is %d\npath is %s\n",
           parsed.protocol,
           parsed.host,
           parsed.port,
           parsed.path);

    return EXIT_SUCCESS;
}

如您所见,我经常使用 strtok(),因此我可以“切片”网址。我不需要支持不同于 http 或 https 的 url,因此它的完成方式解决了我的所有问题。 我担心的是(这是在嵌入式设备上运行) - 我在浪费内存吗? 当我写类似的东西

ret->protocol = (char *) strtok(tmp, "/");

然后稍后调用

ret->protocol = (char *) strtok(ret->protocol, ":");

我的第一个指针 ret->protocol 保留在内存中吗?我想也许我应该将第一次调用设置为 tmp 指针,调用 strtok 将 ret->protocol 指向字符串的右侧部分(第二次调用),然后调用 free(tmp)。

使用 strtok 的最佳方式应该是什么?

I wrote a simple url parser using strtok(). here's the code

#include <stdio.h>
#include <stdlib.h>

typedef struct {
    char *protocol;
    char *host;
    int port;
    char *path;
} aUrl;


/*
 * Split a URL into protocol, host, port and path and store them in *ret.
 *
 * url: NUL-terminated URL. It is only read here; all tokenizing is done on
 *      copies made with _strdup().
 * ret: output. protocol and host end up pointing into the first copy, path
 *      into a second copy. port is taken from a ":port" suffix of the host
 *      when there is one, otherwise it is 80 for "http" and 443 for "https".
 *
 * The order of the strtok() calls matters: strtok() keeps internal state, so
 * every strtok(NULL, ...) continues the most recent non-NULL call.
 *
 * NOTE(review): neither copy is freed here and the addresses returned by
 *   _strdup() are not handed back explicitly, so a caller has no reliable
 *   pointer to pass to free().
 * NOTE(review): the results of _strdup() and strtok() are used without NULL
 *   checks (strlen/strcmp on NULL if the URL lacks a host).
 * NOTE(review): ret->port is not assigned when there is no explicit port and
 *   the protocol is neither "http" nor "https".
 * NOTE(review): strtok/strlen/strcmp are declared in <string.h>, which is
 *   not among the includes shown with this snippet.
 */
void parse_url(char *url, aUrl *ret) {

    printf("Parsing %s\n", url);
    /* Working copy: strtok() overwrites delimiters, so url itself is left alone. */
    char *tmp = (char *)_strdup(url);
    //char *protocol, *host, *port, *path;
    int len = 0;

    // protocol is now e.g. "http:" or "https:"
    ret->protocol = (char *) strtok(tmp, "/");
    /* presumably the +2 stands for the "//" that follows the scheme -- confirm */
    len = strlen(ret->protocol) + 2;

    /* Next token of the same copy: the host, possibly with ":port". */
    ret->host = (char *) strtok(NULL, "/");


    len += strlen(ret->host);

    //printf("char at %d => %c", len, url[len]);

    /* Second heap copy, starting at offset len of the original URL. */
    ret->path = (char *)_strdup(&url[len]);

    /* Cut the path at the first '#'. This starts a new strtok() scan.
     * NOTE(review): ret->path is overwritten with strtok()'s result, so the
     * address returned by _strdup() is lost if that result differs from it. */
    ret->path = (char *) strtok(ret->path, "#");

    /* Drop the trailing ':' from the protocol, in place. */
    ret->protocol = (char *) strtok(ret->protocol, ":");

    // host is now e.g. address.com:8080
    //tmp = (char *)_strdup(host);
    //strtok(tmp, ":");
    ret->host = (char *) strtok(ret->host, ":");
    /* tmp is reused for the port text (NULL when the host had no ":port"). */
    tmp = (char *) strtok(NULL, ":");

    if(tmp == NULL) {
        /* No explicit port: use the default for the protocol. */
        if(strcmp(ret->protocol, "http") == 0) {
            ret->port = 80;
        } else if(strcmp(ret->protocol, "https") == 0) {
            ret->port = 443;
        }
    } else {
        ret->port = atoi(tmp);
    }


    //host = (char *) strtok(NULL, "/");




}

/*
 * Demo driver: parses one hard-coded URL and prints its components.
 */
int main(int argc, char **argv)
{
    aUrl parsed;

    printf("hello moto\n");
    parse_url("http://teste.com/Teste/asdf#coisa", &parsed);

    printf("protocol is %s\nhost is %s\nport is %d\npath is %s\n",
           parsed.protocol,
           parsed.host,
           parsed.port,
           parsed.path);

    return EXIT_SUCCESS;
}

As you can see, I use strtok() a lot so I can "slice" the url. I don't need to support urls different than http or https so the way it's done solves all of my problems.
My concern is (this is running on an embedded device) - Am I wasting memory ?
When I write something like

ret->protocol = (char *) strtok(tmp, "/");

And then later call

ret->protocol = (char *) strtok(ret->protocol, ":");

Does the memory that my first ret->protocol pointer referred to remain allocated? I thought that maybe I should assign the result of the first call to a tmp pointer, call strtok again to point ret->protocol at the right portion of the string (the second call), and then free(tmp).

What should be the best way to use strtok ?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(4)

遥远的绿洲2024-08-13 19:15:36

为了直接回答你的问题,strtok只返回一个指向你作为输入提供的字符串内的位置的指针——它不会为你分配新的内存,所以不需要在它给你的任何指针上调用free回来作为回报。

就其价值而言,您还可以研究“strchr”和“strstr”,它们是在字符串中搜索单个字符或序列的非破坏性方法。

另请注意,您的内存分配在这里存在问题 - 您使用 strdup() 在解析函数内分配一个新字符串,然后将指向该内存块各个片段的指针赋给“ret”的字段。因此,您的调用者将负责释放 strdup 得到的字符串,但由于您只是通过 ret 隐式地把该字符串传回,调用者需要神奇地知道应该把哪个指针传递给 free。 (可能是 ret->protocol,但也可能不是,具体取决于输入的内容。)

To answer your question directly, strtok only returns a pointer to a location inside the string you give it as input -- it doesn't allocate new memory for you, so you shouldn't need to call free on any of the pointers it gives you back.

For what it's worth, you could also look into "strchr" and "strstr", which are nondestructive ways of searching for single characters or sequences within strings.

Also note that your memory allocation is problematic here-- you're using strdup() to allocate a new string inside your parse function, and then you're assigning fragments of that memory block to fields of "ret". Your caller will thus be responsible for free'ing the strdup'd string, but since you're only passing that string back implicitly inside ret, the caller needs to know magically what pointer to pass to free. (Probably ret->protocol, but maybe not, depending on how the input looks.)

今天小雨转甜2024-08-13 19:15:36

strtok 就地修改字符串,用 NUL 字符('\0')替换指定的字符。由于 C 中的字符串是以 NUL 结尾的,因此现在看来原始指针指向一个较短的字符串,即使原始字符串仍然存在并且仍然占用相同的内存量(但字符被替换为 NUL)。我认为字符串的末尾包含两个连续的 NUL。

简短的答案是:保留一个指向字符串缓冲区开头的指针,并在解析字符串时将另一个指针作为“当前”指针。当您使用 strtok 或以其他方式迭代字符串时,您会更新“当前”指针,但保留起始指针。完成后, free() 开始指针。没有内存泄漏。

strtok modifies the string in place, replacing the specified characters with NUL ('\0'). Since strings in C are NUL-terminated, it now appears that your original pointer is pointing to a shorter string, even though the original string is still there and still occupies the same amount of memory (but with characters replaced with NUL). The end of the string, I think, contains a double NUL.

The short answer is this: Keep a pointer to the beginning of your string buffer, and have another pointer that is your "current" pointer into the string as you parse it. When you use strtok or iterate over the string in other ways you update the "current" pointer but leave the beginning pointer alone. When you're finished, free() the beginning pointer. No memory leaked.

老街孤人2024-08-13 19:15:36

您知道可以使用 NULL 作为 strtok 的第一个参数继续解析字符串吗?

第一次调用:

char* token = strtok(string, delimiters);

然后:

token = strtok(NULL, other_delimiters);

这可以让您简化代码:

/*
 * Split a URL into protocol, host and path with a single strtok() pass.
 *
 * url: writable, NUL-terminated URL such as "http://host/path#frag".
 *      strtok() overwrites delimiters in place, so a string literal must
 *      not be passed.
 * ret: output. The pieces are copied with strcpy(), so ret->protocol,
 *      ret->host and ret->path must already point to caller-provided
 *      buffers large enough for them (protocol also receives "//").
 *      NOTE(review): the buffer sizes are not known here, so the copies
 *      cannot be bounded.
 *
 * Returns 0 on success, -1 if an argument is NULL or a component is missing.
 */
int parse_url(char *url, aUrl *ret)
{
    if (url == NULL || ret == NULL) {
        return -1;
    }

    /* get protocol: the first token is e.g. "http:" */
    char *token = strtok(url, "/");
    if (token == NULL) {
        return -1;
    }
    strcpy(ret->protocol, token);
    strcat(ret->protocol, "//");

    /*
     * get host: strtok() skips a whole run of leading delimiters, so the
     * "//" after the scheme never produces an empty token. An extra call to
     * "skip" it would swallow the host instead.
     */
    token = strtok(NULL, "/");
    if (token == NULL) {
        return -1;
    }
    strcpy(ret->host, token);

    /*
     * get path: everything up to the fragment marker. The '/' in front of it
     * was overwritten when the host token was terminated, so the stored path
     * has no leading slash.
     */
    token = strtok(NULL, "#");
    if (token == NULL) {
        return -1;
    }
    strcpy(ret->path, token);

    // ...

    return 0;
}

您可以看到我有一个返回值来知道解析是否成功完成。

Do you know you can continue parsing the string using NULL as first parameter of strtok?

First call:

char* token = strtok(string, delimiters);

Then:

token = strtok(NULL, other_delimiters);

This allows you to simplify your code:

/*
 * Split a URL into protocol, host and path with a single strtok() pass.
 *
 * url: writable, NUL-terminated URL such as "http://host/path#frag".
 *      strtok() overwrites delimiters in place, so a string literal must
 *      not be passed.
 * ret: output. The pieces are copied with strcpy(), so ret->protocol,
 *      ret->host and ret->path must already point to caller-provided
 *      buffers large enough for them (protocol also receives "//").
 *      NOTE(review): the buffer sizes are not known here, so the copies
 *      cannot be bounded.
 *
 * Returns 0 on success, -1 if an argument is NULL or a component is missing.
 */
int parse_url(char *url, aUrl *ret)
{
    if (url == NULL || ret == NULL) {
        return -1;
    }

    /* get protocol: the first token is e.g. "http:" */
    char *token = strtok(url, "/");
    if (token == NULL) {
        return -1;
    }
    strcpy(ret->protocol, token);
    strcat(ret->protocol, "//");

    /*
     * get host: strtok() skips a whole run of leading delimiters, so the
     * "//" after the scheme never produces an empty token. An extra call to
     * "skip" it would swallow the host instead.
     */
    token = strtok(NULL, "/");
    if (token == NULL) {
        return -1;
    }
    strcpy(ret->host, token);

    /*
     * get path: everything up to the fragment marker. The '/' in front of it
     * was overwritten when the host token was terminated, so the stored path
     * has no leading slash.
     */
    token = strtok(NULL, "#");
    if (token == NULL) {
        return -1;
    }
    strcpy(ret->path, token);

    // ...

    return 0;
}

You can see I had a return value to know if parsing was successfully done.

蒗幽2024-08-13 19:15:36

感谢您分享您的代码!我在 valgrind 中运行它并修复了 strdup 函数生成的两个内存泄漏。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Components of a parsed URL.
 *
 * Ownership: parse_url() allocates two heap blocks. `protocol` points at the
 * start of the first one, which also holds `host`; `path` points at the
 * start of the second. Release both with free_url().
 */
typedef struct {
    char *protocol; /* scheme without ':' (e.g. "http"); owns its block */
    char *host;     /* host name; stored inside the block owned by protocol */
    int port;       /* explicit port, else 80 (http) / 443 (https), else 0 */
    char *path;     /* path without "#fragment"; "" when absent; owned */
} URL;

enum { URL_PORT_MAX = 65535 };

/* Copy the first len bytes of src into a new NUL-terminated heap string. */
static char *url_copy_span(const char *src, size_t len)
{
    char *dst = malloc(len + 1);

    if (dst != NULL) {
        memcpy(dst, src, len);
        dst[len] = '\0';
    }
    return dst;
}

/* Convert len decimal digits to a port; -1 if empty, non-numeric or too big. */
static int url_parse_port(const char *digits, size_t len)
{
    long value = 0;

    if (len == 0) {
        return -1;
    }
    for (size_t i = 0; i < len; i++) {
        if (digits[i] < '0' || digits[i] > '9') {
            return -1;
        }
        value = value * 10 + (digits[i] - '0');
        if (value > URL_PORT_MAX) {
            return -1;
        }
    }
    return (int)value;
}

/*
 * Parse "scheme://host[:port][/path][#fragment]" into *ret.
 *
 * url: NUL-terminated URL; it is only read, never modified.
 * ret: output. On success every field is set and the strings are heap
 *      copies that must be released with free_url(). On any failure (NULL
 *      argument, missing "://", empty scheme or host, malformed port, out of
 *      memory) protocol, host and path are NULL and port is 0, so the
 *      result can be tested and is still safe to pass to free_url().
 *
 * The scan uses strstr/strcspn/memchr instead of strtok(): nothing is
 * tokenized in place, no hidden state is involved, and the pointers handed
 * to free() are always the ones returned by malloc().
 */
void parse_url(char *url, URL *ret) {
    if (ret == NULL) {
        return;
    }
    ret->protocol = NULL;
    ret->host = NULL;
    ret->path = NULL;
    ret->port = 0;
    if (url == NULL) {
        return;
    }

    /* Scheme: everything before "://"; must be non-empty and free of '/'. */
    const char *scheme_end = strstr(url, "://");
    if (scheme_end == NULL || scheme_end == url) {
        return;
    }
    size_t scheme_len = (size_t)(scheme_end - url);
    if (memchr(url, '/', scheme_len) != NULL) {
        return;
    }

    /* Authority: host[:port], terminated by the path, the fragment or the end. */
    const char *authority = scheme_end + 3;
    size_t authority_len = strcspn(authority, "/#");
    const char *colon = memchr(authority, ':', authority_len);
    size_t host_len = (colon != NULL) ? (size_t)(colon - authority) : authority_len;
    if (host_len == 0) {
        return;
    }

    /* An empty port ("host:") is treated like a missing one. */
    size_t port_len = (colon != NULL) ? authority_len - host_len - 1 : 0;
    int port = 0;
    if (port_len > 0) {
        port = url_parse_port(colon + 1, port_len);
        if (port < 0) {
            return;
        }
    } else if (scheme_len == 4 && memcmp(url, "http", 4) == 0) {
        port = 80;
    } else if (scheme_len == 5 && memcmp(url, "https", 5) == 0) {
        port = 443;
    }

    /* Path: from the end of the authority up to (not including) '#'. */
    const char *path = authority + authority_len;
    size_t path_len = strcspn(path, "#");

    /* One block for "scheme\0host\0", a second one for the path. */
    char *head = malloc(scheme_len + 1 + host_len + 1);
    char *path_copy = url_copy_span(path, path_len);
    if (head == NULL || path_copy == NULL) {
        free(head);
        free(path_copy);
        return;
    }
    memcpy(head, url, scheme_len);
    head[scheme_len] = '\0';
    memcpy(head + scheme_len + 1, authority, host_len);
    head[scheme_len + 1 + host_len] = '\0';

    ret->protocol = head;
    ret->host = head + scheme_len + 1;
    ret->port = port;
    ret->path = path_copy;
}

/*
 * Release the strings owned by *url and reset its pointers.
 *
 * Safe on a URL whose parse failed (all pointers NULL) and safe to call
 * twice; host is not freed separately because it lives in protocol's block.
 */
void free_url(URL *url) {
    if (url == NULL) {
        return;
    }
    free(url->path);
    free(url->protocol);
    url->path = NULL;
    url->protocol = NULL;
    url->host = NULL;
}

/*
 * Demo driver: parse one hard-coded URL, print its parts, then release it.
 */
int main(int argc, char **argv)
{
    URL parsed;

    parse_url("http://example.com:3000/Teste/asdf#coisa", &parsed);
    printf("protocol: %s\nhost: %s\nport: %d\npath: %s\n",
           parsed.protocol,
           parsed.host,
           parsed.port,
           parsed.path);
    free_url(&parsed);

    return EXIT_SUCCESS;
}

Thanks for sharing your code! I ran it inside valgrind and fixed two memory leaks generated by strdup functions.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Components of a parsed URL.
 *
 * Ownership: parse_url() allocates two heap blocks. `protocol` points at the
 * start of the first one, which also holds `host`; `path` points at the
 * start of the second. Release both with free_url().
 */
typedef struct {
    char *protocol; /* scheme without ':' (e.g. "http"); owns its block */
    char *host;     /* host name; stored inside the block owned by protocol */
    int port;       /* explicit port, else 80 (http) / 443 (https), else 0 */
    char *path;     /* path without "#fragment"; "" when absent; owned */
} URL;

enum { URL_PORT_MAX = 65535 };

/* Copy the first len bytes of src into a new NUL-terminated heap string. */
static char *url_copy_span(const char *src, size_t len)
{
    char *dst = malloc(len + 1);

    if (dst != NULL) {
        memcpy(dst, src, len);
        dst[len] = '\0';
    }
    return dst;
}

/* Convert len decimal digits to a port; -1 if empty, non-numeric or too big. */
static int url_parse_port(const char *digits, size_t len)
{
    long value = 0;

    if (len == 0) {
        return -1;
    }
    for (size_t i = 0; i < len; i++) {
        if (digits[i] < '0' || digits[i] > '9') {
            return -1;
        }
        value = value * 10 + (digits[i] - '0');
        if (value > URL_PORT_MAX) {
            return -1;
        }
    }
    return (int)value;
}

/*
 * Parse "scheme://host[:port][/path][#fragment]" into *ret.
 *
 * url: NUL-terminated URL; it is only read, never modified.
 * ret: output. On success every field is set and the strings are heap
 *      copies that must be released with free_url(). On any failure (NULL
 *      argument, missing "://", empty scheme or host, malformed port, out of
 *      memory) protocol, host and path are NULL and port is 0, so the
 *      result can be tested and is still safe to pass to free_url().
 *
 * The scan uses strstr/strcspn/memchr instead of strtok(): nothing is
 * tokenized in place, no hidden state is involved, and the pointers handed
 * to free() are always the ones returned by malloc().
 */
void parse_url(char *url, URL *ret) {
    if (ret == NULL) {
        return;
    }
    ret->protocol = NULL;
    ret->host = NULL;
    ret->path = NULL;
    ret->port = 0;
    if (url == NULL) {
        return;
    }

    /* Scheme: everything before "://"; must be non-empty and free of '/'. */
    const char *scheme_end = strstr(url, "://");
    if (scheme_end == NULL || scheme_end == url) {
        return;
    }
    size_t scheme_len = (size_t)(scheme_end - url);
    if (memchr(url, '/', scheme_len) != NULL) {
        return;
    }

    /* Authority: host[:port], terminated by the path, the fragment or the end. */
    const char *authority = scheme_end + 3;
    size_t authority_len = strcspn(authority, "/#");
    const char *colon = memchr(authority, ':', authority_len);
    size_t host_len = (colon != NULL) ? (size_t)(colon - authority) : authority_len;
    if (host_len == 0) {
        return;
    }

    /* An empty port ("host:") is treated like a missing one. */
    size_t port_len = (colon != NULL) ? authority_len - host_len - 1 : 0;
    int port = 0;
    if (port_len > 0) {
        port = url_parse_port(colon + 1, port_len);
        if (port < 0) {
            return;
        }
    } else if (scheme_len == 4 && memcmp(url, "http", 4) == 0) {
        port = 80;
    } else if (scheme_len == 5 && memcmp(url, "https", 5) == 0) {
        port = 443;
    }

    /* Path: from the end of the authority up to (not including) '#'. */
    const char *path = authority + authority_len;
    size_t path_len = strcspn(path, "#");

    /* One block for "scheme\0host\0", a second one for the path. */
    char *head = malloc(scheme_len + 1 + host_len + 1);
    char *path_copy = url_copy_span(path, path_len);
    if (head == NULL || path_copy == NULL) {
        free(head);
        free(path_copy);
        return;
    }
    memcpy(head, url, scheme_len);
    head[scheme_len] = '\0';
    memcpy(head + scheme_len + 1, authority, host_len);
    head[scheme_len + 1 + host_len] = '\0';

    ret->protocol = head;
    ret->host = head + scheme_len + 1;
    ret->port = port;
    ret->path = path_copy;
}

/*
 * Release the strings owned by *url and reset its pointers.
 *
 * Safe on a URL whose parse failed (all pointers NULL) and safe to call
 * twice; host is not freed separately because it lives in protocol's block.
 */
void free_url(URL *url) {
    if (url == NULL) {
        return;
    }
    free(url->path);
    free(url->protocol);
    url->path = NULL;
    url->protocol = NULL;
    url->host = NULL;
}

/*
 * Demo driver: parse one hard-coded URL, print its parts, then release it.
 */
int main(int argc, char **argv)
{
    URL parsed;

    parse_url("http://example.com:3000/Teste/asdf#coisa", &parsed);
    printf("protocol: %s\nhost: %s\nport: %d\npath: %s\n",
           parsed.protocol,
           parsed.host,
           parsed.port,
           parsed.path);
    free_url(&parsed);

    return EXIT_SUCCESS;
}
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文