strtok 和内存泄漏
我使用 strtok() 编写了一个简单的 url 解析器。这是代码
#include <stdio.h>
#include <stdlib.h>
typedef struct {
char *protocol;
char *host;
int port;
char *path;
} aUrl;
void parse_url(char *url, aUrl *ret) {
printf("Parsing %s\n", url);
char *tmp = (char *)_strdup(url);
//char *protocol, *host, *port, *path;
int len = 0;
// protocol agora eh por exemplo http: ou https:
ret->protocol = (char *) strtok(tmp, "/");
len = strlen(ret->protocol) + 2;
ret->host = (char *) strtok(NULL, "/");
len += strlen(ret->host);
//printf("char at %d => %c", len, url[len]);
ret->path = (char *)_strdup(&url[len]);
ret->path = (char *) strtok(ret->path, "#");
ret->protocol = (char *) strtok(ret->protocol, ":");
// host agora é por exemplo address.com:8080
//tmp = (char *)_strdup(host);
//strtok(tmp, ":");
ret->host = (char *) strtok(ret->host, ":");
tmp = (char *) strtok(NULL, ":");
if(tmp == NULL) {
if(strcmp(ret->protocol, "http") == 0) {
ret->port = 80;
} else if(strcmp(ret->protocol, "https") == 0) {
ret->port = 443;
}
} else {
ret->port = atoi(tmp);
}
//host = (char *) strtok(NULL, "/");
}
/*
*
*/
int main(int argc, char** argv) {
printf("hello moto\n");
aUrl myUrl;
parse_url("http://teste.com/Teste/asdf#coisa", &myUrl);
printf("protocol is %s\nhost is %s\nport is %d\npath is %s\n", myUrl.protocol, myUrl.host, myUrl.port, myUrl.path);
return (EXIT_SUCCESS);
}
如您所见,我经常使用 strtok(),因此我可以“切片”网址。我不需要支持不同于 http 或 https 的 url,因此它的完成方式解决了我的所有问题。 我担心的是(这是在嵌入式设备上运行) - 我在浪费内存吗? 当我写类似的东西
ret->protocol = (char *) strtok(tmp, "/");
然后稍后调用
ret->protocol = (char *) strtok(ret->protocol, ":");
我的第一个指针 ret->protocol 保留在内存中吗?我想也许我应该将第一次调用设置为 tmp 指针,调用 strtok 将 ret->protocol 指向字符串的右侧部分(第二次调用),然后调用 free(tmp)。
使用 strtok 的最佳方式应该是什么?
I wrote a simple url parser using strtok(). here's the code
#include <stdio.h>
#include <stdlib.h>
typedef struct {
char *protocol;
char *host;
int port;
char *path;
} aUrl;
void parse_url(char *url, aUrl *ret) {
printf("Parsing %s\n", url);
char *tmp = (char *)_strdup(url);
//char *protocol, *host, *port, *path;
int len = 0;
// protocol agora eh por exemplo http: ou https:
ret->protocol = (char *) strtok(tmp, "/");
len = strlen(ret->protocol) + 2;
ret->host = (char *) strtok(NULL, "/");
len += strlen(ret->host);
//printf("char at %d => %c", len, url[len]);
ret->path = (char *)_strdup(&url[len]);
ret->path = (char *) strtok(ret->path, "#");
ret->protocol = (char *) strtok(ret->protocol, ":");
// host agora é por exemplo address.com:8080
//tmp = (char *)_strdup(host);
//strtok(tmp, ":");
ret->host = (char *) strtok(ret->host, ":");
tmp = (char *) strtok(NULL, ":");
if(tmp == NULL) {
if(strcmp(ret->protocol, "http") == 0) {
ret->port = 80;
} else if(strcmp(ret->protocol, "https") == 0) {
ret->port = 443;
}
} else {
ret->port = atoi(tmp);
}
//host = (char *) strtok(NULL, "/");
}
/*
*
*/
int main(int argc, char** argv) {
printf("hello moto\n");
aUrl myUrl;
parse_url("http://teste.com/Teste/asdf#coisa", &myUrl);
printf("protocol is %s\nhost is %s\nport is %d\npath is %s\n", myUrl.protocol, myUrl.host, myUrl.port, myUrl.path);
return (EXIT_SUCCESS);
}
As you can see, I use strtok() a lot so I can "slice" the url. I don't need to support urls different than http or https so the way it's done solves all of my problems.
My concern is (this is running on an embedded device) - Am I wasting memory ?
When I write something like
ret->protocol = (char *) strtok(tmp, "/");
And then later call
ret->protocol = (char *) strtok(ret->protocol, ":");
Does me first pointer ret->protocol held remain in memory ? I thought that maybe I should set the first call to a tmp pointer, call strtok pointing ret->protocol to the right portion of the string (the second call) and then free(tmp).
What should be the best way to use strtok ?
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
发布评论
评论(4)
您知道可以使用 NULL 作为 strtok 的第一个参数继续解析字符串吗?
第一次调用:
char* token = strtok(string, delimiters);
然后:
token = strtok(NULL, other_delimiters);
这可以让您简化代码:
int parse_url(char *url, aUrl *ret)
{
//get protocol
char* token = strtok(url, "/");
if( token == NULL )
return -1;
strcpy(ret->protocol, token);
strcat(ret->protocol, "//");
// skip next '/'
token = strtok(NULL, "/");
if( token == NULL )
return -1;
//get host
token = strtok(NULL, "/");
if( token == NULL )
return -1;
strcpy(ret->host, token);
// get path
token = strtok(NULL, "#");
if( token == NULL )
return -1;
strcpy(ret->path, token);
// ...
return 0;
}
您可以看到我有一个返回值来知道解析是否成功完成。
感谢您分享您的代码!我在 valgrind 中运行它并修复了 strdup 函数生成的两个内存泄漏。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct {
char *protocol;
char *host;
int port;
char *path;
} URL;
void parse_url(char *url, URL *ret) {
char *tmp = (char *) strdup(url);
int len = 0;
ret->protocol = (char *) strtok(tmp, "/");
len = strlen(ret->protocol) + 2;
ret->host = (char *) strtok(NULL, "/");
len += strlen(ret->host);
ret->path = (char *) strdup(&url[len]);
ret->path = (char *) strtok(ret->path, "#");
ret->protocol = (char *) strtok(ret->protocol, ":");
ret->host = (char *) strtok(ret->host, ":");
tmp = (char *) strtok(NULL, ":");
if (tmp == NULL) {
if (strcmp(ret->protocol, "http") == 0) {
ret->port = 80;
} else if (strcmp(ret->protocol, "https") == 0) {
ret->port = 443;
}
} else {
ret->port = atoi(tmp);
}
}
void free_url(URL *url) {
free(url->path);
free(url->protocol);
}
int main(int argc, char** argv) {
URL url;
parse_url("http://example.com:3000/Teste/asdf#coisa", &url);
printf("protocol: %s\nhost: %s\nport: %d\npath: %s\n", url.protocol, url.host, url.port, url.path);
free_url(&url);
return (EXIT_SUCCESS);
}
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
为了直接回答你的问题,strtok只返回一个指向你作为输入提供的字符串内的位置的指针——它不会为你分配新的内存,所以不需要在它给你的任何指针上调用free回来作为回报。
就其价值而言,您还可以研究“strchr”和“strstr”,它们是在字符串中搜索单个字符或序列的非破坏性方法。
另请注意,您的内存分配在这里存在问题 - 您使用 strdup() 在解析函数内分配一个新字符串,然后将该内存块的片段分配给“ret”字段。因此,您的调用者将负责释放 strdup 字符串,但由于您只是在 ret 内隐式地将该字符串传递回,因此调用者需要神奇地知道要传递给 free 的指针。 (可能是 ret-> 协议,但也可能不是,具体取决于输入的外观。)
To answer your question directly, strtok only returns a pointer to a location inside the string you give it as input-- it doesn't allocate new memory for you, so shouldn't need to call free on any of the pointers it gives you back in return.
For what it's worth, you could also look into "strchr" and "strstr", which are nondestructive ways of searching for single characters or sequences within strings.
Also note that your memory allocation is problematic here-- you're using strdup() to allocate a new string inside your parse function, and then you're assigning fragments of that memory block to fields of "ret". Your caller will thus be responsible for free'ing the strdup'd string, but since you're only passing that string back implicitly inside ret, the caller needs to know magically what pointer to pass to free. (Probably ret->protocol, but maybe not, depending on how the input looks.)