strtok 问题,它没有按预期工作

发布于 2024-12-22 23:57:34 字数 5112 浏览 1 评论 0原文

我有一个读取格式化文件的函数。它看起来像这样:

1;Name_of_the_author;The date when the quote was published;The author of the quote;The quote
2;Name_of_the_author_2;The date when the second quote was published;The author of the second quote;The second quote

所以,分隔符是 ; 。我要做的就是检查每个序列/标记并检查它是否正确。然而问题是,它没有获得所有令牌,只是获得前三个令牌,在它刚刚中断的日期之后,它不会移动......这是附加的代码函数。忽略评论,这是一个学校项目,评论是罗马尼亚语的。

int svnCheckDb()
{
    FILE *file;
    int k, p, i=2, m, j=0;
    char mystring[1000000], *var, *var2, *string;
    file = fopen("db.txt", "r"); //deschidem fisierul
    if(file == NULL) {
        return 0;
    }
    else {
         //il putem accesa.
        while(fgets(mystring, 1000000, file) ) {
            if(j != 0)
            {
                //nu luam si prima linie cu descrierea repo-ului, prelucram doar citatele, j-ul numara randul pe care suntem
                //separam cu strtok linia citita si verificam fiecare informatie in parte pentru a fi corecta
                var = strtok(mystring, ";");
                k=1;
                /*
                k numara string-urile citite din descrierea citatelor tocmai citita. Primul e numarul de ordine, al doilea e utilizatorul
                care a adaugat citatul, al treilea reprezinta data adaugarii citatului, dupa care urmeaza citatul.
                */
                while(var != NULL) {
                    printf("k is %d and var is %s \n", k, var);
                    switch(k)
                    {
                        case 1:
                           //numarul de ordine. Daca e 0, inseamna ca nu e numar, returnam false
                            i = atoi(var);
                            if(i == 0)
                                return 0;
                            break;
                        case 2:
                            //utilizatorul care a adaugat citatul. Daca e gol sau nu e format doar din caractere a-z A-Z, returnam false
                            for( m = 0; m < strlen(var); m++ )
                                if(!isalpha(var[m]))
                                   return 0;
                            break;
                        case 3:
                            //data la care a fost adaugat citatul. Intrucat folosim formatul DD MM YY cu spatii intre ele, vom verifica daca e ok in fisier
                            string = var;
                            var2 = strtok(string, " ");
                            p=1; //folosim p sa vedem daca am ajuns la zi, luna sau an
                            while(var2 != NULL)
                            {
                                switch(p)
                                {
                                    case 1:
                                        //ziua
                                        i = atoi(var2);
                                        if(i == 0)
                                            return 0;
                                        else if(i > 31 || i < 1)
                                            return 0;
                                        break;
                                    case 2:
                                        //luna, care e formata din primele 3 caractere ale lunii si trebuie sa respecte formatul acesta
                                        if( strlen(var2) == 3)
                                        {
                                            for( m = 0; m < strlen(var2); m++ )
                                                if(!isalpha(var2[m]))
                                                    return 0;
                                        }
                                        else return 0;
                                        break;
                                    case 3:
                                        //anul.
                                        i = atoi(var2);
                                        if(i == 0)
                                            return 0;
                                        break;
                                }

                                var2 = strtok(NULL, " ");
                                p++;
                            }
                            break;
                        case 4:
                            //cine a adaugat citatul, vom folosi functia searchAuthor dupa ce va fi gata.
                            for( m = 0; m < strlen(var); m++ )
                                if(!isalpha(var[m]))
                                   return 0;
                            break;
                        case 5:
                            //citatul
                            if(strlen(var) == 0)
                                return 0;
                            printf("%d x \n", strlen(var));
                    }
                    var = strtok(NULL, ";"); //trecem la urmatorul sir de car separat de ;
                    k++;
                }
            }
            j++; //trecem la urmatoarea linie
        }
    }
    return 1;
}

k 只能达到 3,所以它只能得到数字、作者和日期。没有引用,也没有作者。所以我无法检查它们是否属实

I have a function that reads a formatted file. It looks like this:

1;Name_of_the_author;The date when the quote was published;The author of the quote;The quote
2;Name_of_the_author_2;The date when the second quote was published;The author of the second quote;The second quote

So, the delimiter is ; . What I have to do is to check every sequence/token and to check if it's correct. The problem however is that it doesn't get all the tokens, just the first three, after the date it just breaks, it doesn't move through... here's the attached code function. Ignore the comments, it's for a school project and the comments are in romanian.

int svnCheckDb()
{
    FILE *file;
    int k, p, i=2, m, j=0;
    char mystring[1000000], *var, *var2, *string;
    file = fopen("db.txt", "r"); //deschidem fisierul
    if(file == NULL) {
        return 0;
    }
    else {
         //il putem accesa.
        while(fgets(mystring, 1000000, file) ) {
            if(j != 0)
            {
                //nu luam si prima linie cu descrierea repo-ului, prelucram doar citatele, j-ul numara randul pe care suntem
                //separam cu strtok linia citita si verificam fiecare informatie in parte pentru a fi corecta
                var = strtok(mystring, ";");
                k=1;
                /*
                k numara string-urile citite din descrierea citatelor tocmai citita. Primul e numarul de ordine, al doilea e utilizatorul
                care a adaugat citatul, al treilea reprezinta data adaugarii citatului, dupa care urmeaza citatul.
                */
                while(var != NULL) {
                    printf("k is %d and var is %s \n", k, var);
                    switch(k)
                    {
                        case 1:
                           //numarul de ordine. Daca e 0, inseamna ca nu e numar, returnam false
                            i = atoi(var);
                            if(i == 0)
                                return 0;
                            break;
                        case 2:
                            //utilizatorul care a adaugat citatul. Daca e gol sau nu e format doar din caractere a-z A-Z, returnam false
                            for( m = 0; m < strlen(var); m++ )
                                if(!isalpha(var[m]))
                                   return 0;
                            break;
                        case 3:
                            //data la care a fost adaugat citatul. Intrucat folosim formatul DD MM YY cu spatii intre ele, vom verifica daca e ok in fisier
                            string = var;
                            var2 = strtok(string, " ");
                            p=1; //folosim p sa vedem daca am ajuns la zi, luna sau an
                            while(var2 != NULL)
                            {
                                switch(p)
                                {
                                    case 1:
                                        //ziua
                                        i = atoi(var2);
                                        if(i == 0)
                                            return 0;
                                        else if(i > 31 || i < 1)
                                            return 0;
                                        break;
                                    case 2:
                                        //luna, care e formata din primele 3 caractere ale lunii si trebuie sa respecte formatul acesta
                                        if( strlen(var2) == 3)
                                        {
                                            for( m = 0; m < strlen(var2); m++ )
                                                if(!isalpha(var2[m]))
                                                    return 0;
                                        }
                                        else return 0;
                                        break;
                                    case 3:
                                        //anul.
                                        i = atoi(var2);
                                        if(i == 0)
                                            return 0;
                                        break;
                                }

                                var2 = strtok(NULL, " ");
                                p++;
                            }
                            break;
                        case 4:
                            //cine a adaugat citatul, vom folosi functia searchAuthor dupa ce va fi gata.
                            for( m = 0; m < strlen(var); m++ )
                                if(!isalpha(var[m]))
                                   return 0;
                            break;
                        case 5:
                            //citatul
                            if(strlen(var) == 0)
                                return 0;
                            printf("%d x \n", strlen(var));
                    }
                    var = strtok(NULL, ";"); //trecem la urmatorul sir de car separat de ;
                    k++;
                }
            }
            j++; //trecem la urmatoarea linie
        }
    }
    return 1;
}

And k gets only to 3, so it gets only the number, the author and the date. No quote and no author. So I can't check them and see if it's true

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(3

才能让你更想念 2024-12-29 23:57:34

在这里:

string = var;
var2 = strtok(string, " ");

你遇到麻烦了。 strtok 忘记了它曾经有一个较长的字符串需要标记,现在它只记得之前返回的部分。您可以使用 strtok_r 同时标记不同的字符串。请阅读手册页以获取更多详细信息。

如果 strtok_r 不可用,在您的情况下,这将是在 case 3 的内部循环中进行解析而不使用 strtok 的最简单方法>。由于预期的格式是严格的,因此按顺序检查这三个字段并不复杂。如果你敢的话,你还可以存储 strtok 插入缓冲区的 '\0' 字节的位置,并在内部循环之后,根据需要将它们替换为 ' ' 或 ';' ,并将重新修改的缓冲区再次提供给strtok。但这很容易出错,我强烈建议不要尝试这样做。

Here:

string = var;
var2 = strtok(string, " ");

You get into trouble. strtok forgets it once had a longer string to tokenise, now all it remembers is the part it previously returned. You can tokenise different strings at the same time with strtok_r. Read the man page for more detail.

If strtok_r isn't available, in your situation it would be the easiest way to do the parsing in the inner loop in case 3 without using strtok. Since the expected format is rigid, it's not complicated to check the three fields in order. You can, if you dare, also store the location of the '\0' bytes that strtok inserted into the buffer, and after the inner loop, replace them with ' ' or ';', as appropriate, and feed the re-modified buffer again to strtok. But that's very error-prone, I strongly advise against trying that.

聊慰 2024-12-29 23:57:34

您可以首先去掉第一个循环和其他变量。
第一个 strtok 必须位于将帮助您划分每个标记的循环之外,必须这样做才能存储要在 strtok 函数中处理的缓冲区。
除非您确定不想再分割主数据,否则不能重用 strtok 函数,因为如果您在主处理结束之前重用 strtok,则会重置 strtok 函数使用的数据。
示例:

char str[] = "hello world how are you?\n";
char *res;
// here i tell strtok the string str is the one i want to separate
res = strtok(str, " \n");
int i = 0;
// here i separate str, using the caracters space and endline as separators
while (res != null)
{
 res = strtok(NULL, " \n"); // each time i pass in this part of the loop i get my new     word in res
 ++i; // here the variable i represents the number of times i enter the loop
}

// here i can use again strtok with another string

如果您的作业中允许使用 sscanf 函数,并且由于您似乎知道文件的确切格式,因此您可能需要使用它。
getline 函数还允许您逐行获取文件,并且您可以一次处理每个句子。

You can start by taking your first loop away and other variables too.
The first strtok has to be outside the loop that is going to help you divide each token, this has to be done in order to store the buffer you want to treat in the strtok function.
You can't reuse the strtok function until you are certain that you do not want to divide your main data anymore, because if you reuse strtok before the end of the main treatment you are reseting the data used by the strtok function.
example:

char str[] = "hello world how are you?\n";
char *res;
// here i tell strtok the string str is the one i want to separate
res = strtok(str, " \n");
int i = 0;
// here i separate str, using the caracters space and endline as separators
while (res != null)
{
 res = strtok(NULL, " \n"); // each time i pass in this part of the loop i get my new     word in res
 ++i; // here the variable i represents the number of times i enter the loop
}

// here i can use again strtok with another string

If the sscanf function is allowed in your assignment and since you seem to know the exact format of your file, you may want to use it.
Also the getline function allows you to fetch line by line of your file, and you could treat each sentence at a time.

ぃ双果 2024-12-29 23:57:34

示例 strtok 代码,创建一个指向 strtok 从大字符串中删除的每个元素的指针数组。

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define MAX_SPLIT 10

void split(char **result, char *working, const char *delim)
{
          int i;
          char *p=strtok(working, delim);
          for(i=0; p!=NULL && i<MAX_SPLIT; p=strtok(NULL, delim), i++ )
          {
              result[i]=p;
              result[i+1]=NULL;
          }
}

int foo(const char *splitme, const char *delim)
{
    int retval=0;
    char *result[MAX_SPLIT]={NULL};
    char *working=strdup(splitme);
    int i=0;
    if(working!=NULL)
    {
          split(result, working, delim);
        retval=1;
        while(result[i]!=NULL)        
              printf("%s\n", result[i++]);                     
        free(working);      
    }
    return retval;
}

sample strtok code, creates an array of pointers to each element strtok whacks off the big string.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define MAX_SPLIT 10

void split(char **result, char *working, const char *delim)
{
          int i;
          char *p=strtok(working, delim);
          for(i=0; p!=NULL && i<MAX_SPLIT; p=strtok(NULL, delim), i++ )
          {
              result[i]=p;
              result[i+1]=NULL;
          }
}

int foo(const char *splitme, const char *delim)
{
    int retval=0;
    char *result[MAX_SPLIT]={NULL};
    char *working=strdup(splitme);
    int i=0;
    if(working!=NULL)
    {
          split(result, working, delim);
        retval=1;
        while(result[i]!=NULL)        
              printf("%s\n", result[i++]);                     
        free(working);      
    }
    return retval;
}
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文