在C中为大.txt文件分配内存

发布于 2025-01-16 16:43:04 字数 1928 浏览 0 评论 0原文

我需要使用 malloc 或 calloc 为一个如下所示的大文件分配内存:

2357 VKLYKK
7947 1WTFWZ
3102 F2IXK3
2963 EXMW55
2865 50CJES
2510 8PC1AI

该 .txt 文件中有大约 10K 行。如何分配所需的内存?

该程序应该做什么?该程序必须读取整个 .txt 文件。按第一个数字对其进行排序并将输出发送到 out.txt。但由于文件的输入很大,它不会让我这样做。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#pragma warning(disable : 4996)

typedef struct {
    int number;
    char order[10];
} Data;

int sorting(const void *a, const void *b)
{
    Data *dataA = (Data *)a;
    Data *dataB = (Data *)b;

    // return (dataA->number - dataB->number);  // Ascending order
    return (dataB->number - dataA->number);  // Descending order
}

int main()
{
    FILE *fp;
    FILE *f = fopen("out.txt", "w");
    Data data[20];
    char *line[150]
    int i = 0;
    char file_name[10] = "";

    printf("enter file name: ");
    scanf("%s", &file_name);
    fp = fopen(file_name, "r");
    if (fp == NULL)
    {
        printf("\n%s\" File not found!", file_name);
        exit(1);
    }
    while (1)
    {
        if (fgets(line, 150, fp) == NULL)
            break;

        char *pch;
        pch = strtok(line, " ");
        data[i].number = atoi(pch);     
        pch = strtok(NULL, " ");
        strcpy(data[i].order, pch);
        i++;     
    }
    printf("#################\n");
    printf("number\torder\n"); 
    for (int k = 0; k < 10; k++)
    {
        printf("%d\t%s", data[k].number, data[k].order);
    }

    qsort(data, 10, sizeof(Data), sorting);

    printf("\n#################\n");
    printf("number\torder\n"); 
    for (int k = 0; k < 10; k++)
    {
        printf("%d\t%s", data[k].number, data[k].order);
        fprintf(f, "%d\t%s", data[k].number, data[k].order);
    }
    fclose(fp);
    fclose(f);
    return 0;
}

I need to allocate memory using malloc or calloc, for a large file that looks like this:

2357 VKLYKK
7947 1WTFWZ
3102 F2IXK3
2963 EXMW55
2865 50CJES
2510 8PC1AI

There are around 10K of lines in that .txt file. How can I allocate the required memory?

What is the program supposed to do? The program has to read the whole .txt file. Sort it by the first number and send output to out.txt. But since the the input of the file is huge it won't let me.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#pragma warning(disable : 4996)

typedef struct {
    int number;
    char order[10];
} Data;

int sorting(const void *a, const void *b)
{
    Data *dataA = (Data *)a;
    Data *dataB = (Data *)b;

    // return (dataA->number - dataB->number);  // Ascending order
    return (dataB->number - dataA->number);  // Descending order
}

int main()
{
    FILE *fp;
    FILE *f = fopen("out.txt", "w");
    Data data[20];
    char *line[150]
    int i = 0;
    char file_name[10] = "";

    printf("enter file name: ");
    scanf("%s", &file_name);
    fp = fopen(file_name, "r");
    if (fp == NULL)
    {
        printf("\n%s\" File not found!", file_name);
        exit(1);
    }
    while (1)
    {
        if (fgets(line, 150, fp) == NULL)
            break;

        char *pch;
        pch = strtok(line, " ");
        data[i].number = atoi(pch);     
        pch = strtok(NULL, " ");
        strcpy(data[i].order, pch);
        i++;     
    }
    printf("#################\n");
    printf("number\torder\n"); 
    for (int k = 0; k < 10; k++)
    {
        printf("%d\t%s", data[k].number, data[k].order);
    }

    qsort(data, 10, sizeof(Data), sorting);

    printf("\n#################\n");
    printf("number\torder\n"); 
    for (int k = 0; k < 10; k++)
    {
        printf("%d\t%s", data[k].number, data[k].order);
        fprintf(f, "%d\t%s", data[k].number, data[k].order);
    }
    fclose(fp);
    fclose(f);
    return 0;
}

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(3

梦在深巷 2025-01-23 16:43:04

如果您的文件包含 10,000 行左右,您的 while 循环将很快超出您的 data 数组(您仅使用 20 个元素声明)。如果事先不知道行数,最好的方法是使用不断增长的数组。首先初始化 data (以及新的 dataSizedataCount 变量),如下所示:

int dataSize = 0;
int dataCount = 0;
Data *data = NULL;

然后,当您用完数组中的空间时,当它达到dataSize 条目您将不得不增加您的数组。类似这样的:

while (1) {
    if (dataCount >= dataSize) {
        Data *new;
        dataSize += 1000;
        new = realloc(data,dataSize * sizeof *data);
        if (new == NULL) {
            perror("realloc");
            free(data);
            return 2;
        }
        data = new;
    }     
    int cnt = fscanf(fp,"%d %9s", &data[dataCount].number, data[dataCount].order);
    if (cnt == EOF)
        break;
    if (cnt != 2) {
        printf("Error reading data\n");
        return 1;
    }
    dataCount++;
}

当 while 循环结束时(如果没有错误),data 数组将包含所有数据,dataCount 将是数据总数找到的物品。

请注意,我使用了 fscanf 而不是 fgets,因为这消除了对中间步骤的需要,例如调用 atoistrcpy >。我还进行了一些简单的错误检查。我选择 1000 作为增长增量,但您可以更改它。但太小了,堆碎片会更快,而太大则太快需要大量内存。

If your file contains 10,000 lines or so, your while loop will quickly overrun your data array (which you declared with only 20 elements). If the number of lines is not known in advance, the best way to do this is with a growing array. Start by initialing data (and new dataSize and dataCount variables) as follows:

int dataSize = 0;
int dataCount = 0;
Data *data = NULL;

Then as you use up the space in the array, when it reaches dataSize entries you will have to grow your array. Something like this:

while (1) {
    if (dataCount >= dataSize) {
        Data *new;
        dataSize += 1000;
        new = realloc(data,dataSize * sizeof *data);
        if (new == NULL) {
            perror("realloc");
            free(data);
            return 2;
        }
        data = new;
    }     
    int cnt = fscanf(fp,"%d %9s", &data[dataCount].number, data[dataCount].order);
    if (cnt == EOF)
        break;
    if (cnt != 2) {
        printf("Error reading data\n");
        return 1;
    }
    dataCount++;
}

When the while loop finishes (if there were no errors), the data array will contain all of the data, and dataCount will be the total number of data items found.

Note that I used fscanf instead of fgets, as this eliminates the need for intermediate step like calls to atoi and strcpy. I also put in some simple error checking. I chose 1000 as the growth increment, though you can change that. But too small and it fragments the heap more rapidly, and too big requires larger amounts of memory too quickly.

谁把谁当真 2025-01-23 16:43:04

这一行

char* line[150];

创建了一个 150 个字符指针的数组,如果您正在阅读这样的一行,这不是您想要的,

if (fgets(line, 150, fp) == NULL) break;

我怀疑您想要一行 150 个字符,

所以这样做

 char line[150];

this line

char* line[150];

creates an array of 150 char pointers, this is not what you want if you are reading one line like this

if (fgets(line, 150, fp) == NULL) break;

I suspect you wanted one line of 150 chars

so do

 char line[150];
Oo萌小芽oO 2025-01-23 16:43:04

您可以使用 qsort 对行数组进行排序,但这可能不是最好的方法。将行插入到可以轻松按顺序遍历的数据结构中可能会更有效。尽管这种简单的解决方案远不理想,但这里有一个简单的插入树的示例。这按字典顺序对行进行排序;将其修改为基于行进行数字排序是一个很好的练习。

/* Build an (unbalanced) binary search tree of lines in input. */

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void * xrealloc(void *buf, size_t num, size_t siz, void *end);
FILE * xfopen(const char *path, const char *mode);

struct entry {
    const char *line;
    struct entry *node[2];
};

static struct entry *
new_node(const char *line)
{
    struct entry *e = calloc(1, sizeof *e);
    if( e == NULL ){
        perror("calloc");
        exit(EXIT_FAILURE);
    }
    e->line = line;
    return e;
}

/*
 * Note that this tree needs to be rebalanced.  In a real
 * project, we would use existing libraries.
 */
static struct entry *
lookup(struct entry **lines, const char *line)
{
    struct entry *t = *lines;
    if( t ){
        int cmp = strcmp(line, t->line);
        return lookup(&t->node[cmp > 0], line);
    } else {
        return *lines = new_node(line);
    }
}

/* In-order descent of the tree, printing one line per entry */
static void
print_table(const struct entry *t)
{
    if( t ){
        print_table(t->node[0]);
        printf("%s", t->line);
        print_table(t->node[1]);
    }
}

static void *
xrealloc(void *buf, size_t num, size_t siz, void *endvp)
{
    char **endp = endvp;
    ptrdiff_t offset = endp && *endp ? *endp - (char *)buf : 0;
    buf = realloc(buf, num * siz);
    if( buf == NULL ){
        perror("realloc");
        exit(EXIT_FAILURE);
    }
    if( endp != NULL ){
        *endp = buf + offset;
    }
    return buf;
}

int
main(int argc, char **argv)
{
    FILE *ifp = argc > 1 ? xfopen(argv[1], "r") : stdin;
    struct entry *lines = NULL;
    char *line = NULL;
    size_t cap = 0;
    while( getline(&line, &cap, ifp) > 0 ){
        (void) lookup(&lines, line);
        line = NULL;
    }
    print_table(lines);
}

FILE *
xfopen(const char *path, const char *mode)
{
    FILE *fp = path[0] != '-' || path[1] != '\0' ? fopen(path, mode) :
        *mode == 'r' ? stdin : stdout;
    if( fp == NULL ){
        perror(path);
        exit(EXIT_FAILURE);
    }
    return fp;
}

You can use qsort to sort the array of lines, but that may not be the best approach. It may be more effective to insert the lines into a data structure that can be easily traversed in order. Although this simple minded solution is very much less than ideal, here's a simple-minded example of inserting into a tree. This sorts the lines lexicographically; modifying it to sort numerically based on the line is a good exercise.

/* Build an (unbalanced) binary search tree of lines in input. */

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void * xrealloc(void *buf, size_t num, size_t siz, void *end);
FILE * xfopen(const char *path, const char *mode);

struct entry {
    const char *line;
    struct entry *node[2];
};

static struct entry *
new_node(const char *line)
{
    struct entry *e = calloc(1, sizeof *e);
    if( e == NULL ){
        perror("calloc");
        exit(EXIT_FAILURE);
    }
    e->line = line;
    return e;
}

/*
 * Note that this tree needs to be rebalanced.  In a real
 * project, we would use existing libraries.
 */
static struct entry *
lookup(struct entry **lines, const char *line)
{
    struct entry *t = *lines;
    if( t ){
        int cmp = strcmp(line, t->line);
        return lookup(&t->node[cmp > 0], line);
    } else {
        return *lines = new_node(line);
    }
}

/* In-order descent of the tree, printing one line per entry */
static void
print_table(const struct entry *t)
{
    if( t ){
        print_table(t->node[0]);
        printf("%s", t->line);
        print_table(t->node[1]);
    }
}

static void *
xrealloc(void *buf, size_t num, size_t siz, void *endvp)
{
    char **endp = endvp;
    ptrdiff_t offset = endp && *endp ? *endp - (char *)buf : 0;
    buf = realloc(buf, num * siz);
    if( buf == NULL ){
        perror("realloc");
        exit(EXIT_FAILURE);
    }
    if( endp != NULL ){
        *endp = buf + offset;
    }
    return buf;
}

int
main(int argc, char **argv)
{
    FILE *ifp = argc > 1 ? xfopen(argv[1], "r") : stdin;
    struct entry *lines = NULL;
    char *line = NULL;
    size_t cap = 0;
    while( getline(&line, &cap, ifp) > 0 ){
        (void) lookup(&lines, line);
        line = NULL;
    }
    print_table(lines);
}

FILE *
xfopen(const char *path, const char *mode)
{
    FILE *fp = path[0] != '-' || path[1] != '\0' ? fopen(path, mode) :
        *mode == 'r' ? stdin : stdout;
    if( fp == NULL ){
        perror(path);
        exit(EXIT_FAILURE);
    }
    return fp;
}
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文