C++ fstream输出错误数据

发布于 2024-12-15 10:36:58 字数 3290 浏览 2 评论 0原文

上下文第一:

我的程序执行一些并行计算,并将其记录在文件中。线程按块分组(我正在使用 CUDA)。日志文件的格式如下:

#begin run
({blockIdx,threadIdx}) {thread_info}
({blockIdx,threadIdx}) {thread_info}
...
#end run

我编写了一个函数,该函数应该读取日志文件并按线程对每个运行消息进行排序。

//------------------------------------------------------------------------------
// Comparison struct for log file sorting
//
// Orders two log lines by their "(blockIdx,threadIdx)" prefix only: the text up
// to and including the first ')' plus the single character that follows it.
// The remainder of a line (the thread message) never takes part in the
// comparison, so lines sharing a prefix compare as equivalent.
// The comparison is lexicographic, not numeric.
//------------------------------------------------------------------------------
struct comp
{
    // Returns true when the prefix of rString1 sorts before the prefix of rString2.
    // Declared const so the functor is usable wherever a const comparator is required.
    bool operator()(const std::string &rString1 , const std::string &rString2) const
    {
        return rString1.compare(0 , prefixLength(rString1) , rString2 , 0 , prefixLength(rString2)) < 0;
    }

private:
    // Number of leading characters of rLine that form the sort key.
    static std::string::size_type prefixLength(const std::string &rLine)
    {
        const std::string::size_type closeParenthesis = rLine.find(')');

        // No ')' means there is no prefix to isolate: use the whole line as the key.
        // (npos is a valid count for compare(); it is clamped to the string length.)
        if(closeParenthesis == std::string::npos)
            return std::string::npos;

        // +1 to include the ')' itself, +1 for the character right after it.
        return closeParenthesis + 2;
    }
};

//------------------------------------------------------------------------------------
// Sort the log file. Lines with same prefix (blockIdx,ThreadIdx) will be grouped in file per run.
//
// Reads m_strInputFile completely into memory. For every section delimited by
// "#begin run\n" and "#end run\n" it collects the lines accepted by verifyPrefix(),
// sorts them with `comp`, and writes the section (markers included) to m_strOutputFile.
//------------------------------------------------------------------------------------
void CudaUnitTest::sortFile()
{
    comp comparison;
    deque<string> threadsPrintfs;
    ifstream inputFile(m_strInputFile);
    assert(inputFile.is_open());

    //Read whole input file and close it. Saves disk accesses.
    string strContent((std::istreambuf_iterator<char>(inputFile)), std::istreambuf_iterator<char>());
    inputFile.close();

    ofstream outputFile(m_strOutputFile);
    assert(outputFile.is_open());

    string strLine;
    int iBeginRunIdx = -10; //starts at -10 so that the first search position (iBeginRunIdx + iBeginRunNewLineOffset) is index 0
    int iBeginRunNewLineOffset = 10; //offset from the start of "#begin run\n" to its trailing '\n' (the text "#begin run" is 10 characters long)
    int iEndRunIdx;
    int iLastNewLineIdx;
    int iNewLineIdx;

    //One iteration per run: locate the next "#begin run\n" marker after the previous one.
    while((iBeginRunIdx = strContent.find("#begin run\n" , iBeginRunIdx + iBeginRunNewLineOffset)) != string::npos)
    {
        //Every run is expected to be closed by an "#end run\n" marker.
        iEndRunIdx = strContent.find("#end run\n" , iBeginRunIdx + iBeginRunNewLineOffset);
        assert(iEndRunIdx != string::npos);

        //Start at the '\n' that terminates the "#begin run" line and walk newline to newline
        //until the next newline lies at or past the "#end run\n" marker.
        iLastNewLineIdx = iBeginRunIdx + iBeginRunNewLineOffset;
        while((iNewLineIdx = strContent.find("\n" , iLastNewLineIdx + 1)) < iEndRunIdx)
        {
            //NOTE(review): std::string::substr takes (pos, count), but iNewLineIdx is an absolute
            //index, not a length. Each extracted "line" therefore runs past its own newline and
            //drags following lines along, which matches the duplicated output described in the
            //question. The count should be iNewLineIdx - iLastNewLineIdx.
            strLine = strContent.substr(iLastNewLineIdx + 1 , iNewLineIdx);
            if(verifyPrefix(strLine))
                threadsPrintfs.push_back(strLine);
            iLastNewLineIdx = iNewLineIdx;
        }

        //sort last run info
        sort(threadsPrintfs.begin() , threadsPrintfs.end() , comparison);
        //Wrap the sorted lines in the run markers again before writing them out.
        threadsPrintfs.push_front("#begin run\n");
        threadsPrintfs.push_back("#end run\n");

        //output it
        for(deque<string>::iterator it = threadsPrintfs.begin() ; it != threadsPrintfs.end() ; ++it)
        {
            assert(outputFile.good());
            outputFile.write(it->c_str() , it->size());
        }
        outputFile.flush();
        //Empty the buffer so the next run starts from scratch.
        threadsPrintfs.clear();
    }

    outputFile.close();
}

问题是生成的文件有很多垃圾数据。例如,6KB 的输入日志文件生成 192KB 的输出日志!看起来输出文件与输入文件有很多重复。不过,在调试代码时,双端队列在排序之前和之后显示了正确的值。我认为 ofstream 编写本身有问题。

编辑:该函数不是并行运行的。

Context first:

My program does some parallel calculations, which are logged to a file. Threads are grouped into blocks (I'm using CUDA). The log file is formatted this way:

#begin run
({blockIdx,threadIdx}) {thread_info}
({blockIdx,threadIdx}) {thread_info}
...
#end run

I've written a function that should read the log file and sort each run's messages by thread.

//------------------------------------------------------------------------------
// Comparison struct for log file sorting
//
// Orders two log lines by their "(blockIdx,threadIdx)" prefix only: the text up
// to and including the first ')' plus the single character that follows it.
// The remainder of a line (the thread message) never takes part in the
// comparison, so lines sharing a prefix compare as equivalent.
// The comparison is lexicographic, not numeric.
//------------------------------------------------------------------------------
struct comp
{
    // Returns true when the prefix of rString1 sorts before the prefix of rString2.
    // Declared const so the functor is usable wherever a const comparator is required.
    bool operator()(const std::string &rString1 , const std::string &rString2) const
    {
        return rString1.compare(0 , prefixLength(rString1) , rString2 , 0 , prefixLength(rString2)) < 0;
    }

private:
    // Number of leading characters of rLine that form the sort key.
    static std::string::size_type prefixLength(const std::string &rLine)
    {
        const std::string::size_type closeParenthesis = rLine.find(')');

        // No ')' means there is no prefix to isolate: use the whole line as the key.
        // (npos is a valid count for compare(); it is clamped to the string length.)
        if(closeParenthesis == std::string::npos)
            return std::string::npos;

        // +1 to include the ')' itself, +1 for the character right after it.
        return closeParenthesis + 2;
    }
};

//------------------------------------------------------------------------------------
// Sort the log file. Lines with same prefix (blockIdx,ThreadIdx) will be grouped in file per run.
//
// Reads m_strInputFile completely into memory. For every section delimited by
// "#begin run\n" and "#end run\n" it collects the lines accepted by verifyPrefix(),
// sorts them with `comp`, and writes the section (markers included) to m_strOutputFile.
//------------------------------------------------------------------------------------
void CudaUnitTest::sortFile()
{
    comp comparison;
    deque<string> threadsPrintfs;
    ifstream inputFile(m_strInputFile);
    assert(inputFile.is_open());

    //Read whole input file and close it. Saves disk accesses.
    string strContent((std::istreambuf_iterator<char>(inputFile)), std::istreambuf_iterator<char>());
    inputFile.close();

    ofstream outputFile(m_strOutputFile);
    assert(outputFile.is_open());

    string strLine;
    int iBeginRunIdx = -10; //starts at -10 so that the first search position (iBeginRunIdx + iBeginRunNewLineOffset) is index 0
    int iBeginRunNewLineOffset = 10; //offset from the start of "#begin run\n" to its trailing '\n' (the text "#begin run" is 10 characters long)
    int iEndRunIdx;
    int iLastNewLineIdx;
    int iNewLineIdx;

    //One iteration per run: locate the next "#begin run\n" marker after the previous one.
    while((iBeginRunIdx = strContent.find("#begin run\n" , iBeginRunIdx + iBeginRunNewLineOffset)) != string::npos)
    {
        //Every run is expected to be closed by an "#end run\n" marker.
        iEndRunIdx = strContent.find("#end run\n" , iBeginRunIdx + iBeginRunNewLineOffset);
        assert(iEndRunIdx != string::npos);

        //Start at the '\n' that terminates the "#begin run" line and walk newline to newline
        //until the next newline lies at or past the "#end run\n" marker.
        iLastNewLineIdx = iBeginRunIdx + iBeginRunNewLineOffset;
        while((iNewLineIdx = strContent.find("\n" , iLastNewLineIdx + 1)) < iEndRunIdx)
        {
            //NOTE(review): std::string::substr takes (pos, count), but iNewLineIdx is an absolute
            //index, not a length. Each extracted "line" therefore runs past its own newline and
            //drags following lines along, which matches the duplicated output described in the
            //question. The count should be iNewLineIdx - iLastNewLineIdx.
            strLine = strContent.substr(iLastNewLineIdx + 1 , iNewLineIdx);
            if(verifyPrefix(strLine))
                threadsPrintfs.push_back(strLine);
            iLastNewLineIdx = iNewLineIdx;
        }

        //sort last run info
        sort(threadsPrintfs.begin() , threadsPrintfs.end() , comparison);
        //Wrap the sorted lines in the run markers again before writing them out.
        threadsPrintfs.push_front("#begin run\n");
        threadsPrintfs.push_back("#end run\n");

        //output it
        for(deque<string>::iterator it = threadsPrintfs.begin() ; it != threadsPrintfs.end() ; ++it)
        {
            assert(outputFile.good());
            outputFile.write(it->c_str() , it->size());
        }
        outputFile.flush();
        //Empty the buffer so the next run starts from scratch.
        threadsPrintfs.clear();
    }

    outputFile.close();
}

The problem is that the resulting file contains a lot of garbage data. For example, a 6KB input log file generated a 192KB output log! It appears that the output file contains many repetitions of the input file. When debugging the code, though, the deque showed the right values both before and after the sort. I think there is something wrong with the ofstream write itself.

Edit: The function isn't running in parallel.

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(1)

唐婉 2024-12-22 10:36:58

只是为了展示最终的代码。请注意 substr 上的更改,现在它接收的不是索引而是长度。

//------------------------------------------------------------------------------------
// Sort the log file. Lines with same prefix (blockIdx,ThreadIdx) will be grouped in file per run.
//------------------------------------------------------------------------------------
void CudaUnitTest::sortFile()
{
comp comparison;
deque<string> threadsPrintfs;
ifstream inputFile(m_strInputFile);
assert(inputFile.is_open());

//Read whole input file and close it. Saves disk accesses.
string strContent((std::istreambuf_iterator<char>(inputFile)), std::istreambuf_iterator<char>());
inputFile.close();

ofstream outputFile(m_strOutputFile);
assert(outputFile.is_open());

string strLine;
int iBeginRunIdx = -10; //value just to addapt on while loop (to start on [0])
int iBeginRunNewLineOffset = 10; //"idx offset to a new line char in string. Starts with the offset of the string "#begin run\n".
int iEndRunIdx;
int iLastNewLineIdx;
int iNewLineIdx;

while((iBeginRunIdx = strContent.find("#begin run\n" , iBeginRunIdx + iBeginRunNewLineOffset)) != string::npos)
{
    iEndRunIdx = strContent.find("#end run\n" , iBeginRunIdx + iBeginRunNewLineOffset);
    assert(iEndRunIdx != string::npos);

    iLastNewLineIdx = iBeginRunIdx + iBeginRunNewLineOffset;
    while((iNewLineIdx = strContent.find("\n" , iLastNewLineIdx + 1)) < iEndRunIdx)
    {
        strLine = strContent.substr(iLastNewLineIdx + 1 , iNewLineIdx - iLastNewLineIdx);
        if(verifyPrefix(strLine))
            threadsPrintfs.push_back(strLine);
        iLastNewLineIdx = iNewLineIdx;
    }

    //sort last run info
    sort(threadsPrintfs.begin() , threadsPrintfs.end() , comparison);
    threadsPrintfs.push_front("#begin run\n");
    threadsPrintfs.push_back("#end run\n");

    //output it
    for(deque<string>::iterator it = threadsPrintfs.begin() ; it != threadsPrintfs.end() ; ++it)
    {
        assert(outputFile.good());
        outputFile.write(it->c_str() , it->size());
    }
    threadsPrintfs.clear();
}

outputFile.close();
}

Just to show the final code. Note the change to the substr call: its second argument is now a length instead of an index.

//------------------------------------------------------------------------------------
// Sort the log file. Lines with same prefix (blockIdx,ThreadIdx) will be grouped in file per run.
//------------------------------------------------------------------------------------
void CudaUnitTest::sortFile()
{
comp comparison;
deque<string> threadsPrintfs;
ifstream inputFile(m_strInputFile);
assert(inputFile.is_open());

//Read whole input file and close it. Saves disk accesses.
string strContent((std::istreambuf_iterator<char>(inputFile)), std::istreambuf_iterator<char>());
inputFile.close();

ofstream outputFile(m_strOutputFile);
assert(outputFile.is_open());

string strLine;
int iBeginRunIdx = -10; //value just to addapt on while loop (to start on [0])
int iBeginRunNewLineOffset = 10; //"idx offset to a new line char in string. Starts with the offset of the string "#begin run\n".
int iEndRunIdx;
int iLastNewLineIdx;
int iNewLineIdx;

while((iBeginRunIdx = strContent.find("#begin run\n" , iBeginRunIdx + iBeginRunNewLineOffset)) != string::npos)
{
    iEndRunIdx = strContent.find("#end run\n" , iBeginRunIdx + iBeginRunNewLineOffset);
    assert(iEndRunIdx != string::npos);

    iLastNewLineIdx = iBeginRunIdx + iBeginRunNewLineOffset;
    while((iNewLineIdx = strContent.find("\n" , iLastNewLineIdx + 1)) < iEndRunIdx)
    {
        strLine = strContent.substr(iLastNewLineIdx + 1 , iNewLineIdx - iLastNewLineIdx);
        if(verifyPrefix(strLine))
            threadsPrintfs.push_back(strLine);
        iLastNewLineIdx = iNewLineIdx;
    }

    //sort last run info
    sort(threadsPrintfs.begin() , threadsPrintfs.end() , comparison);
    threadsPrintfs.push_front("#begin run\n");
    threadsPrintfs.push_back("#end run\n");

    //output it
    for(deque<string>::iterator it = threadsPrintfs.begin() ; it != threadsPrintfs.end() ; ++it)
    {
        assert(outputFile.good());
        outputFile.write(it->c_str() , it->size());
    }
    threadsPrintfs.clear();
}

outputFile.close();
}
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文