为什么 fsync() 在 Linux 内核 3.1.* 上比内核 3.0 花费更多时间

发布于 2024-12-26 22:29:30 字数 2722 浏览 1 评论 0原文

我有一个测试程序。在Linux内核3.1.*上大约需要37秒,但在内核3.0.18上只需要大约1秒(我只是在与以前相同的机器上替换内核)。请给我一些关于如何在内核 3.1 上改进它的线索。谢谢!

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>


/* Flush the file referred to by fd to stable storage.
 * Kept as a one-line wrapper so the benchmark can be switched between
 * fsync() and fdatasync() in a single place.
 * Returns 0 on success, -1 on error with errno set. */
int my_fsync(int fd)
{
    int ret;

    /* Swap in fdatasync(fd) here to sync data only and skip
     * non-essential metadata such as mtime. */
    ret = fsync(fd);
    return ret;
}


/* Benchmark driver: atomically replaces "./foo" 1000 times using the
 * classic write-new / fsync / close / rename pattern, to measure fsync
 * latency. Exits with status 1 on any failure. */
int main(int argc, char **argv)
{
    int rc = 0;
    int count;
    int i;
    char oldpath[1024];
    char newpath[1024];
    /* Ten 1 KiB zero-filled chunks are written per iteration. */
    char *writebuffer = calloc(1024, 1);

    /* Fix: calloc result was used unchecked. */
    if (writebuffer == NULL) {
        fprintf(stderr, "calloc failed!\n");
        exit(1);
    }

    snprintf(oldpath, sizeof(oldpath), "./%s", "foo");
    snprintf(newpath, sizeof(newpath), "./%s", "foo.new");

    for (count = 0; count < 1000; ++count) {
        int fd = open(newpath, O_CREAT | O_TRUNC | O_WRONLY, S_IRWXU);
        if (fd == -1) {
            fprintf(stderr, "open error! path: %s\n", newpath);
            exit(1);
        }

        for (i = 0; i < 10; i++) {
            rc = write(fd, writebuffer, 1024);
            if (rc != 1024) {
                fprintf(stderr, "underwrite!\n");
                exit(1);
            }
        }

        /* Sync before rename so the new contents are durable before the
         * old file is replaced (atomic-update pattern). */
        if (my_fsync(fd)) {
            /* Fix: perror appends ": <errno message>\n" itself, so the
             * old trailing "\n" in the argument mangled the output. */
            perror("fsync failed");
            exit(1);
        }

        if (close(fd)) {
            perror("close failed");
            exit(1);
        }

        if (rename(newpath, oldpath)) {
            perror("rename failed");
            exit(1);
        }
    }

    /* Fix: buffer was leaked (only reclaimed at process exit). */
    free(writebuffer);
    return 0;
}


# strace -c ./testfsync
% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 98.58    0.068004          68      1000           fsync
  0.84    0.000577           0     10001           write
  0.40    0.000275           0      1000           rename
  0.19    0.000129           0      1003           open
  0.00    0.000000           0         1           read
  0.00    0.000000           0      1003           close
  0.00    0.000000           0         1           execve
  0.00    0.000000           0         1         1 access
  0.00    0.000000           0         3           brk
  0.00    0.000000           0         1           munmap
  0.00    0.000000           0         2           setitimer
  0.00    0.000000           0        68           sigreturn
  0.00    0.000000           0         1           uname
  0.00    0.000000           0         1           mprotect
  0.00    0.000000           0         2           writev
  0.00    0.000000           0         2           rt_sigaction
  0.00    0.000000           0         6           mmap2
  0.00    0.000000           0         2           fstat64
  0.00    0.000000           0         1           set_thread_area
------ ----------- ----------- --------- --------- ----------------
100.00    0.068985                 14099         1 total

I have a test program. It takes about 37 seconds on Linux kernel 3.1.*, but only about 1 second on kernel 3.0.18 (I just replaced the kernel on the same machine as before). Please give me a clue on how to improve it on kernel 3.1. Thanks!

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>


/* Flush the file referred to by fd to stable storage.
 * Kept as a one-line wrapper so the benchmark can be switched between
 * fsync() and fdatasync() in a single place.
 * Returns 0 on success, -1 on error with errno set. */
int my_fsync(int fd)
{
    int ret;

    /* Swap in fdatasync(fd) here to sync data only and skip
     * non-essential metadata such as mtime. */
    ret = fsync(fd);
    return ret;
}


/* Benchmark driver: atomically replaces "./foo" 1000 times using the
 * classic write-new / fsync / close / rename pattern, to measure fsync
 * latency. Exits with status 1 on any failure. */
int main(int argc, char **argv)
{
    int rc = 0;
    int count;
    int i;
    char oldpath[1024];
    char newpath[1024];
    /* Ten 1 KiB zero-filled chunks are written per iteration. */
    char *writebuffer = calloc(1024, 1);

    /* Fix: calloc result was used unchecked. */
    if (writebuffer == NULL) {
        fprintf(stderr, "calloc failed!\n");
        exit(1);
    }

    snprintf(oldpath, sizeof(oldpath), "./%s", "foo");
    snprintf(newpath, sizeof(newpath), "./%s", "foo.new");

    for (count = 0; count < 1000; ++count) {
        int fd = open(newpath, O_CREAT | O_TRUNC | O_WRONLY, S_IRWXU);
        if (fd == -1) {
            fprintf(stderr, "open error! path: %s\n", newpath);
            exit(1);
        }

        for (i = 0; i < 10; i++) {
            rc = write(fd, writebuffer, 1024);
            if (rc != 1024) {
                fprintf(stderr, "underwrite!\n");
                exit(1);
            }
        }

        /* Sync before rename so the new contents are durable before the
         * old file is replaced (atomic-update pattern). */
        if (my_fsync(fd)) {
            /* Fix: perror appends ": <errno message>\n" itself, so the
             * old trailing "\n" in the argument mangled the output. */
            perror("fsync failed");
            exit(1);
        }

        if (close(fd)) {
            perror("close failed");
            exit(1);
        }

        if (rename(newpath, oldpath)) {
            perror("rename failed");
            exit(1);
        }
    }

    /* Fix: buffer was leaked (only reclaimed at process exit). */
    free(writebuffer);
    return 0;
}


# strace -c ./testfsync
% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 98.58    0.068004          68      1000           fsync
  0.84    0.000577           0     10001           write
  0.40    0.000275           0      1000           rename
  0.19    0.000129           0      1003           open
  0.00    0.000000           0         1           read
  0.00    0.000000           0      1003           close
  0.00    0.000000           0         1           execve
  0.00    0.000000           0         1         1 access
  0.00    0.000000           0         3           brk
  0.00    0.000000           0         1           munmap
  0.00    0.000000           0         2           setitimer
  0.00    0.000000           0        68           sigreturn
  0.00    0.000000           0         1           uname
  0.00    0.000000           0         1           mprotect
  0.00    0.000000           0         2           writev
  0.00    0.000000           0         2           rt_sigaction
  0.00    0.000000           0         6           mmap2
  0.00    0.000000           0         2           fstat64
  0.00    0.000000           0         1           set_thread_area
------ ----------- ----------- --------- --------- ----------------
100.00    0.068985                 14099         1 total

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(2)

香草可樂 2025-01-02 22:29:30

内核 3.1.* 实际上正在执行同步,3.0.18 正在伪造同步。您的代码执行 1,000 次同步写入。由于截断了文件,因此每次写入也会放大文件。所以你实际上有 2,000 个写操作。典型的硬盘驱动器写入延迟约为每个 I/O 20 毫秒。因此 2,000*20 = 40,000 毫秒或 40 秒。因此,假设您正在写入典型的硬盘驱动器,这似乎是正确的。

基本上,通过在每次写入后进行同步,内核无法有效地缓存或重叠写入,并在每个操作上强制执行最坏情况的行为。此外,硬盘驱动器最终必须在每次写入时在数据写入位置和元数据写入位置之间来回查找。

Kernel 3.1.* is actually doing the sync, 3.0.18 is faking it. Your code does 1,000 synchronized writes. Since you truncate the file, each write also enlarges the file. So you actually have 2,000 write operations. Typical hard drive write latency is about 20 milliseconds per I/O. So 2,000*20 = 40,000 milliseconds or 40 seconds. So it seems about right, assuming you're writing to a typical hard drive.

Basically, by syncing after each write, you give the kernel no ability to efficiently cache or overlap the writes and force worst-case behavior on every operation. Also, the hard drive winds up having to seek back and forth between where the data is written and where the metadata is written once for each write.

淡紫姑娘! 2025-01-02 22:29:30

找到原因了。 Linux 内核 3.1 (http://kernelnewbies.org/Linux_3.1) 的 ext3 中默认启用文件系统屏障。禁用障碍后,它变得更快。

Found the reason. File system barriers are enabled by default in ext3 as of Linux kernel 3.1 (http://kernelnewbies.org/Linux_3.1). After disabling barriers, it becomes much faster.

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文