Python file.read() 在幕后获取超出必要的数据

发布于 2024-09-08 18:34:02 字数 2755 浏览 1 评论 0原文

cat file_ro.py 
import sys
def file_open(filename):
        fo=open(filename,'r')
        fo.seek(7)
        read_data=fo.read(3)
        fo.close()
        print read_data
file_open("file.py")

但是 strace 说

readlink("file_ro.py", 0x7fff31fc7ea0, 4096) = -1 EINVAL (Invalid argument)
getcwd("/home/laks/python", 4096)       = 18
lstat("/home/laks/python/file_ro.py", {st_mode=S_IFREG|0755, st_size=150, ...}) = 0
stat("file_ro.py", {st_mode=S_IFREG|0755, st_size=150, ...}) = 0
open("file_ro.py", O_RDONLY)            = 3
fstat(3, {st_mode=S_IFREG|0755, st_size=150, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fa671a6c000
fstat(3, {st_mode=S_IFREG|0755, st_size=150, ...}) = 0
lseek(3, 0, SEEK_SET)                   = 0
read(3, "import sys\ndef file_open(filenam"..., 128) = 128
read(3, "ile_open(\"file.py\")\n\t\n", 4096) = 22
close(3)                                = 0
munmap(0x7fa671a6c000, 4096)            = 0
stat("file_ro.py", {st_mode=S_IFREG|0755, st_size=150, ...}) = 0
open("file_ro.py", O_RDONLY)            = 3
fstat(3, {st_mode=S_IFREG|0755, st_size=150, ...}) = 0
ioctl(3, SNDCTL_TMR_TIMEBASE or TCGETS, 0x7fff31fc9e30) = -1 ENOTTY (Inappropriate ioctl for device)
fstat(3, {st_mode=S_IFREG|0755, st_size=150, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fa671a6c000
lseek(3, 0, SEEK_CUR)                   = 0
read(3, "import sys\ndef file_open(filenam"..., 4096) = 150
lseek(3, 150, SEEK_SET)                 = 150
read(3, "", 4096)                       = 0
close(3)                                = 0
munmap(0x7fa671a6c000, 4096)            = 0
open("file.py", O_RDONLY)               = 3
fstat(3, {st_mode=S_IFREG|0755, st_size=305, ...}) = 0
fstat(3, {st_mode=S_IFREG|0755, st_size=305, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fa671a6c000
lseek(3, 0, SEEK_SET)                   = 0
read(3, "import ", 7)                   = 7
read(3, "sys\ndef file_open(filename):\n\t\"\""..., 4096) = 298
close(3)                                = 0
munmap(0x7fa671a6c000, 4096)            = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fa671a6c000
write(1, "sys\n", 4sys
)                    = 4
rt_sigaction(SIGINT, {SIG_DFL, [], SA_RESTORER, 0x306140efa0}, {0x306d10b2b0, [], SA_RESTORER, 0x306140efa0}, 8) = 0
close(5)                                = 0
munmap(0x7fa671952000, 4096)            = 0
exit_group(0)  

正如您在上面看到的 -

read(3, "import sys\ndef file_open(filenam"..., 4096) = 150

为什么当程序说只读取 3 个字节时 read() 返回 150 个字节?

cat file_ro.py 
import sys
def file_open(filename):
        fo=open(filename,'r')
        fo.seek(7)
        read_data=fo.read(3)
        fo.close()
        print read_data
file_open("file.py")

But strace says

readlink("file_ro.py", 0x7fff31fc7ea0, 4096) = -1 EINVAL (Invalid argument)
getcwd("/home/laks/python", 4096)       = 18
lstat("/home/laks/python/file_ro.py", {st_mode=S_IFREG|0755, st_size=150, ...}) = 0
stat("file_ro.py", {st_mode=S_IFREG|0755, st_size=150, ...}) = 0
open("file_ro.py", O_RDONLY)            = 3
fstat(3, {st_mode=S_IFREG|0755, st_size=150, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fa671a6c000
fstat(3, {st_mode=S_IFREG|0755, st_size=150, ...}) = 0
lseek(3, 0, SEEK_SET)                   = 0
read(3, "import sys\ndef file_open(filenam"..., 128) = 128
read(3, "ile_open(\"file.py\")\n\t\n", 4096) = 22
close(3)                                = 0
munmap(0x7fa671a6c000, 4096)            = 0
stat("file_ro.py", {st_mode=S_IFREG|0755, st_size=150, ...}) = 0
open("file_ro.py", O_RDONLY)            = 3
fstat(3, {st_mode=S_IFREG|0755, st_size=150, ...}) = 0
ioctl(3, SNDCTL_TMR_TIMEBASE or TCGETS, 0x7fff31fc9e30) = -1 ENOTTY (Inappropriate ioctl for device)
fstat(3, {st_mode=S_IFREG|0755, st_size=150, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fa671a6c000
lseek(3, 0, SEEK_CUR)                   = 0
read(3, "import sys\ndef file_open(filenam"..., 4096) = 150
lseek(3, 150, SEEK_SET)                 = 150
read(3, "", 4096)                       = 0
close(3)                                = 0
munmap(0x7fa671a6c000, 4096)            = 0
open("file.py", O_RDONLY)               = 3
fstat(3, {st_mode=S_IFREG|0755, st_size=305, ...}) = 0
fstat(3, {st_mode=S_IFREG|0755, st_size=305, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fa671a6c000
lseek(3, 0, SEEK_SET)                   = 0
read(3, "import ", 7)                   = 7
read(3, "sys\ndef file_open(filename):\n\t\"\""..., 4096) = 298
close(3)                                = 0
munmap(0x7fa671a6c000, 4096)            = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fa671a6c000
write(1, "sys\n", 4sys
)                    = 4
rt_sigaction(SIGINT, {SIG_DFL, [], SA_RESTORER, 0x306140efa0}, {0x306d10b2b0, [], SA_RESTORER, 0x306140efa0}, 8) = 0
close(5)                                = 0
munmap(0x7fa671952000, 4096)            = 0
exit_group(0)  

As you can see above -

read(3, "import sys\ndef file_open(filenam"..., 4096) = 150

Why does read() return 150 bytes when the program asks to read only 3 bytes?

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(2)

〃温暖了心ぐ 2024-09-15 18:34:02

由于您读取的是另一个 py 文件,跟踪结果容易让人混淆,但看起来内置的文件对象并不会按您传给 read() 的大小来限制底层读取,而是把文件的其余部分也读入了缓冲区。也许可以尝试改用 os.read()?

file_ro.py:

import sys
def file_open(filename):
        fo=open(filename,'r')
        fo.seek(7)
        read_data=fo.read(3)
        fo.close()
        print read_data
file_open("zzz")

zzz:

12345678901234567890123456789012345678901234567890

strace:

...
open("zzz", O_RDONLY|O_LARGEFILE)       = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=51, ...}) = 0
fstat64(3, {st_mode=S_IFREG|0644, st_size=51, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb73fb000
_llseek(3, 0, [0], SEEK_SET)            = 0
read(3, "1234567", 7)                   = 7
read(3, "89012345678901234567890123456789"..., 4096) = 44
close(3)                                = 0
...

您可以在打开文件时指定缓冲区大小,例如 open('zzz', buffering=0);或者像我这样使用 os 模块,按需要更精确地控制文件读取:

file_ro2.py:

import sys, os
def file_open(filename):
        fo=os.open(filename, os.O_RDONLY)
        os.lseek(fo, 7, 0)
        read_data=os.read(fo, 3)
        os.close(fo)
        print read_data
file_open("zzz")

strace2:

...
open("zzz", O_RDONLY|O_LARGEFILE)       = 3
_llseek(3, 7, [7], SEEK_SET)            = 0
read(3, "890", 3)                       = 3
close(3)                                = 0
...

Since you're reading another .py file, the trace gets confusing, but it seems the built-in file object does not limit the underlying read to the size you pass to read(); it buffers the rest of the file as well. Maybe try using os.read() instead?

file_ro.py:

import sys
def file_open(filename):
        fo=open(filename,'r')
        fo.seek(7)
        read_data=fo.read(3)
        fo.close()
        print read_data
file_open("zzz")

zzz:

12345678901234567890123456789012345678901234567890

strace:

...
open("zzz", O_RDONLY|O_LARGEFILE)       = 3
fstat64(3, {st_mode=S_IFREG|0644, st_size=51, ...}) = 0
fstat64(3, {st_mode=S_IFREG|0644, st_size=51, ...}) = 0
mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0xb73fb000
_llseek(3, 0, [0], SEEK_SET)            = 0
read(3, "1234567", 7)                   = 7
read(3, "89012345678901234567890123456789"..., 4096) = 44
close(3)                                = 0
...

You can specify the buffer size when opening the file, e.g. open('zzz', buffering=0), or use the os module, as I did, to control the file reading more precisely:

file_ro2.py:

import sys, os
def file_open(filename):
        fo=os.open(filename, os.O_RDONLY)
        os.lseek(fo, 7, 0)
        read_data=os.read(fo, 3)
        os.close(fo)
        print read_data
file_open("zzz")

strace2:

...
open("zzz", O_RDONLY|O_LARGEFILE)       = 3
_llseek(3, 7, [7], SEEK_SET)            = 0
read(3, "890", 3)                       = 3
close(3)                                = 0
...
心房的律动 2024-09-15 18:34:02

缓冲。为了避免这种情况,请使用 open(filename, 'rb', buffering=0)(参数名是 buffering,而不是 bufsize)。

Buffering. To avoid that, use open(filename, 'rb', buffering=0) (the keyword is buffering, not bufsize).

~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文