Python:如何以流/管道方式输出 gzip 压缩后的数据?
我需要做这样的事情,但在 python 中:
dd if=/dev/sdb | gzip -c | curl ftp upload
我不能使用 Popen 的整个命令,因为:
- 我需要非阻塞操作
- 我需要进度信息(尝试循环 proc.stderr 无济于事)
另一件大事是我无法在上传之前在内存或磁盘上创建压缩的 gzip 文件。
所以这就是我想要弄清楚如何做,而 gzip_stream_of_strings(input) 是未知的:
import os, pycurl
# Path of the (very large) image file that is to be uploaded.
filename = '/path/to/super/large/file.img'
# Size in bytes of that file on disk, i.e. the size before any compression.
filesize = os.path.getsize(filename)
def progress(dl_left, dl_completed, ul_left, ul_completed):
    """Report upload progress; meant to be installed as pycurl's PROGRESSFUNCTION.

    All four arguments are supplied by pycurl; only ``ul_completed`` (the
    number of bytes uploaded so far) is used here.

    Returns 0 so that the transfer continues. libcurl aborts the transfer
    when the progress callback returns a non-zero value, so returning the
    percentage itself would cancel the upload as soon as any data was sent.
    """
    # NOTE(review): filesize is the size of the uncompressed file; when the
    # uploaded stream is gzip-compressed the percentage is only approximate.
    if filesize:  # guard against ZeroDivisionError for an empty file
        percent = (float(ul_completed) / filesize) * 100
        print("upload progress: %.1f%%" % percent)
    return 0
def main():
    """Upload ``filename`` over FTP, feeding curl through a gzip read callback.

    ``gzip_stream_of_strings`` is the still-unknown piece: called with the
    open source file, it must return a callable usable as pycurl's
    READFUNCTION that yields the gzip-compressed data.
    """
    c = pycurl.Curl()
    try:
        c.setopt(c.URL, 'ftp://IP/save_as.img.gz')
        c.setopt(pycurl.NOPROGRESS, 0)
        c.setopt(pycurl.PROGRESSFUNCTION, progress)
        c.setopt(pycurl.UPLOAD, 1)
        # NOTE(review): filesize is the uncompressed size, but the uploaded
        # stream is compressed, so the two will differ -- confirm whether
        # INFILESIZE should be set at all for this transfer.
        c.setopt(pycurl.INFILESIZE, filesize)
        c.setopt(pycurl.USERPWD, 'user:passwd')
        # Binary mode: the image must be read byte-for-byte. The handle is
        # not called ``input`` so the builtin of that name is not shadowed.
        with open(filename, 'rb') as source:
            c.setopt(pycurl.READFUNCTION, gzip_stream_of_strings(source))
            # perform() runs inside the with-block so the file is still open
            # while curl pulls data through the read callback.
            c.perform()
    finally:
        # Release the curl handle even when setup or the transfer fails.
        c.close()
非常感谢任何帮助!
编辑: 这是解决方案:
from gzip import GzipFile
from StringIO import StringIO
# Number of bytes pulled from the source per read while compressing.
CHUNCK_SIZE = 1024


class GZipPipe(StringIO):
    """This class implements a compression pipe suitable for asynchronous
    process.

    Credit to cdvddt @ http://snippets.dzone.com/posts/show/5644

    Data is pulled from ``source`` in CHUNCK_SIZE pieces, pushed through a
    GzipFile whose output file object is this instance, and collected in
    ``self.buffer`` until a caller fetches it with read().

    @param source: this is the input file to compress (any object with a
        read(n) method returning byte strings); None means "no input"
    @param name: this is stored as the name in the gzip header
    @function read: call this to read(size) chunks from the gzip stream
    """

    def __init__(self, source=None, name="data"):
        StringIO.__init__(self)
        self.source = source
        self.source_eof = False
        # The buffer must exist before the GzipFile is created: the gzip
        # header is written through self.write() during construction.
        self.buffer = b""
        self.zipfile = GzipFile(name, 'wb', 9, self)

    def write(self, data):
        """Collect compressed bytes emitted by the GzipFile."""
        self.buffer += data

    def read(self, size=-1):
        """Return up to ``size`` bytes of the gzip stream.

        A negative ``size`` (or None) compresses the rest of the source and
        returns everything buffered. ``size == 0`` returns an empty byte
        string and leaves the buffer untouched. Once the source is exhausted
        and the buffer is drained, an empty byte string is returned.
        """
        read_all = size is None or size < 0
        while (read_all or len(self.buffer) < size) and not self.source_eof:
            if self.source is None:
                break
            chunk = self.source.read(CHUNCK_SIZE)
            if chunk:
                self.zipfile.write(chunk)
            else:
                # Only an empty read marks end of input; a short read can
                # happen on pipes or devices without meaning EOF.
                self.source_eof = True
                # close() flushes the compressor and writes the gzip trailer.
                self.zipfile.close()
        if read_all:
            result, self.buffer = self.buffer, b""
        else:
            result = self.buffer[:size]
            self.buffer = self.buffer[size:]
        return result
像这样使用:
# Open the image in binary mode so it is read byte-for-byte, and avoid
# shadowing the builtin ``input``. Keep c.perform() inside this block: the
# read callback pulls from the file while it is still open.
with open(filename, 'rb') as source:
    c.setopt(pycurl.READFUNCTION, GZipPipe(source).read)
I need to do something like this, but in python:
dd if=/dev/sdb | gzip -c | curl ftp upload
I can't use the entire command with Popen because:
- I need non-blocking operation
- I need progress information (tried looping through proc.stderr to no avail)
The other big thing is I can't create a compressed gzip file in memory or on disk prior to uploading.
So this is about what I'm looking to figure out how to do, with gzip_stream_of_strings(input) being the unknown:
import os, pycurl
# Path of the (very large) image file that is to be uploaded.
filename = '/path/to/super/large/file.img'
# Size in bytes of that file on disk, i.e. the size before any compression.
filesize = os.path.getsize(filename)
def progress(dl_left, dl_completed, ul_left, ul_completed):
    """Report upload progress; meant to be installed as pycurl's PROGRESSFUNCTION.

    All four arguments are supplied by pycurl; only ``ul_completed`` (the
    number of bytes uploaded so far) is used here.

    Returns 0 so that the transfer continues. libcurl aborts the transfer
    when the progress callback returns a non-zero value, so returning the
    percentage itself would cancel the upload as soon as any data was sent.
    """
    # NOTE(review): filesize is the size of the uncompressed file; when the
    # uploaded stream is gzip-compressed the percentage is only approximate.
    if filesize:  # guard against ZeroDivisionError for an empty file
        percent = (float(ul_completed) / filesize) * 100
        print("upload progress: %.1f%%" % percent)
    return 0
def main():
    """Upload ``filename`` over FTP, feeding curl through a gzip read callback.

    ``gzip_stream_of_strings`` is the still-unknown piece: called with the
    open source file, it must return a callable usable as pycurl's
    READFUNCTION that yields the gzip-compressed data.
    """
    c = pycurl.Curl()
    try:
        c.setopt(c.URL, 'ftp://IP/save_as.img.gz')
        c.setopt(pycurl.NOPROGRESS, 0)
        c.setopt(pycurl.PROGRESSFUNCTION, progress)
        c.setopt(pycurl.UPLOAD, 1)
        # NOTE(review): filesize is the uncompressed size, but the uploaded
        # stream is compressed, so the two will differ -- confirm whether
        # INFILESIZE should be set at all for this transfer.
        c.setopt(pycurl.INFILESIZE, filesize)
        c.setopt(pycurl.USERPWD, 'user:passwd')
        # Binary mode: the image must be read byte-for-byte. The handle is
        # not called ``input`` so the builtin of that name is not shadowed.
        with open(filename, 'rb') as source:
            c.setopt(pycurl.READFUNCTION, gzip_stream_of_strings(source))
            # perform() runs inside the with-block so the file is still open
            # while curl pulls data through the read callback.
            c.perform()
    finally:
        # Release the curl handle even when setup or the transfer fails.
        c.close()
Any help is greatly appreciated!
EDIT:
Here's the solution:
from gzip import GzipFile
from StringIO import StringIO
# Number of bytes pulled from the source per read while compressing.
CHUNCK_SIZE = 1024


class GZipPipe(StringIO):
    """This class implements a compression pipe suitable for asynchronous
    process.

    Credit to cdvddt @ http://snippets.dzone.com/posts/show/5644

    Data is pulled from ``source`` in CHUNCK_SIZE pieces, pushed through a
    GzipFile whose output file object is this instance, and collected in
    ``self.buffer`` until a caller fetches it with read().

    @param source: this is the input file to compress (any object with a
        read(n) method returning byte strings); None means "no input"
    @param name: this is stored as the name in the gzip header
    @function read: call this to read(size) chunks from the gzip stream
    """

    def __init__(self, source=None, name="data"):
        StringIO.__init__(self)
        self.source = source
        self.source_eof = False
        # The buffer must exist before the GzipFile is created: the gzip
        # header is written through self.write() during construction.
        self.buffer = b""
        self.zipfile = GzipFile(name, 'wb', 9, self)

    def write(self, data):
        """Collect compressed bytes emitted by the GzipFile."""
        self.buffer += data

    def read(self, size=-1):
        """Return up to ``size`` bytes of the gzip stream.

        A negative ``size`` (or None) compresses the rest of the source and
        returns everything buffered. ``size == 0`` returns an empty byte
        string and leaves the buffer untouched. Once the source is exhausted
        and the buffer is drained, an empty byte string is returned.
        """
        read_all = size is None or size < 0
        while (read_all or len(self.buffer) < size) and not self.source_eof:
            if self.source is None:
                break
            chunk = self.source.read(CHUNCK_SIZE)
            if chunk:
                self.zipfile.write(chunk)
            else:
                # Only an empty read marks end of input; a short read can
                # happen on pipes or devices without meaning EOF.
                self.source_eof = True
                # close() flushes the compressor and writes the gzip trailer.
                self.zipfile.close()
        if read_all:
            result, self.buffer = self.buffer, b""
        else:
            result = self.buffer[:size]
            self.buffer = self.buffer[size:]
        return result
Used like so:
# Open the image in binary mode so it is read byte-for-byte, and avoid
# shadowing the builtin ``input``. Keep c.perform() inside this block: the
# read callback pulls from the file while it is still open.
with open(filename, 'rb') as source:
    c.setopt(pycurl.READFUNCTION, GZipPipe(source).read)
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
内置的 zlib 库允许使用任何类文件对象,其中包括文本流。
我没有测试过这个。请参阅此问题了解更多信息。
The built-in zlib library allows for using any file-like object, which includes text streams.
I have not tested this. See this SO question for additional info.