pandas read文件时出现了MemoryError,在不shutdown当前jupyter文件的情况下如何回收内存?
出现的情况
user_log = pd.read_csv('一个1.8G的文件')
# 已证明8G内存的电脑不行,在jupyter中操作的时候结果如下:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
<ipython-input-26-126c6dffbe38> in <module>()
----> 1 user_log = pd.read_csv(path6)
2 user_log.sample(5)
E:\miniconda\envs\course_py35\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
653 skip_blank_lines=skip_blank_lines)
654
--> 655 return _read(filepath_or_buffer, kwds)
656
657 parser_f.__name__ = name
E:\miniconda\envs\course_py35\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
409
410 try:
--> 411 data = parser.read(nrows)
412 finally:
413 parser.close()
E:\miniconda\envs\course_py35\lib\site-packages\pandas\io\parsers.py in read(self, nrows)
1021 new_rows = len(index)
1022
-> 1023 df = DataFrame(col_dict, columns=columns, index=index)
1024
1025 self._currow += new_rows
E:\miniconda\envs\course_py35\lib\site-packages\pandas\core\frame.py in __init__(self, data, index, columns, dtype, copy)
273 dtype=dtype, copy=copy)
274 elif isinstance(data, dict):
--> 275 mgr = self._init_dict(data, index, columns, dtype=dtype)
276 elif isinstance(data, ma.MaskedArray):
277 import numpy.ma.mrecords as mrecords
E:\miniconda\envs\course_py35\lib\site-packages\pandas\core\frame.py in _init_dict(self, data, index, columns, dtype)
409 arrays = [data[k] for k in keys]
410
--> 411 return _arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
412
413 def _init_ndarray(self, values, index, columns, dtype=None, copy=False):
E:\miniconda\envs\course_py35\lib\site-packages\pandas\core\frame.py in _arrays_to_mgr(arrays, arr_names, index, columns, dtype)
5504 axes = [_ensure_index(columns), _ensure_index(index)]
5505
-> 5506 return create_block_manager_from_arrays(arrays, arr_names, axes)
5507
5508
E:\miniconda\envs\course_py35\lib\site-packages\pandas\core\internals.py in create_block_manager_from_arrays(arrays, names, axes)
4307
4308 try:
-> 4309 blocks = form_blocks(arrays, names, axes)
4310 mgr = BlockManager(blocks, axes)
4311 mgr._consolidate_inplace()
E:\miniconda\envs\course_py35\lib\site-packages\pandas\core\internals.py in form_blocks(arrays, names, axes)
4379
4380 if len(int_items):
-> 4381 int_blocks = _multi_blockify(int_items)
4382 blocks.extend(int_blocks)
4383
E:\miniconda\envs\course_py35\lib\site-packages\pandas\core\internals.py in _multi_blockify(tuples, dtype)
4448 for dtype, tup_block in grouper:
4449
-> 4450 values, placement = _stack_arrays(list(tup_block), dtype)
4451
4452 block = make_block(values, placement=placement)
E:\miniconda\envs\course_py35\lib\site-packages\pandas\core\internals.py in _stack_arrays(tuples, dtype)
4491 shape = (len(arrays),) + _shape_compat(first)
4492
-> 4493 stacked = np.empty(shape, dtype=dtype)
4494 for i, arr in enumerate(arrays):
4495 stacked[i] = _asarray_compat(arr)
MemoryError:
目的
查看任务管理器,内存使用量达到了90%以上,如何在不shutdown当前文件的情况下回收读取这个文件时占用的内存?
如果你对这篇内容有疑问,欢迎到本站社区发帖提问,参与讨论,获取更多帮助,或者扫描二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定您的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
把你不需要的变量设成None,把不需要的cell删掉,然后手动触发一次垃圾回收:
import gc; gc.collect()