Python ThreadPoolExecutor memory leak
I'm trying to debug a memory leak in my application, and I think I managed to reduce it to this minimal example:
```python
from typing import Deque
import gc
import os
import psutil
from concurrent.futures import ThreadPoolExecutor

process = psutil.Process(os.getpid())  # we'll use this to track memory usage
executor = ThreadPoolExecutor()  # generic executor
n = 100  # number of tasks to run concurrently at any time
prev = 0  # previous maximum memory usage
m = 10_000

def do():  # an arbitrary function that returns a large value
    return list(range(m))

# initialize a deque that tracks all our running futures
futures = Deque(executor.submit(do) for _ in range(n))
i = 0  # future counter
while futures:
    f = futures.popleft()  # pop one of the futures (removing its reference from the deque)
    result = f.result()
    del result, f  # delete the future (the future is definitely removed now)
    # check the memory usage and print a message if it is a new record
    new_mem = process.memory_info().rss
    if new_mem > prev:
        print(f"{i}: {new_mem:,} bytes, {len(futures)} tasks pending")
        prev = new_mem
    # enqueue a new task
    futures.append(executor.submit(do))
    gc.collect()
    i += 1
```
Essentially what we do here is keep a rotating queue of tasks that return a large value, ensuring that no more than `n` tasks are running concurrently at any time.
We would expect peak memory usage to plateau after roughly `max_workers` tasks have completed, since none of the data from the futures is being retained.
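(For reference, on Python 3.7 a `ThreadPoolExecutor()` constructed without arguments defaults to `os.cpu_count() * 5` worker threads. The snippet below is just a quick way to confirm the pool size this expectation rests on; it peeks at the private `_max_workers` attribute, so it's a debugging aid, not a supported API.)

```python
import os
from concurrent.futures import ThreadPoolExecutor

ex = ThreadPoolExecutor()  # same default construction as in the example above
# _max_workers is a private CPython attribute; on Python 3.7 it should
# equal os.cpu_count() * 5
print(f"cpu_count={os.cpu_count()}, max_workers={ex._max_workers}")
ex.shutdown(wait=False)
```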
However, on Python 3.7.12, we see that memory usage increases continually, sometimes not plateauing until well past the thousandth task!
This behavior persists even when we change `do()` to return a numpy array, when we limit `max_workers` to a smaller number, and when we reduce `n`.
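For example, this is the kind of numpy variant of `do()` we tried (a sketch; `np.arange(m)` stands in for whatever large array the real code would produce), and the growth pattern looked the same:

```python
import numpy as np

def do():  # variant returning a large numpy array instead of a list
    return np.arange(m)  # m as defined in the example above
```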
EDIT: I added a table measuring the peak memory usage (in bytes, on my machine) of a process running this code, by iteration count, for different values of `m`:
| N (iterations) | m=1,000 | m=5,000 | m=10,000 |
|----------------|------------|------------|------------|
| 500 | 16,961,536 | 37,662,720 | 61,689,856 |
| 1,000 | 17,408,000 | 38,981,632 | 62,132,224 |
| 1,500 | 17,760,256 | 39,374,848 | 62,164,992 |
| 2,000 | 18,063,360 | 40,177,664 | 62,832,640 |
| 2,500 | 18,219,008 | 40,845,312 | 62,832,640 |
| 3,000 | 18,374,656 | 40,931,328 | 63,156,224 |
| 3,500 | 18,493,440 | 41,492,480 | 63,328,256 |
| 4,000 | 18,604,032 | 41,586,688 | 63,328,256 |
| 4,500 | 18,673,664 | 41,586,688 | 63,328,256 |
| 5,000 | 18,763,776 | 41,586,688 | 63,328,256 |
| 5,500 | 18,800,640 | 41,586,688 | 63,328,256 |
| 6,000 | 18,853,888 | 41,615,360 | 63,328,256 |
| 6,500 | 18,898,944 | 41,660,416 | 63,328,256 |
| 7,000 | 18,931,712 | 41,795,584 | 63,328,256 |
| 7,500 | 18,952,192 | 41,930,752 | 63,328,256 |
| 8,000 | 18,952,192 | 42,061,824 | 63,328,256 |
| 8,500 | 18,993,152 | 42,377,216 | 63,361,024 |
| 9,000 | 19,046,400 | 42,463,232 | 63,528,960 |
| 9,500 | 19,075,072 | 42,463,232 | 63,528,960 |
| 10,000 | 19,075,072 | 42,463,232 | 63,528,960 |
| 10,500 | 19,083,264 | 42,463,232 | 63,533,056 |
| 11,000 | 19,083,264 | 42,463,232 | 63,664,128 |
| 11,500 | 19,128,320 | 42,553,344 | 63,664,128 |
| 12,000 | 19,185,664 | 42,553,344 | 63,664,128 |
| 12,500 | 19,185,664 | 42,553,344 | 63,664,128 |
| 13,000 | 19,185,664 | 42,553,344 | 63,664,128 |
| 13,500 | 19,193,856 | 42,553,344 | 64,167,936 |
| 14,000 | 19,193,856 | 42,594,304 | 64,167,936 |
| 14,500 | 19,193,856 | 42,594,304 | 64,167,936 |
| 15,000 | 19,202,048 | 42,594,304 | 64,249,856 |
| 15,500 | 19,202,048 | 42,594,304 | 64,249,856 |
| 16,000 | 19,222,528 | 42,594,304 | 64,462,848 |
| 16,500 | 19,353,600 | 42,594,304 | 64,499,712 |
| 17,000 | 19,361,792 | 42,635,264 | 64,499,712 |
| 17,500 | 19,415,040 | 42,725,376 | 64,499,712 |
| 18,000 | 19,423,232 | 42,766,336 | 64,499,712 |
| 18,500 | 19,431,424 | 42,811,392 | 64,499,712 |
| 19,000 | 19,476,480 | 42,811,392 | 64,499,712 |
| 19,500 | 19,537,920 | 42,811,392 | 64,520,192 |
| 20,000 | 19,607,552 | 42,856,448 | 65,044,480 |
| 20,500 | 19,607,552 | 42,946,560 | 65,044,480 |
| 21,000 | 19,636,224 | 43,110,400 | 65,044,480 |
| 21,500 | 19,644,416 | 43,110,400 | 65,044,480 |
| 22,000 | 19,652,608 | 43,110,400 | 65,044,480 |
| 22,500 | 19,722,240 | 43,110,400 | 65,044,480 |
| 23,000 | 19,746,816 | 43,110,400 | 65,044,480 |
| 23,500 | 19,746,816 | 43,110,400 | 65,044,480 |
| 24,000 | 19,746,816 | 43,110,400 | 65,044,480 |
| 24,500 | 19,775,488 | 43,110,400 | 65,044,480 |
| 25,000 | 19,800,064 | 43,110,400 | 65,044,480 |
| 25,500 | 19,820,544 | 43,110,400 | 65,044,480 |
| 26,000 | 19,886,080 | 43,110,400 | 65,044,480 |
| 26,500 | 19,886,080 | 43,118,592 | 65,044,480 |
| 27,000 | 19,894,272 | 43,118,592 | 65,044,480 |
| 27,500 | 19,939,328 | 43,208,704 | 65,044,480 |
| 28,000 | 19,947,520 | 43,208,704 | 65,044,480 |
| 28,500 | 20,058,112 | 43,208,704 | 65,044,480 |
| 29,000 | 20,086,784 | 43,208,704 | 65,044,480 |
| 29,500 | 20,086,784 | 43,208,704 | 65,044,480 |
| 30,000 | 20,086,784 | 43,208,704 | 65,044,480 |
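(The table was produced by sampling `process.memory_info().rss` as the loop runs. A minimal sketch of that bookkeeping, under the assumption that the running peak is recorded every 500 iterations, might look like the hypothetical helper below; it is not part of the original example.)

```python
import os
import psutil

process = psutil.Process(os.getpid())

def sample_peak(i, peak, table, interval=500):
    """Track the running peak RSS and record it every `interval` iterations.

    Hypothetical helper used to build the table above: call it once per
    loop iteration, e.g. peak = sample_peak(i, peak, table).
    """
    peak = max(peak, process.memory_info().rss)
    if i and i % interval == 0:
        table[i] = peak  # iteration count -> peak RSS in bytes
    return peak
```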