在循环中保存图形(figure)时 Matplotlib 出现内存泄漏
我正在处理一个很大的数据集,需要为每个数据点创建并导出图像。处理到大约一半时内存就被占满了。我使用了一些内存管理工具和库,发现存在内存泄漏,而且显然来自 matplotlib。我已经尝试过垃圾回收器、删除循环中的所有变量,甚至在保存后删除图形(figure),但每次迭代内存仍在不断增加。下面是代码、逐行内存分析,以及 tracemalloc 给出的内存占用前 10 名:
# Reproduction script from the question: for every (a, b) input pair it loads
# the experimental and theoretical displacement fields, computes the relative
# magnitude error, and draws/saves one dual-axis figure, while tracemalloc is
# tracing allocations.
# NOTE(review): the loop indentation was lost when this snippet was extracted;
# everything after the two `for` lines presumably belongs to the inner loop
# body -- confirm against the original post.
# NOTE(review): os, pd, np, plt, Folder_u, Folder_v, u_th, v_th and N are not
# defined in this snippet; they must come from earlier in the asker's session.
displacement=['Double sinus','Linear','Sinus in X','Sinus in Y']
os.chdir('E:\\Quality\\Magnitude')
# Axis along which the 2-D fields are averaged, indexed by b like `displacement`.
disp_axis=[1,1,0,1]
import gc
# `figure` is only referenced by the commented-out figure.Figure(...) lines below.
from matplotlib import figure
import tracemalloc
# Keep up to 10 stack frames per traced allocation.
tracemalloc.start(10)
for a in range(len(Folder_u)):
for b in range(len(Folder_u[a])):
# Experimental u/v components (space-delimited) and theoretical ones
# (comma-delimited), all read without a header row.
u_exp = pd.read_csv(Folder_u[a][b],header=None,delimiter=' ',engine='pyarrow')
v_exp = pd.read_csv(Folder_v[a][b],header=None,delimiter=' ',engine='pyarrow')
TH_u = pd.read_csv(u_th[b],sep=',',header=None,engine='pyarrow')
TH_v = pd.read_csv(v_th[b],sep=',',header=None,engine='pyarrow')
# Displacement magnitudes; columns containing NaN are dropped from the
# experimental data before squaring.
M_exp = np.sqrt(u_exp.dropna(axis=1)**2+v_exp.dropna(axis=1)**2)
M_th = np.sqrt(TH_u**2+TH_v**2)
# Relative magnitude error in percent.
Error = abs(M_exp-M_th)/M_th *100
# Collapse each 2-D field to a 1-D profile along the configured axis.
MM_E = Error.mean(axis=disp_axis[b])
MM_exp = M_exp.mean(axis=disp_axis[b])
MM_th = M_th.mean(axis=disp_axis[b])
name = N[a]
title=name.replace('.json','')
# x coordinates for the theoretical and experimental profiles.
xth = np.linspace(0,len(MM_th),len(MM_th))
xe = np.linspace(0,len(MM_exp),len(MM_exp))
## mean error
#fig=figure.Figure(figsize=(15,10))
#ax = fig.subplots(1)
# NOTE(review): a new pyplot-managed figure is created on every iteration and
# no plt.close(...) call appears anywhere in this snippet. pyplot keeps its own
# reference to each figure until it is closed, so the `del fig` further down
# presumably does not release it -- a likely source of the reported growth.
fig,ax = plt.subplots(figsize=(15,10))
plt.title('displacement = '+displacement[b]+', Config = ' + title ,fontsize=20,fontweight='bold')
#plt.title(Pyr_Type[l]+displacement[j])
# Left axis: mean error profile, fixed to the 0-100 % range.
ax.plot(MM_E,'b')
plt.ylim([0,100])
plt.yticks(np.linspace(0,100,11))
plt.ylabel('mean magnitude error [%]',fontsize=14)
plt.xlabel('Pixels',fontsize=14)
plt.grid()
#plt.ylim([0,5])
# Right axis (twin of `ax`): theoretical vs. experimental mean magnitude.
ax1=ax.twinx()
ax1.plot(xth,MM_th,'k',xe,MM_exp,'r-.')
plt.ylim([0,23])
plt.ylabel('mean displacement magnitude [Pixels]',fontsize=14)
plt.xlabel('Pixels',fontsize=14)
#plt.ylim([4,23])
#fig.set_dpi(300)
plt.grid()
fig.legend(['Mean error','Theoretical','Results'],fontsize=14,loc='upper right')
fig.set_dpi(250)
# NOTE(review): show() is called before savefig(); with a blocking GUI backend
# the figure may already be gone when savefig runs, and plt.show is not
# documented to take a figure argument -- confirm against the matplotlib
# version in use. The profiled listing of this code uses the opposite order.
plt.show(fig)
plt.savefig('Mean_error_'+title+'_'+str(b)+'.png')
print(a)
# Drop the local names bound in this iteration, then force a collection.
del u_exp
del v_exp
del TH_u
del TH_v
del M_exp
del M_th
del Error
del MM_E
del MM_exp
del MM_th
del fig
del ax, ax1, name, title, xth,xe
gc.collect()
print('deleted')
# Snapshot the traced allocations grouped by source line, then reset the
# traces. `top_stats` is not printed or used anywhere in this snippet.
snapshot = tracemalloc.take_snapshot()
top_stats = snapshot.statistics('lineno')
tracemalloc.clear_traces()
下面是逐行的内存分析结果:
Line # Mem usage Increment Occurrences Line Contents
=============================================================
241 2261.1 MiB 2261.1 MiB 1 @profile
242
243 def test():
244
245
246
247 #Pyr_Type = ['Haar (old)','Haar (NPP)','Burt (old)','Burt (new,old conv.)','Burt (new,new conv.)','Burt(NPP)']
248 2261.1 MiB 0.0 MiB 1 displacement=['Double sinus','Linear','Sinus in X','Sinus in Y']
249 2261.1 MiB 0.0 MiB 1 os.chdir('E:\\Quality\\Magnitude')
250 2261.1 MiB 0.0 MiB 1 disp_axis=[1,1,0,1]
251 2261.1 MiB 0.0 MiB 1 import gc
252 # from matplotlib import figure
253
254 2261.1 MiB 0.0 MiB 1 import tracemalloc
255 2261.1 MiB 0.0 MiB 1 tracemalloc.start(10)
256
257
258
259
260
261 2432.0 MiB 0.0 MiB 2 for a in range(1):
262 2432.0 MiB 0.0 MiB 5 for b in range(len(Folder_u[a])):
263 2361.8 MiB -34.9 MiB 4 u_exp = pd.read_csv(Folder_u[a][b],header=None,delimiter=' ',engine='pyarrow')
264 2361.0 MiB 2.0 MiB 4 v_exp = pd.read_csv(Folder_v[a][b],header=None,delimiter=' ',engine='pyarrow')
265 2399.2 MiB 110.1 MiB 4 TH_u = pd.read_csv(u_th[b],sep=',',header=None,engine='pyarrow')
266 2402.3 MiB 6.8 MiB 4 TH_v = pd.read_csv(v_th[b],sep=',',header=None,engine='pyarrow')
267 2433.4 MiB 126.2 MiB 4 M_exp = np.sqrt(u_exp.dropna(axis=1)**2+v_exp.dropna(axis=1)**2)
268 2464.9 MiB 124.5 MiB 4 M_th = np.sqrt(TH_u**2+TH_v**2)
269 2495.9 MiB 125.0 MiB 4 Error = abs(M_exp-M_th)/M_th *100
270 2495.9 MiB 0.0 MiB 4 MM_E = Error.mean(axis=disp_axis[b])
271 2495.9 MiB 0.0 MiB 4 MM_exp = M_exp.mean(axis=disp_axis[b])
272 2495.9 MiB 0.0 MiB 4 MM_th = M_th.mean(axis=disp_axis[b])
273
274 2495.9 MiB 0.0 MiB 4 name = N[a]
275 2495.9 MiB 0.0 MiB 4 title=name.replace('.json','')
276
277 2495.9 MiB 0.0 MiB 4 xth = np.linspace(0,len(MM_th),len(MM_th))
278 2495.9 MiB 0.0 MiB 4 xe = np.linspace(0,len(MM_exp),len(MM_exp))
279 ## mean error
280
281 #fig=figure.Figure(figsize=(15,10))
282 #ax = fig.subplots(1)
283 2497.9 MiB -27.5 MiB 4 fig,ax = plt.subplots(figsize=(15,10))
284
285 2497.9 MiB 0.0 MiB 4 plt.title('displacement = '+displacement[b]+', Config = ' + title ,fontsize=20,fontweight='bold')
286 #plt.title(Pyr_Type[l]+displacement[j])
287 2497.9 MiB 0.0 MiB 4 ax.plot(MM_E,'b')
288 2497.9 MiB 0.0 MiB 4 plt.ylim([0,100])
289 2499.9 MiB 10.0 MiB 4 plt.yticks(np.linspace(0,100,11))
290 2499.9 MiB 0.0 MiB 4 plt.ylabel('mean magnitude error [%]',fontsize=14)
291 2499.9 MiB 0.0 MiB 4 plt.xlabel('Pixels',fontsize=14)
292 2499.9 MiB 0.0 MiB 4 plt.grid()
293 #plt.ylim([0,5])
294
295 2499.9 MiB 0.0 MiB 4 ax1=ax.twinx()
296 2499.9 MiB 0.1 MiB 4 ax1.plot(xth,MM_th,'k',xe,MM_exp,'r-.')
297 2499.9 MiB 0.3 MiB 4 plt.ylim([0,23])
298 2499.9 MiB 4.0 MiB 4 plt.ylabel('mean displacement magnitude [Pixels]',fontsize=14)
299 2499.9 MiB 0.0 MiB 4 plt.xlabel('Pixels',fontsize=14)
300
301 2499.9 MiB 0.5 MiB 4 plt.grid()
302 2503.9 MiB 10.1 MiB 4 fig.legend(['Mean error','Theoretical','Results'],fontsize=14,loc='upper right')
303
304 2519.8 MiB 61.8 MiB 4 plt.savefig('Mean_error_'+title+'_'+str(b)+'.png',dpi=300)
305 2535.6 MiB 71.6 MiB 4 plt.show()
306 2535.6 MiB 0.0 MiB 4 print(a)
307
308 #fig.clear()
309 #plt.close()
310 #plt.cla()
311 2535.6 MiB 0.0 MiB 4 del u_exp
312 2535.6 MiB 0.0 MiB 4 del v_exp
313 2535.6 MiB 0.0 MiB 4 del TH_u
314 2535.6 MiB 0.0 MiB 4 del TH_v
315 2504.5 MiB -124.0 MiB 4 del M_exp
316 2473.5 MiB -124.0 MiB 4 del M_th
317 2442.5 MiB -124.0 MiB 4 del Error
318 2442.5 MiB 0.0 MiB 4 del MM_E
319 2442.5 MiB 0.0 MiB 4 del MM_exp
320 2442.5 MiB 0.0 MiB 4 del MM_th
321 2442.5 MiB 0.0 MiB 4 del fig
322 2442.5 MiB 0.0 MiB 4 del ax, ax1, name, title, xth,xe
323 2440.0 MiB -13.0 MiB 4 gc.collect()
324 2440.0 MiB 0.0 MiB 4 print('deleted')
325 2440.0 MiB 7.7 MiB 4 snapshot = tracemalloc.take_snapshot()
326 2440.0 MiB -2.8 MiB 4 top_stats = snapshot.statistics('lineno')
327 2432.0 MiB -42.0 MiB 4 tracemalloc.clear_traces()
328
329 #break
330
331 #break
332 #plt.show()
下面是 tracemalloc 给出的前 10 名:
C:\Users\user\AppData\Roaming\Python\Python39\site-packages\pandas\core\internals\blocks.py:402: size=62.0 MiB, count=5, average=12.4 MiB
C:\Users\user\anaconda3\lib\site-packages\numexpr\necompiler.py:836: size=31.0 MiB, count=2, average=15.5 MiB
C:\Users\user\AppData\Roaming\Python\Python39\site-packages\pandas\core\indexes\base.py:2237: size=194 KiB, count=17, average=11.4 KiB
C:\Users\user\anaconda3\lib\selectors.py:315: size=144 KiB, count=5, average=28.8 KiB
C:\Users\user\anaconda3\lib\site-packages\numpy\core\_asarray.py:102: size=119 KiB, count=387, average=314 B
C:\Users\user\anaconda3\lib\linecache.py:137: size=114 KiB, count=1165, average=100 B
C:\Users\user\anaconda3\lib\site-packages\matplotlib\lines.py:377: size=97.0 KiB, count=45, average=2208 B
C:\Users\user\anaconda3\lib\site-packages\matplotlib\lines.py:672: size=95.9 KiB, count=28, average=3509 B
<unknown>:0: size=95.8 KiB, count=23, average=4264 B
C:\Users\user\anaconda3\lib\site-packages\matplotlib\text.py:265: size=90.6 KiB, count=42, average=2208 B
```````````
I'm working on a big data set and I need to create and export an image at each data point. I run out of memory about halfway through the process. I applied some memory-management tools and libraries and found that there is a memory leak that apparently comes from matplotlib. I've already tried the garbage collector, deleting all the variables within the loop, and even deleting the figure after saving it, but memory keeps increasing at each iteration. Here are the code, the line-by-line memory analysis, and the top 10 memory consumers reported by "tracemalloc":
# Reproduction script from the question: for every (a, b) input pair it loads
# the experimental and theoretical displacement fields, computes the relative
# magnitude error, and draws/saves one dual-axis figure, while tracemalloc is
# tracing allocations.
# NOTE(review): the loop indentation was lost when this snippet was extracted;
# everything after the two `for` lines presumably belongs to the inner loop
# body -- confirm against the original post.
# NOTE(review): os, pd, np, plt, Folder_u, Folder_v, u_th, v_th and N are not
# defined in this snippet; they must come from earlier in the asker's session.
displacement=['Double sinus','Linear','Sinus in X','Sinus in Y']
os.chdir('E:\\Quality\\Magnitude')
# Axis along which the 2-D fields are averaged, indexed by b like `displacement`.
disp_axis=[1,1,0,1]
import gc
# `figure` is only referenced by the commented-out figure.Figure(...) lines below.
from matplotlib import figure
import tracemalloc
# Keep up to 10 stack frames per traced allocation.
tracemalloc.start(10)
for a in range(len(Folder_u)):
for b in range(len(Folder_u[a])):
# Experimental u/v components (space-delimited) and theoretical ones
# (comma-delimited), all read without a header row.
u_exp = pd.read_csv(Folder_u[a][b],header=None,delimiter=' ',engine='pyarrow')
v_exp = pd.read_csv(Folder_v[a][b],header=None,delimiter=' ',engine='pyarrow')
TH_u = pd.read_csv(u_th[b],sep=',',header=None,engine='pyarrow')
TH_v = pd.read_csv(v_th[b],sep=',',header=None,engine='pyarrow')
# Displacement magnitudes; columns containing NaN are dropped from the
# experimental data before squaring.
M_exp = np.sqrt(u_exp.dropna(axis=1)**2+v_exp.dropna(axis=1)**2)
M_th = np.sqrt(TH_u**2+TH_v**2)
# Relative magnitude error in percent.
Error = abs(M_exp-M_th)/M_th *100
# Collapse each 2-D field to a 1-D profile along the configured axis.
MM_E = Error.mean(axis=disp_axis[b])
MM_exp = M_exp.mean(axis=disp_axis[b])
MM_th = M_th.mean(axis=disp_axis[b])
name = N[a]
title=name.replace('.json','')
# x coordinates for the theoretical and experimental profiles.
xth = np.linspace(0,len(MM_th),len(MM_th))
xe = np.linspace(0,len(MM_exp),len(MM_exp))
## mean error
#fig=figure.Figure(figsize=(15,10))
#ax = fig.subplots(1)
# NOTE(review): a new pyplot-managed figure is created on every iteration and
# no plt.close(...) call appears anywhere in this snippet. pyplot keeps its own
# reference to each figure until it is closed, so the `del fig` further down
# presumably does not release it -- a likely source of the reported growth.
fig,ax = plt.subplots(figsize=(15,10))
plt.title('displacement = '+displacement[b]+', Config = ' + title ,fontsize=20,fontweight='bold')
#plt.title(Pyr_Type[l]+displacement[j])
# Left axis: mean error profile, fixed to the 0-100 % range.
ax.plot(MM_E,'b')
plt.ylim([0,100])
plt.yticks(np.linspace(0,100,11))
plt.ylabel('mean magnitude error [%]',fontsize=14)
plt.xlabel('Pixels',fontsize=14)
plt.grid()
#plt.ylim([0,5])
# Right axis (twin of `ax`): theoretical vs. experimental mean magnitude.
ax1=ax.twinx()
ax1.plot(xth,MM_th,'k',xe,MM_exp,'r-.')
plt.ylim([0,23])
plt.ylabel('mean displacement magnitude [Pixels]',fontsize=14)
plt.xlabel('Pixels',fontsize=14)
#plt.ylim([4,23])
#fig.set_dpi(300)
plt.grid()
fig.legend(['Mean error','Theoretical','Results'],fontsize=14,loc='upper right')
fig.set_dpi(250)
# NOTE(review): show() is called before savefig(); with a blocking GUI backend
# the figure may already be gone when savefig runs, and plt.show is not
# documented to take a figure argument -- confirm against the matplotlib
# version in use. The profiled listing of this code uses the opposite order.
plt.show(fig)
plt.savefig('Mean_error_'+title+'_'+str(b)+'.png')
print(a)
# Drop the local names bound in this iteration, then force a collection.
del u_exp
del v_exp
del TH_u
del TH_v
del M_exp
del M_th
del Error
del MM_E
del MM_exp
del MM_th
del fig
del ax, ax1, name, title, xth,xe
gc.collect()
print('deleted')
# Snapshot the traced allocations grouped by source line, then reset the
# traces. `top_stats` is not printed or used anywhere in this snippet.
snapshot = tracemalloc.take_snapshot()
top_stats = snapshot.statistics('lineno')
tracemalloc.clear_traces()
Here is the analysis of the memory, broken down line by line:
Line # Mem usage Increment Occurrences Line Contents
=============================================================
241 2261.1 MiB 2261.1 MiB 1 @profile
242
243 def test():
244
245
246
247 #Pyr_Type = ['Haar (old)','Haar (NPP)','Burt (old)','Burt (new,old conv.)','Burt (new,new conv.)','Burt(NPP)']
248 2261.1 MiB 0.0 MiB 1 displacement=['Double sinus','Linear','Sinus in X','Sinus in Y']
249 2261.1 MiB 0.0 MiB 1 os.chdir('E:\\Quality\\Magnitude')
250 2261.1 MiB 0.0 MiB 1 disp_axis=[1,1,0,1]
251 2261.1 MiB 0.0 MiB 1 import gc
252 # from matplotlib import figure
253
254 2261.1 MiB 0.0 MiB 1 import tracemalloc
255 2261.1 MiB 0.0 MiB 1 tracemalloc.start(10)
256
257
258
259
260
261 2432.0 MiB 0.0 MiB 2 for a in range(1):
262 2432.0 MiB 0.0 MiB 5 for b in range(len(Folder_u[a])):
263 2361.8 MiB -34.9 MiB 4 u_exp = pd.read_csv(Folder_u[a][b],header=None,delimiter=' ',engine='pyarrow')
264 2361.0 MiB 2.0 MiB 4 v_exp = pd.read_csv(Folder_v[a][b],header=None,delimiter=' ',engine='pyarrow')
265 2399.2 MiB 110.1 MiB 4 TH_u = pd.read_csv(u_th[b],sep=',',header=None,engine='pyarrow')
266 2402.3 MiB 6.8 MiB 4 TH_v = pd.read_csv(v_th[b],sep=',',header=None,engine='pyarrow')
267 2433.4 MiB 126.2 MiB 4 M_exp = np.sqrt(u_exp.dropna(axis=1)**2+v_exp.dropna(axis=1)**2)
268 2464.9 MiB 124.5 MiB 4 M_th = np.sqrt(TH_u**2+TH_v**2)
269 2495.9 MiB 125.0 MiB 4 Error = abs(M_exp-M_th)/M_th *100
270 2495.9 MiB 0.0 MiB 4 MM_E = Error.mean(axis=disp_axis[b])
271 2495.9 MiB 0.0 MiB 4 MM_exp = M_exp.mean(axis=disp_axis[b])
272 2495.9 MiB 0.0 MiB 4 MM_th = M_th.mean(axis=disp_axis[b])
273
274 2495.9 MiB 0.0 MiB 4 name = N[a]
275 2495.9 MiB 0.0 MiB 4 title=name.replace('.json','')
276
277 2495.9 MiB 0.0 MiB 4 xth = np.linspace(0,len(MM_th),len(MM_th))
278 2495.9 MiB 0.0 MiB 4 xe = np.linspace(0,len(MM_exp),len(MM_exp))
279 ## mean error
280
281 #fig=figure.Figure(figsize=(15,10))
282 #ax = fig.subplots(1)
283 2497.9 MiB -27.5 MiB 4 fig,ax = plt.subplots(figsize=(15,10))
284
285 2497.9 MiB 0.0 MiB 4 plt.title('displacement = '+displacement[b]+', Config = ' + title ,fontsize=20,fontweight='bold')
286 #plt.title(Pyr_Type[l]+displacement[j])
287 2497.9 MiB 0.0 MiB 4 ax.plot(MM_E,'b')
288 2497.9 MiB 0.0 MiB 4 plt.ylim([0,100])
289 2499.9 MiB 10.0 MiB 4 plt.yticks(np.linspace(0,100,11))
290 2499.9 MiB 0.0 MiB 4 plt.ylabel('mean magnitude error [%]',fontsize=14)
291 2499.9 MiB 0.0 MiB 4 plt.xlabel('Pixels',fontsize=14)
292 2499.9 MiB 0.0 MiB 4 plt.grid()
293 #plt.ylim([0,5])
294
295 2499.9 MiB 0.0 MiB 4 ax1=ax.twinx()
296 2499.9 MiB 0.1 MiB 4 ax1.plot(xth,MM_th,'k',xe,MM_exp,'r-.')
297 2499.9 MiB 0.3 MiB 4 plt.ylim([0,23])
298 2499.9 MiB 4.0 MiB 4 plt.ylabel('mean displacement magnitude [Pixels]',fontsize=14)
299 2499.9 MiB 0.0 MiB 4 plt.xlabel('Pixels',fontsize=14)
300
301 2499.9 MiB 0.5 MiB 4 plt.grid()
302 2503.9 MiB 10.1 MiB 4 fig.legend(['Mean error','Theoretical','Results'],fontsize=14,loc='upper right')
303
304 2519.8 MiB 61.8 MiB 4 plt.savefig('Mean_error_'+title+'_'+str(b)+'.png',dpi=300)
305 2535.6 MiB 71.6 MiB 4 plt.show()
306 2535.6 MiB 0.0 MiB 4 print(a)
307
308 #fig.clear()
309 #plt.close()
310 #plt.cla()
311 2535.6 MiB 0.0 MiB 4 del u_exp
312 2535.6 MiB 0.0 MiB 4 del v_exp
313 2535.6 MiB 0.0 MiB 4 del TH_u
314 2535.6 MiB 0.0 MiB 4 del TH_v
315 2504.5 MiB -124.0 MiB 4 del M_exp
316 2473.5 MiB -124.0 MiB 4 del M_th
317 2442.5 MiB -124.0 MiB 4 del Error
318 2442.5 MiB 0.0 MiB 4 del MM_E
319 2442.5 MiB 0.0 MiB 4 del MM_exp
320 2442.5 MiB 0.0 MiB 4 del MM_th
321 2442.5 MiB 0.0 MiB 4 del fig
322 2442.5 MiB 0.0 MiB 4 del ax, ax1, name, title, xth,xe
323 2440.0 MiB -13.0 MiB 4 gc.collect()
324 2440.0 MiB 0.0 MiB 4 print('deleted')
325 2440.0 MiB 7.7 MiB 4 snapshot = tracemalloc.take_snapshot()
326 2440.0 MiB -2.8 MiB 4 top_stats = snapshot.statistics('lineno')
327 2432.0 MiB -42.0 MiB 4 tracemalloc.clear_traces()
328
329 #break
330
331 #break
332 #plt.show()
Here are the top 10 entries from tracemalloc:
C:\Users\user\AppData\Roaming\Python\Python39\site-packages\pandas\core\internals\blocks.py:402: size=62.0 MiB, count=5, average=12.4 MiB
C:\Users\user\anaconda3\lib\site-packages\numexpr\necompiler.py:836: size=31.0 MiB, count=2, average=15.5 MiB
C:\Users\user\AppData\Roaming\Python\Python39\site-packages\pandas\core\indexes\base.py:2237: size=194 KiB, count=17, average=11.4 KiB
C:\Users\user\anaconda3\lib\selectors.py:315: size=144 KiB, count=5, average=28.8 KiB
C:\Users\user\anaconda3\lib\site-packages\numpy\core\_asarray.py:102: size=119 KiB, count=387, average=314 B
C:\Users\user\anaconda3\lib\linecache.py:137: size=114 KiB, count=1165, average=100 B
C:\Users\user\anaconda3\lib\site-packages\matplotlib\lines.py:377: size=97.0 KiB, count=45, average=2208 B
C:\Users\user\anaconda3\lib\site-packages\matplotlib\lines.py:672: size=95.9 KiB, count=28, average=3509 B
<unknown>:0: size=95.8 KiB, count=23, average=4264 B
C:\Users\user\anaconda3\lib\site-packages\matplotlib\text.py:265: size=90.6 KiB, count=42, average=2208 B
```````````
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(2)
我遇到了同样的问题,以下解决方案也有效:
来源: https://matplotlib.org/stable/faq/howto_faq.html#work-with-threads
bug: https://github.com/matplotlib/matplotlib/issues/20300
I had the same issue, the following solution worked:
Source: https://matplotlib.org/stable/faq/howto_faq.html#work-with-threads
Bug: https://github.com/matplotlib/matplotlib/issues/20300
我认为问题在于您创建了大量图形(figure)。
您可以尝试把创建图形的语句(例如 `fig, ax = plt.subplots(...)`)放到嵌套的 for 循环外面,这样只会创建一个图形;
然后在循环内部先清除该图形(例如使用 `fig.clear()`),再把所有内容重新添加进去。
您也可以在 for 循环之外绘制第一幅图,然后使用
set_data
更新图中的数据。如果要保存图形,则不需要 plt.show(fig)。
I think the issue is that you create a large number of figures.
You can try to put the figure creation (for example `fig, ax = plt.subplots(...)`) outside the nested for loop, so that only one figure is created,
and then clear the figure inside the loop (for example with `fig.clear()`) before adding everything to it again.
You could also plot the first figure outside the for loop and update the data of the figure with
set_data
. The plt.show(fig) call
is not needed if you save the figure.