nagios 占用cpu 90%以上
以下分别是 top 和 strace 的输出结果:
top 的输出:
Tasks: 230 total, 6 running, 223 sleeping, 0 stopped, 1 zombie
Cpu(s): 28.9%us, 26.4%sy, 0.0%ni, 44.1%id, 0.4%wa, 0.0%hi, 0.0%si, 0.0%st
Mem: 66001836k total, 8869816k used, 57132020k free, 559708k buffers
Swap: 16779884k total, 0k used, 16779884k free, 3876336k cached
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
4649 nagios 25 0 632m 610m 1048 D 95.3 0.9 518:07.35 nagios
25628 nagios 25 0 93812 9568 1684 R 3.0 0.0 0:00.09 check_tcpconns.
25630 nagios 25 0 86792 6732 1596 R 2.0 0.0 0:00.06 check_tcpconns.
25605 nagios 24 0 0 0 0 Z 1.7 0.0 0:00.05 nagios <defunct>
13349 root 16 0 159m 8944 4272 S 1.3 0.0 6:16.15 snmpd
23950 nagios 25 0 632m 610m 452 S 1.3 0.9 0:00.04 nagios
25625 nagios 25 0 632m 610m 452 S 1.0 0.9 0:00.03 nagios
25632 nagios 25 0 80656 4572 1428 R 1.0 0.0 0:00.03 check_tcpconns.
strace 的输出:
Process 4649 attached - interrupt to quit
--- SIGCHLD (Child exited) @ 0 (0) ---
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x2ac20a7dc700) = ? ERESTARTNOINTR (To be restarted)
--- SIGCHLD (Child exited) @ 0 (0) ---
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x2ac20a7dc700) = 8387
--- SIGCHLD (Child exited) @ 0 (0) ---
close(7) = 0
munmap(0x2aaaadfa3000, 4096) = 0
wait4(-1, NULL, WNOHANG, NULL) = 3950
wait4(-1, NULL, WNOHANG, NULL) = 8057
wait4(-1, NULL, WNOHANG, NULL) = 8073
wait4(-1, NULL, WNOHANG, NULL) = 8075
wait4(-1, NULL, WNOHANG, NULL) = 8080
wait4(-1, NULL, WNOHANG, NULL) = 0
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
umask(077) = 022
open("/usr/local/nagios/var/spool/checkresults/checkeZ3fQp", O_RDWR|O_CREAT|O_EXCL, 0600) = 7
fcntl(7, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
fstat(7, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaaadfa3000
lseek(7, 0, SEEK_CUR) = 0
umask(022) = 077
write(7, "### Active Check Result File ###"..., 287) = 287
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x2ac20a7dc700) = ? ERESTARTNOINTR (To be restarted)
--- SIGCHLD (Child exited) @ 0 (0) ---
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x2ac20a7dc700) = ? ERESTARTNOINTR (To be restarted)
--- SIGCHLD (Child exited) @ 0 (0) ---
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x2ac20a7dc700) = 8559
--- SIGCHLD (Child exited) @ 0 (0) ---
close(7) = 0
munmap(0x2aaaadfa3000, 4096) = 0
wait4(-1, NULL, WNOHANG, NULL) = 3953
wait4(-1, NULL, WNOHANG, NULL) = 7905
wait4(-1, NULL, WNOHANG, NULL) = 7907
wait4(-1, NULL, WNOHANG, NULL) = 7911
wait4(-1, NULL, WNOHANG, NULL) = 8027
wait4(-1, NULL, WNOHANG, NULL) = 8033
wait4(-1, NULL, WNOHANG, NULL) = 8143
wait4(-1, NULL, WNOHANG, NULL) = 0
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
umask(077) = 022
open("/usr/local/nagios/var/spool/checkresults/checkP7q0ZQ", O_RDWR|O_CREAT|O_EXCL, 0600) = 7
fcntl(7, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
fstat(7, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaaadfa3000
lseek(7, 0, SEEK_CUR) = 0
umask(022) = 077
write(7, "### Active Check Result File ###"..., 294) = 294
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x2ac20a7dc700) = 8564
close(7) = 0
munmap(0x2aaaadfa3000, 4096) = 0
wait4(-1, NULL, WNOHANG, NULL) = 0
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
umask(077) = 022
open("/usr/local/nagios/var/spool/checkresults/checkyEjbei", O_RDWR|O_CREAT|O_EXCL, 0600) = 7
fcntl(7, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
fstat(7, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaaadfa3000
lseek(7, 0, SEEK_CUR) = 0
umask(022) = 077
write(7, "### Active Check Result File ###"..., 286) = 286
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x2ac20a7dc700) = 8573
close(7) = 0
munmap(0x2aaaadfa3000, 4096) = 0
wait4(-1, NULL, WNOHANG, NULL) = 0
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
umask(077) = 022
open("/usr/local/nagios/var/spool/checkresults/checkJyTcxJ", O_RDWR|O_CREAT|O_EXCL, 0600) = 7
fcntl(7, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
fstat(7, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaaadfa3000
lseek(7, 0, SEEK_CUR) = 0
umask(022) = 077
write(7, "### Active Check Result File ###"..., 292) = 292
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x2ac20a7dc700) = 8583
--- SIGCHLD (Child exited) @ 0 (0) ---
close(7) = 0
munmap(0x2aaaadfa3000, 4096) = 0
wait4(-1, NULL, WNOHANG, NULL) = 8387
wait4(-1, NULL, WNOHANG, NULL) = 0
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
umask(077) = 022
open("/usr/local/nagios/var/spool/checkresults/checkM6raTa", O_RDWR|O_CREAT|O_EXCL, 0600) = 7
fcntl(7, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
fstat(7, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaaadfa3000
lseek(7, 0, SEEK_CUR) = 0
umask(022) = 077
write(7, "### Active Check Result File ###"..., 297) = 297
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x2ac20a7dc700) = 8584
close(7) = 0
munmap(0x2aaaadfa3000, 4096) = 0
wait4(-1, NULL, WNOHANG, NULL) = 0
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
umask(077) = 022
open("/usr/local/nagios/var/spool/checkresults/checkFvg4jC", O_RDWR|O_CREAT|O_EXCL, 0600) = 7
fcntl(7, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
fstat(7, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaaadfa3000
lseek(7, 0, SEEK_CUR) = 0
umask(022) = 077
write(7, "### Active Check Result File ###"..., 290) = 290
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x2ac20a7dc700) = 8629
close(7) = 0
munmap(0x2aaaadfa3000, 4096) = 0
wait4(-1, NULL, WNOHANG, NULL) = 0
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
umask(077) = 022
open("/usr/local/nagios/var/spool/checkresults/checkozZ8N3", O_RDWR|O_CREAT|O_EXCL, 0600) = 7
fcntl(7, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
fstat(7, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaaadfa3000
lseek(7, 0, SEEK_CUR) = 0
umask(022) = 077
write(7, "### Active Check Result File ###"..., 287) = 287
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x2ac20a7dc700) = 8663
close(7) = 0
munmap(0x2aaaadfa3000, 4096) = 0
wait4(-1, NULL, WNOHANG, NULL) = 0
stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=405, ...}) = 0
umask(077) = 022
open("/usr/local/nagios/var/spool/checkresults/checkLlCBmv", O_RDWR|O_CREAT|O_EXCL, 0600) = 7
fcntl(7, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE)
fstat(7, {st_mode=S_IFREG|0600, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2aaaadfa3000
lseek(7, 0, SEEK_CUR) = 0
umask(022) = 077
write(7, "### Active Check Result File ###"..., 292) = 292
clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x2ac20a7dc700) = ? ERESTARTNOINTR (To be restarted)
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。
绑定邮箱获取回复消息
由于您还没有绑定您的真实邮箱,如果其他用户或者作者回复了您的评论,将无法在第一时间通知您!
发布评论
评论(4)
谢谢你的帮助。最后发现是我们修改源码时造成了内存溢出,问题已解决。
Over 60G的Memory?
CPU是几核的?
目前测试的hosts/services是多少?
貌似你的check_tcpconns plugin不是很好,cpu占用也比较高。
CPU 是 16 核,services 有两万左右。nagios 跑一夜后 CPU 占用就上升到 90% 了;但是现在重启 nagios 已经几天了,又没事了。nagios 的 CPU 占用和 nagios.cfg 有关吗?
Over 60G的Memory?
CPU是几核的?
目前测试的hosts/services是多少?
貌似你的check_tcpconns plugin不是很好,cpu占用也比较高。