1.使用top
查询出占用资源最高的程序和pid
[root@VM-12-9-centos ~]# top
top - 10:34:51 up 151 days, 14:46, 4 users, load average: 7.88, 8.00, 8.13
Tasks: 198 total, 8 running, 190 sleeping, 0 stopped, 0 zombie
%Cpu(s): 20.0 us, 79.8 sy, 0.0 ni, 0.1 id, 0.0 wa, 0.0 hi, 0.1 si, 0.0 st
KiB Mem : 8008644 total, 211792 free, 3804488 used, 3992364 buff/cache
KiB Swap: 0 total, 0 free, 0 used. 3859032 avail Mem
PID to signal/kill [default pid = 8817] ill -9
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
8817 root 20 0 250904 33572 2688 R 3.0 0.4 6287:50 php
9552 root 20 0 248128 34480 6296 S 2.7 0.4 5596:32 php
9562 root 20 0 248128 34480 6296 S 2.7 0.4 5451:31 php
9554 root 20 0 248128 34480 6296 S 2.3 0.4 5505:47 php
9564 root 20 0 248128 34484 6296 S 2.3 0.4 5345:03 php
3297 root 20 0 993820 77192 18748 S 1.3 1.0 140:27.37 YDService
12284 root 20 0 156856 5616 4272 R 1.0 0.1 0:00.13 sshd
15865 root 20 0 126348 1220 1008 R 1.0 0.0 0:00.14 strace
17619 root 20 0 842824 19204 3068 S 1.0 0.2 2237:00 barad_agent
27958 root 20 0 787048 86856 15340 S 0.7 1.1 0:40.60 BT-Panel
4473 root 20 0 1070600 13988 6592 S 0.3 0.2 1:49.62 YDLive
28200 root 20 0 1384064 20300 6200 S 0.3 0.3 1:38.29 BT-Task
1 root 20 0 51916 4000 2456 S 0.0 0.0 43:41.89 systemd
2 root 20 0 0 0 0 S 0.0 0.0 0:07.34 kthreadd
4 root 0 -20 0 0 0 S 0.0 0.0 0:00.00 kworker/0:0H
6 root 20 0 0 0 0 S 0.0 0.0 60:43.14 ksoftirqd/0
7 root rt 0 0 0 0 S 0.0 0.0 0:34.60 migration/0
8 root 20 0 0 0 0 S 0.0 0.0 0:00.04 rcu_bh
9 root 20 0 0 0 0 S 0.0 0.0 381:47.98 rcu_sched
10 root 0 -20 0 0 0 S 0.0 0.0 0:00.00 lru-add-drain
11 root rt 0 0 0 0 S 0.0 0.0 3:03.63 watchdog/0
12 root rt 0 0 0 0 S 0.0 0.0 3:04.37 watchdog/1
13 root rt 0 0 0 0 S 0.0 0.0 0:35.39 migration/1
14 root 20 0 0 0 0 R 0.0 0.0 57:27.61 ksoftirqd/1
16 root 0 -20 0 0 0 S 0.0 0.0 0:00.00 kworker/1:0H
17 root rt 0 0 0 0 S 0.0 0.0 3:02.59 watchdog/2
18 root rt 0 0 0 0 S 0.0 0.0 0:35.17 migration/2
19 root 20 0 0 0 0 S 0.0 0.0 57:48.05 ksoftirqd/2
21 root 0 -20 0 0 0 S 0.0 0.0 0:00.00 kworker/2:0H
22 root rt 0 0 0 0 S 0.0 0.0 3:10.39 watchdog/3
23 root rt 0 0 0 0 S 0.0 0.0 0:34.69 migration/3
24 root 20 0 0 0 0 S 0.0 0.0 59:06.12 ksoftirqd/3
26 root 0 -20 0 0 0 S 0.0 0.0 0:00.00 kworker/3:0H
28 root 20 0 0 0 0 S 0.0 0.0 0:00.00 kdevtmpfs
29 root 0 -20 0 0 0 S 0.0 0.0 0:00.00 netns
30 root 20 0 0 0 0 S 0.0 0.0 0:07.67 khungtaskd
可以看到排在前面的基本都是PHP程序
2.通过strace -p PID
跟踪指定pid的系统调用及其接收的信号
[root@VM-12-9-centos ~]# strace -p 8817
strace: Process 8817 attached
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 255}], WSTOPPED, NULL) = 15863
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=15863, si_uid=0, si_status=255, si_utime=0, si_stime=1} ---
lstat("/www/wwwroot/jys.bolinshe.com/service/vendor/workerman/workerman/../workerman.log", 0x7ffc3c6f0af0) = -1 ENOENT (No such file or directory)
lstat("/www/wwwroot/jys.bolinshe.com/service/vendor/workerman/workerman", 0x7ffc3c6f0820) = -1 ENOENT (No such file or directory)
lstat("/www/wwwroot/jys.bolinshe.com/service/vendor/workerman", 0x7ffc3c6f06a0) = -1 ENOENT (No such file or directory)
lstat("/www/wwwroot/jys.bolinshe.com/service/vendor", 0x7ffc3c6f0520) = -1 ENOENT (No such file or directory)
lstat("/www/wwwroot/jys.bolinshe.com/service", 0x7ffc3c6f03b0) = -1 ENOENT (No such file or directory)
open("/www/wwwroot/jys.bolinshe.com/service/vendor/workerman/workerman.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = -1 ENOENT (No such file or directory)
fstat(1, {st_mode=S_IFCHR|0666, st_rdev=makedev(1, 3), ...}) = 0
ioctl(1, TCGETS, 0x7ffc3c6f3690) = -1 ENOTTY (Inappropriate ioctl for device)
write(1, "file_put_contents(/www/wwwroot/j"..., 248) = 248
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f779ac30a10) = 15874
wait4(-1, [{WIFEXITED(s) && WEXITSTATUS(s) == 255}], WSTOPPED, NULL) = 15874
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=15874, si_uid=0, si_status=255, si_utime=0, si_stime=1} ---
lstat("/www/wwwroot/jys.bolinshe.com/service/vendor/workerman/workerman/../workerman.log", 0x7ffc3c6f0af0) = -1 ENOENT (No such file or directory)
lstat("/www/wwwroot/jys.bolinshe.com/service/vendor/workerman/workerman", 0x7ffc3c6f0820) = -1 ENOENT (No such file or directory)
lstat("/www/wwwroot/jys.bolinshe.com/service/vendor/workerman", 0x7ffc3c6f06a0) = -1 ENOENT (No such file or directory)
lstat("/www/wwwroot/jys.bolinshe.com/service/vendor", 0x7ffc3c6f0520) = -1 ENOENT (No such file or directory)
lstat("/www/wwwroot/jys.bolinshe.com/service", 0x7ffc3c6f03b0) = -1 ENOENT (No such file or directory)
open("/www/wwwroot/jys.bolinshe.com/service/vendor/workerman/workerman.log", O_WRONLY|O_CREAT|O_APPEND, 0666) = -1 ENOENT (No such file or directory)
fstat(1, {st_mode=S_IFCHR|0666, st_rdev=makedev(1, 3), ...}) = 0
ioctl(1, TCGETS, 0x7ffc3c6f3690) = -1 ENOTTY (Inappropriate ioctl for device)
write(1, "file_put_contents(/www/wwwroot/j"..., 248) = 248
从输出可以看到:进程不断clone出子进程,子进程以状态码255退出,同时由于站点目录不存在(ENOENT),workerman.log也无法写入,如此反复循环。
由此发现是这个站点的workerman导致的
3.先kill掉进程,恢复服务器性能,再到站点排查具体的问题
另外:如果是Java程序,可以使用大厂(阿里巴巴)开源的 arthas 工具,定位到Java程序具体是第几行出现问题