diff --git a/stackcollapse-crashbt.pl b/stackcollapse-crashbt.pl new file mode 100755 index 00000000..c3dbea0e --- /dev/null +++ b/stackcollapse-crashbt.pl @@ -0,0 +1,160 @@ +#!/usr/bin/perl -w + +use strict; +use Getopt::Long; + +my %collapsed; +my @stack; + +my $command; +my $cpu; +my $func_addr; +my $func_name; +my $include_pname; +my $no_cpu = 0; +my $no_taskaddr = 0; +my $pid; +my $pname; +my $stack_addr; +my $taskaddr; + + +GetOptions('no_taskaddr' => \$no_taskaddr, + 'no_cpu' => \$no_cpu) +or die < outfile\n + --no_taskaddr # Don't generate taskaddr to eliminate the stacksize + --no_cpu # Don't generate the stack based on the CPU number to eliminate the stacksize\n +USAGE_END + +sub remember_stack { + my ($stack, $count) = @_; + $collapsed{$stack} += $count; +} + + +sub summarize_stack_and_cleanup { + + if (!$no_taskaddr) { + unshift @stack, "$pname($pid) task_struct:0x$taskaddr"; + } else { + unshift @stack, "$pname"; + } + + if (!$no_cpu) { + unshift @stack, "CPU_$cpu"; + } + + remember_stack(join(";", @stack), 1); + + undef @stack; + undef $command; + undef $cpu; + undef $func_addr; + undef $func_name; + undef $pid; + undef $pname; + undef $stack_addr; + undef $taskaddr; +} + + +# +# Main loop +# +while (defined($_ = <>)) { + + # find the name of the process in the first line of callstack of ftrace. + # "bt -a" output ex: 'PID: 2045758 TASK: ffff9b70103e0000 CPU: 0 COMMAND: "ms_pipe_read"' + if (/^\s*PID\:\s+(\d+)\s+TASK\:\s+([a-f\d]+)\s+CPU\:\s+(\d+)\s+COMMAND\:\s+\"(.+)\"/) { + if (defined $pname) { + # The sample count flamegraph needs to summarize stack here + summarize_stack_and_cleanup(); + } + $pid = $1; + $taskaddr = $2; + $cpu = $3; + $pname = $4; + # print "pid: $pid, taskaddr: $taskaddr, cpu: $cpu, command: $pname\n"; + + next; + } + + # "bt -a" output: + # PID: 0 TASK: ffff9b769861da00 CPU: 1 COMMAND: "swapper/1" + # #0 [fffffe0000034e40] crash_nmi_callback at ffffffff85059c07 + # #1 [fffffe0000034e50] nmi_handle at ffffffff850322d0 + # #2 [fffffe0000034ea8] default_do_nmi at ffffffff85032784 + # #3 [fffffe0000034ec8] do_nmi at ffffffff85032992 + # #4 [fffffe0000034ef0] end_repeat_nmi at ffffffff85a01a62 + # [exception RIP: mwait_idle+0x77] + # RIP: ffffffff859c7557 RSP: ffffbf214c90be88 RFLAGS: 00000246 + # RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 + # RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 + # RBP: ffffbf214c90be98 R8: 0000000000000000 R9: 0000000000000002 + # R10: ffffbf214c90be18 R11: 000000000000406c R12: 0000000000000001 + # R13: ffff9b769861da00 R14: 0000000000000000 R15: 0000000000000000 + # ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 + # --- --- + # #5 [ffffbf214c90be88] mwait_idle at ffffffff859c7557 + # #6 [ffffbf214c90bea0] arch_cpu_idle at ffffffff85039e15 + # #7 [ffffbf214c90beb0] default_idle_call at ffffffff859c78f3 + # #8 [ffffbf214c90bec0] do_idle at ffffffff850d9bda + # #9 [ffffbf214c90bf00] cpu_startup_entry at ffffffff850d9e43 + # #10 [ffffbf214c90bf28] start_secondary at ffffffff8505b08b + # #11 [ffffbf214c90bf50] secondary_startup_64 at ffffffff850000d5 + # >8-----------------------8< + # PID: 253681 TASK: ffff9b8a66a2bc00 CPU: 24 COMMAND: "jujud" + # #0 [ffffbf216c013500] __schedule at ffffffff859c21a6 + # #1 [ffffbf216c0135a0] schedule at ffffffff859c26b6 + # #2 [ffffbf216c0135b8] schedule_timeout at ffffffff859c67db + # #3 [ffffbf216c013638] wait_for_completion at ffffffff859c3284 + # #4 [ffffbf216c013698] xfs_buf_submit_wait at ffffffffc0d197bf [xfs] + # #5 [ffffbf216c0136c0] xfs_bwrite at ffffffffc0d19cb4 [xfs] + # #6 [ffffbf216c0136e0] xfs_reclaim_inode at ffffffffc0d23f43 [xfs] + # #7 [ffffbf216c013730] xfs_reclaim_inodes_ag at ffffffffc0d2419c [xfs] + # #8 [ffffbf216c0138c0] xfs_reclaim_inodes_nr at ffffffffc0d25383 [xfs] + # #9 [ffffbf216c0138e0] xfs_fs_free_cached_objects at ffffffffc0d38699 [xfs] + # #10 [ffffbf216c0138f0] super_cache_scan at ffffffff8528818a + # #11 [ffffbf216c013948] shrink_slab at ffffffff851f21bb + # #12 [ffffbf216c013a18] shrink_slab at ffffffff851f2459 + # #13 [ffffbf216c013a28] shrink_node at ffffffff851f7458 + # #14 [ffffbf216c013ab0] do_try_to_free_pages at ffffffff851f774e + # #15 [ffffbf216c013b18] try_to_free_pages at ffffffff851f7ab1 + # #16 [ffffbf216c013ba0] __alloc_pages_slowpath at ffffffff851e5099 + # #17 [ffffbf216c013cb8] __alloc_pages_nodemask at ffffffff851e5e19 + # #18 [ffffbf216c013d20] alloc_pages_vma at ffffffff85248152 + # #19 [ffffbf216c013d68] do_huge_pmd_anonymous_page at ffffffff8525fcc7 + # #20 [ffffbf216c013dc0] __handle_mm_fault at ffffffff8521c0a7 + # #21 [ffffbf216c013e78] handle_mm_fault at ffffffff8521c88c + # #22 [ffffbf216c013ea8] __do_page_fault at ffffffff850783b1 + # #23 [ffffbf216c013f20] do_page_fault at ffffffff8507861e + # #24 [ffffbf216c013f50] page_fault at ffffffff85a01635 + if (defined $pname) { + if (/\s*\#\d+\s+\[([a-f\d]+)\]\s+(\w+)\s+at\s+([a-f\d]+)\s*(\[\w+\])*$/) { + $stack_addr = $1; + $func_name = $2; + $func_addr = $3; + if (defined $4) { + unshift @stack, "$func_name $4 <0x$func_addr>"; + } else { + unshift @stack, "$func_name <0x$func_addr>"; + } + # print "stack_addr: $stack_addr, func_name: $func_name, func_addr: $func_addr\n"; + } elsif (/\s*\-\-\-\s+\\s+\-\-\-/) { + + # print $_; + $func_name = ""; + unshift @stack, $func_name; + # print "func_name: $func_name\n"; + # Parse to any other weird lines which is not the stack trace + # or the kernel_stack line. + } else { + next; + } + } +} + +foreach my $k (sort { $a cmp $b } keys %collapsed) { + print "$k $collapsed{$k}\n"; +} diff --git a/stackcollapse-ftracelat.pl b/stackcollapse-ftracelat.pl new file mode 100755 index 00000000..1f4b11dd --- /dev/null +++ b/stackcollapse-ftracelat.pl @@ -0,0 +1,214 @@ +#!/usr/bin/perl -w + +use strict; +use Getopt::Long; + +my %collapsed; +my @stack; +my @time_stack; + +my $addr_offset; +my $blockseconds; +my $comm; +my $cpu; +my $date; +my $dtype; # dumpstack type +my $func_name; +my $gfp_flags; +my $hostname; +my $include_all_calltrace; +my $include_block_seconds; +my $include_debug; +my $include_no_offset; +my $include_no_page_order; +my $include_no_page_type; +my $include_no_pname; +my $kernelversion; +my $latency = 0; +my $lines_skip_before_next_stack = 0; +my $nr_line = 0; +my $pid; +my $pname; +my $oom_score_adj; +my $order; +my $time_accumulator = 0; +my $tsp; + + +GetOptions('all_calltrace' => \$include_all_calltrace, + 'block_seconds' => \$include_block_seconds, + 'debug' => \$include_debug, + 'no_offset' => \$include_no_offset, + 'no_page_order' => \$include_no_page_order, + 'no_page_type' => \$include_no_page_type, + 'no_process_name' => \$include_no_pname) +or die < outfile\n + --all_calltrace # Show all the call trace even there is no OOM/hungtask/soft lockup error. + --debug # Enable the debug message + --no_offset # Exclude the address offset from the symbol name. + # The stack merge will be better and simplied. + # However, by looking at the Flamegraph, it's + # not useful when addr2line is needed to + # identify the exact code piece. + +OOM Call Trace Options: + --no_page_order # Don't generate the flamegraph based on the page order + --no_page_type # Don't include the gfp_flgs in the stack + --process_name # include process names to isolate callstack + +HungTask/Soft Lockup options: + --block_seconds # Shows the blocked seconds with the process + # name. By default, the block seconds is not + # showed in the dump stack to facilitate the + # stack merge.\n +USAGE_END + + +sub debug_log { + my ($message) = @_; + print "$message" if $include_debug; +} + +sub remember_stack { + my ($stack, $count) = @_; + $collapsed{$stack} += $count*1000; + debug_log "$stack $collapsed{$stack}\n"; +} + +sub undefine_variables { + undef $addr_offset; + undef $comm; + undef $cpu; + undef $date; + undef $dtype; + undef $func_name; + undef $gfp_flags; + undef $hostname; + undef $kernelversion; + undef $oom_score_adj; + undef $order; + undef $pid; + undef $pname; + undef @stack; + undef $tsp; +} + +sub summarize_stack_and_cleanup { + my ($pop_func, $undef_var, $latency) = @_; + +out: + remember_stack(join(";", @stack), $latency); + pop @stack if $pop_func; + undefine_variables() if $undef_var; +} + + +# +# Main loop +# +while (defined($_ = <>)) { + + $nr_line = $.; + + # 1). Function start + # Processor-31570 [007] 2872459.738331: funcgraph_entry: | SyS_writev() { + if (/\s*(.+)\s+\[(\d+)\]\s+(\d+\.\d+)\:\s+(funcgraph_entry)\:\s+\|\s+(.+)\(\)\s+\{/) { + debug_log "##### function_entry #####\n"; + $pname = $1; + $cpu = $2; + $tsp = $3; + $func_name = $5; + + push @time_stack, $time_accumulator; + push @time_stack, $func_name; + $time_accumulator = 0; # Reset the time for each function + +=pod + if (!@stack) { + unshift @stack, $pname if ($pname); + unshift @stack, "CPU# $cpu" if ($cpu); + } +=cut + + debug_log "@time_stack\n"; + push @stack, "$func_name"; + next; # ready to parse the dump_stack call trace + } + + # 2). Single function + # Processor-31570 [007] 2872459.738333: funcgraph_entry: 0.376 us | fget_light(); + # Processor-31570 [006] 2872460.164409: funcgraph_entry: + 11.008 us | __x2apic_send_IPI_mask(); + # if (/\s*(.+)\s+\[(\d+)\]\s+(\d+\.\d+)\:\s+(.*)\:\s+(.*\s*\d+\.\d+)\s+.+\s+\|\s+(.+\(\));/) { + # + # Documentation/trace/ftrace.rst + # + means that the function exceeded 10 usecs. + # ! means that the function exceeded 100 usecs. + # # means that the function exceeded 1000 usecs. + # * means that the function exceeded 10 msecs. + # @ means that the function exceeded 100 msecs. + # $ means that the function exceeded 1 sec. + + if (/\s*(.+)\s+\[(\d+)\]\s+(\d+\.\d+)\:\s+(funcgraph_entry)\:\s+[+!#*@$]?\s*(\d+\.\d+)\s+.+\s+\|\s+(.+)\(\);/) { + debug_log "##### Single function #####\n"; + $pname = $1; + $cpu = $2; + $tsp = $3; + $latency = $5; + $func_name = $6; + push @stack, "$func_name"; + + $time_accumulator += $latency; # accumulate the time inside the function + summarize_stack_and_cleanup(1, 0, $latency); # pop:true, undef:false + next; # ready to parse the dump_stack call trace + } + + # 3). Function end + # Processor-31570 [007] 2872459.738421: funcgraph_exit: + 15.597 us | } + # if (/\s*(.+)\s+\[(\d+)\]\s+(\d+\.\d+)\:\s+(.*)\:\s+(.*\s*\d+\.\d+)\s+.+\s+\|\s+\}/) { + if (/\s*(.+)\s+\[(\d+)\]\s+(\d+\.\d+)\:\s+(funcgraph_exit)\:\s+[+!#*@$]?\s*(\d+\.\d+)\s+.+\s+\|\s+\}/) { + $pname = $1; + $cpu = $2; + $tsp = $3; + $latency = $5; + debug_log "L$nr_line: $tsp $pname $cpu $latency\n"; + + my $save_time_accumulator; + debug_log "##### funcgraph_exit } #####\n"; + my $len_time_stack = scalar @time_stack; + debug_log "len_time_stack: $len_time_stack\n"; + debug_log "\$time_stack[$len_time_stack - 1]: $time_stack[$len_time_stack - 1]\n"; + $_ = $time_stack[$len_time_stack - 1]; + + debug_log "\$_: $_\n"; + debug_log "\@stack: @stack\n"; + debug_log "\@time_stack: @time_stack\n"; + + until ($_ =~ /[^\d\.]/) { + $time_accumulator += $_; + $_ = pop @time_stack; + debug_log "pop value: $_\n"; + $len_time_stack = scalar @time_stack; + $_ = $time_stack[$len_time_stack - 1]; + } + + pop @time_stack; + debug_log "\@time_stack: @time_stack\n"; + + $save_time_accumulator = $latency; + debug_log "$latency -= $time_accumulator\n"; + $latency -= $time_accumulator; + debug_log "final latency: $latency\n"; + + $time_accumulator = $save_time_accumulator; + debug_log "new time_accumulator: $time_accumulator\n"; + + summarize_stack_and_cleanup(1, 0, $latency); + next; # ready to parse the dump_stack call trace + + } +} + +foreach my $k (sort { $a cmp $b } keys %collapsed) { + print "$k $collapsed{$k}\n"; +} diff --git a/stackcollapse-kernel.pl b/stackcollapse-kernel.pl new file mode 100755 index 00000000..5fe5b47e --- /dev/null +++ b/stackcollapse-kernel.pl @@ -0,0 +1,350 @@ +#!/usr/bin/perl -w + +use strict; +use Getopt::Long; + +my %collapsed; +my @stack; + +my $addr_offset; +my $blockseconds; +my $comm; +my $cpu; +my $date; +my $dtype; # dumpstack type +my $func_name; +my $gfp_flags; +my $hostname; +my $include_all_calltrace; +my $include_block_seconds; +my $include_no_offset; +my $include_no_page_order; +my $include_no_page_type; +my $include_no_pname; +my $kernelversion; +my $lines_skip_before_next_stack = 0; +my $nr_line = 0; +my $pid; +my $pname; +my $oom_score_adj; +my $order; +my $tsp; + + +GetOptions('all_calltrace' => \$include_all_calltrace, + 'block_seconds' => \$include_block_seconds, + 'no_offset' => \$include_no_offset, + 'no_page_order' => \$include_no_page_order, + 'no_page_type' => \$include_no_page_type, + 'no_process_name' => \$include_no_pname) +or die < outfile\n + --all_calltrace # Show all the call trace even there is no OOM/hungtask/soft lockup error. + --no_offset # Exclude the address offset from the symbol name. + # The stack merge will be better and simplied. + # However, by looking at the Flamegraph, it's + # not useful when addr2line is needed to + # identify the exact code piece. + +OOM Call Trace Options: + --no_page_order # Don't generate the flamegraph based on the page order + --no_page_type # Don't include the gfp_flgs in the stack + --process_name # include process names to isolate callstack + +HungTask/Soft Lockup options: + --block_seconds # Shows the blocked seconds with the process + # name. By default, the block seconds is not + # showed in the dump stack to facilitate the + # stack merge.\n +USAGE_END + + +sub remember_stack { + my ($stack, $count) = @_; + $collapsed{$stack} += $count; +} + +sub undefine_variables { + undef $addr_offset; + undef $comm; + undef $cpu; + undef $date; + undef $dtype; + undef $func_name; + undef $gfp_flags; + undef $hostname; + undef $kernelversion; + undef $oom_score_adj; + undef $order; + undef $pid; + undef $pname; + undef @stack; + undef $tsp; +} + +sub summarize_stack_and_cleanup { + + if (!$dtype) { + if ($pname && $pid) { + if ($include_block_seconds) { + unshift @stack, "$pname($pid) stuck for ${blockseconds}s "; + } else { + unshift @stack, "$pname($pid)"; + } + } + unshift @stack, "CPU#$cpu" if $cpu; + goto out; + } + + # For OOM, the CPU# is not added to the stack. It's not useful in the + # OOM debugging. + if ($dtype eq "oom") { + if (!$include_no_pname && defined($pname)) { + unshift @stack, $pname; + } + + if (!$include_no_page_type) { + if (defined($gfp_flags)) { + unshift @stack, "$gfp_flags"; + } + + } + + if (!$include_no_page_order) { + if (defined($order)) { + unshift @stack, "order $order"; + } + } + unshift @stack, "OOM Call Trace"; + + goto out; + } + + if ($dtype eq "hungtask") { + if (!$include_no_pname && defined($pname)) { + if ($include_block_seconds) { + unshift @stack, "$pname($pid) blocks for $blockseconds seconds"; + } else { + unshift @stack, "$pname($pid)"; + } + } + + unshift @stack, "Hung Task Call Trace"; + goto out; + } + + if ($dtype eq "softlockup") { + if (!$include_no_pname && defined($pname)) { + if ($include_block_seconds) { + unshift @stack, "$pname($pid) stuck for ${blockseconds}s "; + } else { + unshift @stack, "$pname($pid)"; + } + } + unshift @stack, "CPU#$cpu"; + unshift @stack, "Softlockup Call Trace"; + goto out; + } + +out: + if ($kernelversion) { + unshift @stack, $kernelversion; + } + # unshift @stack, $nr_line; + remember_stack(join(";", @stack), 1); + undefine_variables(); +} + + +# +# Main loop +# +while (defined($_ = <>)) { + + $nr_line = $.; + # 1). oom-killer message parsing. (mm/oom_kill.c:dump_header()) + # Mar 22 12:05:46 coolmarket-node4 kernel: [ 6230.958810] cloudflared invoked oom-killer: gfp_mask=0x100cca(GFP_HIGHUSER_MOVABLE), order=0, oom_score_adj=0 + # Mar 22 12:48:29 coolmarket-node4 kernel: [ 8794.500480] php-fpm7.4 invoked oom-killer: gfp_mask=0x100cca(GFP_HIGHUSER_MOVABLE), order=0, oom_score_adj=0 + # Mar 22 13:10:25 coolmarket-node4 kernel: [10103.572487] php-fpm7.4 invoked oom-killer: gfp_mask=0x100dca(GFP_HIGHUSER_MOVABLE|__GFP_ZERO), order=0, oom_score_adj=0 + if (/(\w+\s+\d+\s+\d+\:\d+\:\d+)\s+(.+)\s+kernel\:\s+\[\s*(\d+\.\d+)\]\s+(.+)\s+invoked\s+oom\-killer\:\s+gfp_mask=(0x[a-f0-9]+\(.+\)),\s+order=(\d+),\s+oom_score_adj=(\d+)/) { + $date = $1; + $hostname = $2; + $tsp = $3; + $pname = $4; + $gfp_flags = $5; + $order = $6; + $oom_score_adj = $4; + $dtype = "oom"; + # print "$date $hostname $tsp $pname $gfp_flags $order $oom_score_adj\n"; + next; # ready to parse the dump_stack call trace + } + + # 2). Hung task message parsing + # + # Mar 22 13:21:10 coolmarket-node4 kernel: [10755.701996] INFO: task php-fpm7.4:2823 blocked for more than 241 seconds. + # Mar 22 13:21:10 coolmarket-node4 kernel: [10755.703063] Not tainted 5.4.0-67-generic #75-Ubuntu + # Mar 22 13:21:10 coolmarket-node4 kernel: [10755.703891] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. + # Mar 22 13:21:10 coolmarket-node4 kernel: [10755.704700] php-fpm7.4 D 0 2823 1375 0x00000000 + # Mar 22 13:21:10 coolmarket-node4 kernel: [10755.704703] Call Trace: + # Mar 22 13:21:10 coolmarket-node4 kernel: [10755.704712] __schedule+0x2e3/0x740 + # + # Tainted example: + # <3>Aug 12 19:38:23 whp1r27c015 kernel: [ 1443.094547] INFO: task macompatsvc:8498 blocked for more than 120 seconds. + # <3>Aug 12 19:38:23 whp1r27c015 kernel: [ 1443.094548] Tainted: G W OX 3.13.0-156-generic #206-Ubuntu + if (/task\s+(.+)\:(\d+)\s+blocked\s+for\s+more\s+than\s+(\d+)\s+seconds/) { + $pname = $1; + $pid = $2; + $blockseconds = $3; + $dtype = "hungtask"; + # print "$pname($pid) block $blockseconds seconds!\n"; + } + + # Parse the kernel veresion for the hungtask + # Hungtask doesn't report the CPU number line as the task isn't running + # in any core. + if ($dtype && $dtype eq "hungtask") { + if (/ainted\:*\s+.*(\d+\.\d+\.\d+\-\d+.+)$/) { + $kernelversion = $1; + } + } + + # 3). soft lockup message parsing + # + # Mar 24 08:14:03 coolmarket-node4 kernel: [152325.589066] watchdog: BUG: soft lockup - CPU#6 stuck for 25s! [kworker/6:2:322623] + # Mar 24 08:14:03 coolmarket-node4 kernel: [152325.600966] rcu: 6-....: (1491 ticks this GP) idle=5ea/1/0x4000000000000002 softirq=3740800/3740800 fqs=830 + # Mar 24 08:14:03 coolmarket-node4 kernel: [152325.601776] Modules linked in: binfmt_misc rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs lockd grace fscache vmw_vsock_vmci_transport vsock dm_multipath scsi_dh_rdac scsi_dh_emc scsi_dh_alua intel_rapl_msr intel_rapl_common sb_edac rapl vmw_balloon joydev input_leds vmw_vmci serio_raw mac_hid sch_fq_codel tcp_htcp sunrpc ip_tables x_tables autofs4 btrfs zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear crct10dif_pclmul crc32_pclmul vmwgfx ghash_clmulni_intel ttm aesni_intel drm_kms_helper crypto_simd syscopyarea cryptd sysfillrect glue_helper vmw_pvscsi sysimgblt ahci fb_sys_fops psmouse libahci drm vmxnet3 i2c_piix4 pata_acpi + # Mar 24 08:14:03 coolmarket-node4 kernel: [152325.602781] CPU: 6 PID: 322623 Comm: kworker/6:2 Tainted: G L 5.4.0-67-generic #75-Ubuntu + # Mar 24 08:14:03 coolmarket-node4 kernel: [152325.602782] (detected by 12, t=65134 jiffies, g=24048537, q=75344) + # Mar 24 08:14:03 coolmarket-node4 kernel: [152325.602783] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/05/2016 + # Mar 24 08:14:03 coolmarket-node4 kernel: [152325.602786] Sending NMI from CPU 12 to CPUs 6: + # Mar 24 08:14:03 coolmarket-node4 kernel: [152325.602894] Workqueue: events_freezable vmballoon_work [vmw_balloon] + if (/watchdog\:\s+BUG\:\s+soft\s+lockup.*CPU\#(\d+)\s+stuck\s+for\s+(\d+)s\!\s+\[(.+)\:(\d+)/) { + $dtype = "softlockup"; + $cpu = $1; + $blockseconds = $2; + $pname = $3; + $pid = $4; + # print "line: $nr_line\n"; + } + + # dump_stack(): message example(detail example is put in front of the + # script. + # + # v5.4 + # Mar 22 12:05:46 coolmarket-node4 kernel: [ 6230.958817] CPU: 11 PID: 1505 Comm: cloudflared Not tainted 5.4.0-67-generic #75-Ubuntu + # Mar 22 12:05:46 coolmarket-node4 kernel: [ 6230.958817] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 04/05/2016 + # Mar 22 12:05:46 coolmarket-node4 kernel: [ 6230.958822] Call Trace: + # + # v4.4 + # 2021-03-08T14:23:50.849804+09:00 compute-5-20122205.domain.tld kernel: [7161107.888485] CPU: 27 PID: 147224 Comm: qemu-system-x86 Tainted: P D OE 4.4.0-133-generic #159~14.04.1-Ubuntu + # + # v3.13 + # <4>Aug 13 14:59:29 whp1r27c015 kernel: [53058.155599] CPU: 12 PID: 690 Comm: kswapd0 Tainted: G W OX 3.13.0-156-generic #206-Ubuntu + # <4>Aug 13 14:59:29 whp1r27c015 kernel: [53058.155600] Hardware name: Dell Inc. PowerEdge R730xd/072T6D, BIOS 2.7.1 001/22/2018 + # <4>Aug 13 14:59:29 whp1r27c015 kernel: [53058.155602] task: ffff881fcd3046b0 ti: ffff881fcd3c4000 task.ti: ffff881fcd3c4000 + # <4>Aug 13 14:59:29 whp1r27c015 kernel: [53058.155604] RIP: 0010:[] [] smp_call_function_many+0x28e/0x2f0 + if (/CPU\:\s+(\d+)\s+PID\:\s+(\d+)\s+Comm\:(.+)\s+(Not tainted|Tainted).*(\d+\.\d+\.\d+\-\d+.+)$/) { + $cpu = $1; + $pid = $2; + $pname = $3; # overwrite the pname + $kernelversion = $5; + + # print "$cpu $pid $pname $kernelversion\n"; + next; + } + + # dump_stack(): Call Trace output: + # v5.4 + # Mar 22 12:05:46 coolmarket-node4 kernel: [ 6230.958822] Call Trace: + # Mar 22 12:05:46 coolmarket-node4 kernel: [ 6230.958846] dump_stack+0x6d/0x8b + # Mar 22 12:05:46 coolmarket-node4 kernel: [ 6230.958852] dump_header+0x4f/0x1eb + # + # v3.13 + # <4>Aug 13 14:59:29 whp1r27c015 kernel: [53058.155640] Call Trace: + # <4>Aug 13 14:59:29 whp1r27c015 kernel: [53058.155645] [] ? do_kernel_range_flush+0x40/0x40 + # <4>Aug 13 14:59:29 whp1r27c015 kernel: [53058.155648] [] native_flush_tlb_others+0x2e/0x30 + if (defined($dtype) || $include_all_calltrace) { + + if (/\]\s+([^\>]+)\+(0x[a-f\d]+)\//) { + + $lines_skip_before_next_stack-- if $lines_skip_before_next_stack > 0; + # Filter out the function name starting with "?" mark + if (/\]\s+(\?\s+.+)\+(0x[a-f\d]+)\//) { + next; + } + + # In the following example, the "RIP:..." is the next callstack. + # Somehow, the kernel messes up the callstack and print the RIP + # next to the "page_fault+0x34/0x40". If there exist + # stack captured, just skip the RIP line starting with + # "RIP: 0010:copy_f..." + # + # Mar 22 13:25:12 coolmarket-node4 kernel: [10997.363857] page_fault+0x34/0x40 + # Mar 22 13:25:12 coolmarket-node4 kernel: [10997.363863] RIP: 0010:copy_fpstate_to_sigframe+0x126/0x370 + if (@stack) { + # Filter out the RIP line + if (/\]\s+RIP\:\s+(.+)\+(0x[a-f\d]+)\//) { + $lines_skip_before_next_stack = 0; + next; + } + } else { + # Mar 22 13:36:01 coolmarket-node4 kernel: [11646.747160] RIP: 0010:0xffffffffc08172d2 + # -----8<----- start of the 12 lines + # Mar 22 13:36:01 coolmarket-node4 kernel: [11646.747162] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc <55> 48 89 e5 48 81 ec 00 00 00 00 53 41 55 41 56 41 57 31 c0 45 31 + # Mar 22 13:36:01 coolmarket-node4 kernel: [11646.747163] RSP: 0018:ffffabd7c07709b8 EFLAGS: 00010246 ORIG_RAX: ffffffffffffff13 + # Mar 22 13:36:01 coolmarket-node4 kernel: [11646.747165] RAX: ffffffffc08172d2 RBX: ffff9cf947900000 RCX: 0000000000000036 + # Mar 22 13:36:01 coolmarket-node4 kernel: [11646.747166] RDX: 0000000000000036 RSI: ffffabd7c0b05038 RDI: ffff9cf94ed26600 + # Mar 22 13:36:01 coolmarket-node4 kernel: [11646.747167] RBP: ffffabd7c0770a08 R08: ffff9cf94ed26600 R09: 0000000000000000 + # Mar 22 13:36:01 coolmarket-node4 kernel: [11646.747168] R10: ffffabd7c0b05000 R11: 0000000000000090 R12: ffff9cf94ed26600 + # Mar 22 13:36:01 coolmarket-node4 kernel: [11646.747169] R13: ffff9cf947900000 R14: 0000000000000036 R15: ffff9cf94d07a000 + # Mar 22 13:36:01 coolmarket-node4 kernel: [11646.747170] FS: 0000000000000000(0000) GS:ffff9cf95ff80000(0000) knlGS:0000000000000000 + # Mar 22 13:36:01 coolmarket-node4 kernel: [11646.747171] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + # Mar 22 13:36:01 coolmarket-node4 kernel: [11646.747172] CR2: 000056031c610fe8 CR3: 00000007917ae003 CR4: 00000000001606e0 + # Mar 22 13:36:01 coolmarket-node4 kernel: [11646.747234] Call Trace: + # Mar 22 13:36:01 coolmarket-node4 kernel: [11646.747236] + # ----->8----- end of the 12 lines + # Mar 22 13:36:01 coolmarket-node4 kernel: [11646.747241] ? packet_rcv+0xca/0x4c0 + # Mar 22 13:36:01 coolmarket-node4 kernel: [11646.747338] dev_queue_xmit_nit+0x267/0x280 + # Mar 22 13:36:01 coolmarket-node4 kernel: [11646.747340] dev_hard_start_xmit+0x67/0x1f0 + # Mar 22 13:36:01 coolmarket-node4 kernel: [11646.747341] ? validate_xmit_skb+0x2f0/0x340 + if (/\]\s+RIP\:\s+(.+)\+(0x[a-f\d]+)\//) { + $lines_skip_before_next_stack = 17; + # print "$nr_line: lines_skip_before... $_\n"; + } + + } + + $func_name = $1; + $addr_offset = $2; + + if ($include_no_offset) { + unshift @stack, "$func_name"; + } else { + # unshift @stack, "$nr_line $func_name+$addr_offset"; + unshift @stack, "$func_name+$addr_offset"; + } + + # If the @stack already has some call traces and the current + # parsed line is not a stack, the logic identifies it's the end + # of the call stack. + } else { + # The sample count flamegraph needs to summarize stack here + if (@stack) { + if ($lines_skip_before_next_stack > 0) { + # print "@stack\n"; + # print "lines_skip_before_next_stack:$lines_skip_before_next_stack\n"; + # print "$nr_line: $_\n"; + $lines_skip_before_next_stack--; + next; + } + # print "line: $nr_line, before summarize, $dtype/$cpu/$pid/$pname, stack: @stack\n"; + summarize_stack_and_cleanup(); + next; + } + } + } +} + +foreach my $k (sort { $a cmp $b } keys %collapsed) { + print "$k $collapsed{$k}\n"; +} diff --git a/stackcollapse-perf.pl b/stackcollapse-perf.pl index 5cefa82a..52cda3fb 100755 --- a/stackcollapse-perf.pl +++ b/stackcollapse-perf.pl @@ -326,7 +326,7 @@ sub inline { # 7f722d142778 Ljava/io/PrintStream;::print (/tmp/perf-19982.map) if (scalar(@inline) > 0) { $func .= "_[i]"; # inlined - } elsif ($annotate_kernel == 1 && $mod =~ m/(^\[|vmlinux$)/ && $mod !~ /unknown/) { + } elsif ($annotate_kernel == 1 && $mod =~ m/(^\[|(.ko|vmlinux.*)$)/ && $mod !~ /unknown/) { $func .= "_[k]"; # kernel } elsif ($annotate_jit == 1 && $mod =~ m:/tmp/perf-\d+\.map:) { $func .= "_[j]"; # jitted diff --git a/stackcollapse-tracecmd.pl b/stackcollapse-tracecmd.pl new file mode 100755 index 00000000..cdebe0ed --- /dev/null +++ b/stackcollapse-tracecmd.pl @@ -0,0 +1,181 @@ +#!/usr/bin/perl -w + +use strict; +use Getopt::Long; + +my %collapsed; +my @stack; + +my $addr; +my $cpu; +my $func_name; +my $gfp_flags; +my $include_page_order; +my $include_page_type; +my $include_pname; +my $migrate_type; +my $mtype; +my $m_pid; +my $page_alloc_event_cpu; +my $pname; +my $order; +my $trace_func_name; +my $tsp; + + +GetOptions('page_order' => \$include_page_order, + 'page_type' => \$include_page_type, + 'process_name' => \$include_pname, + 'trace_func_name=s' => \$trace_func_name) +or die < outfile\n + --page_order # generate the flamegraph based on the page order + --page_type # include the migrate_type and gfp_flgs in the stack + --process_name # include process names to isolate callstack + --trace_func_name=FUNT # Traced function name\n +USAGE_END + + +sub remember_stack { + my ($stack, $count) = @_; + $collapsed{$stack} += $count; +} + + +sub summarize_stack_and_cleanup { + if ($include_pname) { + unshift @stack, $pname; + } + + if($include_page_type) { + + if(!defined($migrate_type)) { + return; + } + + if($migrate_type == 0) { + $mtype="MIGRATE_UNMOVABLE"; + } elsif($migrate_type == 1) { + $mtype="MIGRATE_MOVABLE"; + } elsif($migrate_type == 2) { + $mtype="MIGRATE_RECLAIMABLE"; + } elsif($migrate_type == 3) { + $mtype="MIGRATE_HIGHATOMIC"; + } elsif($migrate_type == 4) { + $mtype="MIGRATE_CMA"; + } elsif($migrate_type == 5) { + $mtype="MIGRATE_ISOLATE"; + } + + unshift @stack, "$mtype, $gfp_flags"; + } + + if($include_page_order) { + if(!defined($order)) { + return; + } + unshift @stack, "order $order"; + remember_stack(join(";", @stack), 2**$order * 4); + } else { + remember_stack(join(";", @stack), 1); + } + undef $pname; + undef @stack; + undef $cpu; + undef $tsp; + undef $page_alloc_event_cpu; + undef $order; + undef $migrate_type; + undef $gfp_flags; +} + + +# +# Main loop +# +while (defined($_ = <>)) { + + # Ftrace raw output + # 1287587.105144 | 1) | /* mm_page_alloc: page=0000000082b1fe73 pfn=1704824 order=0 migratetype=1 gfp_flags=GFP_HIGHUSER_MOVABLE|__GFP_ZERO */ + if ($include_page_order || $include_page_type) { + # ftrace raw output + if (/^\s+(\d+\.\d+)\s+|\s+(\d+)\).+mm_page_alloc.+order=(\d+)\s+migratetype=(\d+)\s+gfp_flags=(.+)\*/) { + if (defined $pname) { + summarize_stack_and_cleanup(); + } + $page_alloc_event_cpu = $2; + $order = $3; + $migrate_type = $4; + $gfp_flags = $5; + next; + } + # trace-cmd output + # sudo trace-cmd record -l __alloc_pages_nodemask -e kmem:mm_page_alloc -T sleep 1 + # <...>-187168 [003] 14540.343835: mm_page_alloc: page=0x3070e0 pfn=3174624 order=1 migratetype=0 gfp_flags=GFP_KERNEL_ACCOUNT|__GFP_ZERO + if (/^\s+(.+)\[(\d+)\]\s+\d+\.\d+.+mm_page_alloc.+order=(\d+)\s+migratetype=(\d+)\s+gfp_flags=(.+)(\*)*/) { + if (defined $pname) { + summarize_stack_and_cleanup(); + } + + $page_alloc_event_cpu = $2; + $order = $3; + $migrate_type = $4; + $gfp_flags = $5; + next; + } + } + + # find the name of the process in the first line of callstack of ftrace. + # trace-cmd output ex: swift-object-se-18999 [005] 3693413.521874: kernel_stack: + # ftrace output ex: <...>-503310 [001] .... 800755.732210: + if (/^\s+(.+)\s+\[(\d+)\]\s+.*?(\d+\.\d+)\:(\skernel_stack\:)*\s+\/) { + if (defined $pname && !($include_page_order || $include_page_type)) { + # The sample count flamegraph needs to summarize stack here + summarize_stack_and_cleanup(); + } + $pname = $1; + $cpu = $2; + $tsp = $3; + + if (defined $trace_func_name) { + unshift @stack, $trace_func_name; + } + next; + } + + # trace-cmd output: + # => xfs_buf_get_map (ffffffffc065ed5c) + # => xfs_buf_read_map (ffffffffc065f98d) + # ftrace output: + # => __alloc_pages_nodemask+0x233/0x320 + # => alloc_pages_current+0x87/0xe0 + # => __get_free_pages+0x11/0x40 + # => __tlb_remove_page_size+0x5b/0x90 + # => zap_pte_range.isra.0+0x2a5/0x7d0 + # => unmap_page_range+0x2dc/0x4a0 + # => unmap_single_vma+0x7f/0xf0 + # => unmap_vmas+0x70/0xe0 + # => exit_mmap+0xb4/0x1b0 + # => mmput+0x50/0x120 + # => do_exit+0x2f8/0xae0 + # => do_group_exit+0x43/0xa0 + # => __x64_sys_exit_group+0x18/0x20 + # => do_syscall_64+0x57/0x190 + # => entry_SYSCALL_64_after_hwframe+0x44/0xa9 + if (defined $pname) { + if (/\s*\=\>\s+(.+)\s*([\(\<]([a-f\d]+)[\)\>])*/) { + $func_name = $1; + $addr = $2; + unshift @stack, $func_name; + + # Parse to any other weird lines which is not the stack trace + # or the kernel_stack line. + } else { + next; + } + } +} + +foreach my $k (sort { $a cmp $b } keys %collapsed) { + print "$k $collapsed{$k}\n"; +} diff --git a/stackcollapse-tracecmd.py b/stackcollapse-tracecmd.py new file mode 100644 index 00000000..20f2ac56 --- /dev/null +++ b/stackcollapse-tracecmd.py @@ -0,0 +1,296 @@ +#!/usr/bin/python3 +import argparse +import re + + +""" funcgraph example +sysctl-1757 [000] 1718.797855: funcgraph_entry: | __kmalloc_track_caller() { +sysctl-1757 [000] 1718.797855: funcgraph_entry: 0.159 us | kmalloc_slab(); +sysctl-1757 [000] 1718.797855: funcgraph_entry: | _cond_resched() { +sysctl-1757 [000] 1718.797856: funcgraph_entry: 0.159 us | rcu_all_qs(); +sysctl-1757 [000] 1718.797856: funcgraph_exit: 0.466 us | } +sysctl-1757 [000] 1718.797856: funcgraph_entry: 0.159 us | should_failslab(); +sysctl-1757 [000] 1718.797856: funcgraph_entry: 0.165 us | memcg_kmem_put_cache(); +sysctl-1757 [000] 1718.797857: funcgraph_exit: 1.789 us | } +sysctl-1757 [000] 1718.797857: funcgraph_entry: | __check_object_size() { +sysctl-1757 [000] 1718.797857: funcgraph_entry: 0.166 us | check_stack_object(); +sysctl-1757 [000] 1718.797857: funcgraph_entry: 0.174 us | __virt_addr_valid(); +sysctl-1757 [000] 1718.797858: funcgraph_entry: 0.271 us | __check_heap_object(); +sysctl-1757 [000] 1718.797858: funcgraph_exit: 1.540 us | } +sysctl-1757 [000] 1718.797859: funcgraph_entry: 0.576 us | kfree(); +sysctl-1757 [000] 1718.797859: funcgraph_entry: 0.174 us | kfree(); +""" + + +class FtraceAnalyzer: + def __init__(self, gen_flamegraph=False, merge=False): + self.regexp_arr = { + "fentry": r".+funcgraph_entry.+\|\s+(\w+).+{", + "fentry_single": r".+\:\s+(?P