Lines Matching +full:check +full:- +full:cfi +full:- +full:x86_64
2 # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
4 # Copyright (C) 2017-2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
5 # Copyright (C) 2017-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
6 # Copyright (C) 2006-2017 CRYPTOGAMS by <appro@openssl.org>. All Rights Reserved.
20 # This module implements Poly1305 hash for x86_64.
35 # Skylake-X system performance. Since we are likely to suppress
36 # AVX512F capability flag [at least on Skylake-X], conversion serves
43 # IALU/gcc-4.8(*) AVX(**) AVX2 AVX-512
44 # P4 4.46/+120% -
45 # Core 2 2.41/+90% -
46 # Westmere 1.88/+120% -
49 # Skylake[-X] 1.13/+120% 0.96 0.51 [0.35]
50 # Silvermont 2.83/+95% -
52 # Goldmont 1.70/+180% -
53 # VIA Nano 1.82/+150% -
54 # Sledgehammer 1.38/+160% -
61 # (**) SSE2 implementation was attempted, but among non-AVX processors
62 # it was faster than integer-only code only on older Intel P4 and
63 # Core processors, by 50-30% (less the newer the processor is), but slower on
78 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
79 ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
80 die "can't locate x86_64-xlate.pl";
85 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
86 =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
91 `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)(?:\.([0-9]+))?/) {
97 `ml64 2>&1` =~ /Version ([0-9]+)\./) {
101 if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([3-9]\.[0-9]+)/) {
126 $code .= ".size $name,.-$name\n";
173 .asciz "Poly1305 for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
183 # input: copy of $r1 in %rax, $h0-$h2, $r0-$r1
184 # output: $h0-$h2 *= $r0-$r1
213 mov \$-4,%rax # mask value
264 bt \$`60-32`,%r9 # AVX?
351 dec %r15 # len-=16
372 .cfi_adjust_cfa_offset -48
392 shr \$2,%r10 # did 130-bit value overflow?
423 .type __poly1305_block,\@abi-omnipotent
432 .size __poly1305_block,.-__poly1305_block
434 .type __poly1305_init_avx,\@abi-omnipotent
454 mov %eax,`16*0+0-64`($ctx)
456 mov %edx,`16*0+4-64`($ctx)
463 mov %eax,`16*1+0-64`($ctx)
465 mov %edx,`16*1+4-64`($ctx)
467 mov %eax,`16*2+0-64`($ctx)
469 mov %edx,`16*2+4-64`($ctx)
480 mov %eax,`16*3+0-64`($ctx)
482 mov %edx,`16*3+4-64`($ctx)
484 mov %eax,`16*4+0-64`($ctx)
486 mov %edx,`16*4+4-64`($ctx)
495 mov %eax,`16*5+0-64`($ctx)
497 mov %edx,`16*5+4-64`($ctx)
499 mov %eax,`16*6+0-64`($ctx)
501 mov %edx,`16*6+4-64`($ctx)
507 mov $d1#d,`16*7+0-64`($ctx)
509 mov $d2#d,`16*7+4-64`($ctx)
511 mov $d1#d,`16*8+0-64`($ctx)
512 mov $d2#d,`16*8+4-64`($ctx)
521 mov %eax,`16*0+12-64`($ctx)
525 mov %edx,`16*1+12-64`($ctx)
528 mov %edx,`16*2+12-64`($ctx)
534 mov %eax,`16*3+12-64`($ctx)
537 mov %eax,`16*4+12-64`($ctx)
542 mov %edx,`16*5+12-64`($ctx)
545 mov %edx,`16*6+12-64`($ctx)
550 mov $d1#d,`16*7+12-64`($ctx)
552 mov $d1#d,`16*8+12-64`($ctx)
561 mov %eax,`16*0+8-64`($ctx)
565 mov %edx,`16*1+8-64`($ctx)
568 mov %edx,`16*2+8-64`($ctx)
574 mov %eax,`16*3+8-64`($ctx)
577 mov %eax,`16*4+8-64`($ctx)
582 mov %edx,`16*5+8-64`($ctx)
585 mov %edx,`16*6+8-64`($ctx)
590 mov $d1#d,`16*7+8-64`($ctx)
592 mov $d1#d,`16*8+8-64`($ctx)
594 lea -48-64($ctx),$ctx # size [de-]optimization
597 .size __poly1305_init_avx,.-__poly1305_init_avx
610 and \$-16,$len
645 ################################# base 2^26 -> base 2^64
647 and \$`-1*(1<<31)`,$d1
650 and \$`-1*(1<<31)`,$d2
666 mov \$-4,$d2 # ... so reduce
691 ################################# base 2^64 -> base 2^26
796 ################################# base 2^64 -> base 2^26
855 and \$-32,%rsp
856 sub \$-8,%rsp
857 lea -0x58(%rsp),%r11
861 lea -0xf8(%rsp),%r11
877 lea -32($inp),%rax
908 # expand and copy pre-calculated table to stack
909 vmovdqu `16*1-64`($ctx),$D1
910 vmovdqu `16*2-64`($ctx),$D2
911 vpshufd \$0xEE,$D4,$D3 # 34xx -> 3434
912 vpshufd \$0x44,$D4,$D0 # xx12 -> 1212
913 vmovdqa $D3,-0x90(%r11)
916 vmovdqu `16*3-64`($ctx),$D0
918 vmovdqa $D4,-0x80(%r11)
921 vmovdqu `16*4-64`($ctx),$D1
923 vmovdqa $D3,-0x70(%r11)
926 vmovdqu `16*5-64`($ctx),$D2
928 vmovdqa $D4,-0x60(%r11)
931 vmovdqu `16*6-64`($ctx),$D0
933 vmovdqa $D3,-0x50(%r11)
936 vmovdqu `16*7-64`($ctx),$D1
938 vmovdqa $D4,-0x40(%r11)
941 vmovdqu `16*8-64`($ctx),$D2
943 vmovdqa $D3,-0x30(%r11)
947 vmovdqa $D4,-0x20(%r11)
952 vmovdqa $D3,-0x10(%r11)
1043 vmovdqa -0x90(%r11),$T4 # r0^4
1085 vmovdqa -0x80(%r11),$T2 # r1^4
1091 vpmuludq -0x70(%r11),$H4,$T0 # h4*s1
1098 vmovdqa -0x60(%r11),$T3 # r2^4
1105 vmovdqa -0x50(%r11),$T4 # s2^4
1110 vmovdqa -0x40(%r11),$T2 # r3^4
1115 vmovdqa -0x30(%r11),$T3 # s3^4
1120 vmovdqa -0x10(%r11),$T4 # s4^4
1141 vpmuludq -0x20(%r11),$H0,$H4 # h0*r4
1169 vpaddq $D3,$H4,$H4 # h3 -> h4
1173 vpaddq $D0,$D1,$H1 # h0 -> h1
1180 vpaddq $D1,$H2,$H2 # h1 -> h2
1184 vpaddq $D0,$H0,$H0 # h4 -> h0
1188 vpaddq $D2,$H3,$H3 # h2 -> h3
1192 vpaddq $D0,$H1,$H1 # h0 -> h1
1196 vpaddq $D3,$H4,$H4 # h3 -> h4
1204 vpshufd \$0x10,$D4,$D4 # r0^n, xx12 -> x1x2
1229 vpshufd \$0x10,`16*1-64`($ctx),$H2 # r1^n
1236 vpshufd \$0x10,`16*2-64`($ctx),$H3 # s1^n
1239 vpshufd \$0x10,`16*3-64`($ctx),$H4 # r2^n
1247 vpshufd \$0x10,`16*4-64`($ctx),$H2 # s2^n
1252 vpshufd \$0x10,`16*5-64`($ctx),$H3 # r3^n
1257 vpshufd \$0x10,`16*6-64`($ctx),$H4 # s3^n
1265 vpshufd \$0x10,`16*7-64`($ctx),$H2 # r4^n
1268 vpshufd \$0x10,`16*8-64`($ctx),$H3 # s4^n
1306 vpshufd \$0x32,`16*0-64`($ctx),$T4 # r0^n, 34xx -> x3x4
1322 vpshufd \$0x32,`16*1-64`($ctx),$T2 # r1^n
1330 vpshufd \$0x32,`16*2-64`($ctx),$T3 # s1
1333 vpshufd \$0x32,`16*3-64`($ctx),$T4 # r2
1341 vpshufd \$0x32,`16*4-64`($ctx),$T2 # s2
1346 vpshufd \$0x32,`16*5-64`($ctx),$T3 # r3
1351 vpshufd \$0x32,`16*6-64`($ctx),$T4 # s3
1359 vpshufd \$0x32,`16*7-64`($ctx),$T2 # r4
1362 vpshufd \$0x32,`16*8-64`($ctx),$T3 # s4
1399 vpaddq $H3,$D4,$D4 # h3 -> h4
1403 vpaddq $H0,$D1,$D1 # h0 -> h1
1410 vpaddq $H1,$D2,$D2 # h1 -> h2
1414 vpaddq $H4,$D0,$D0 # h4 -> h0
1418 vpaddq $H2,$D3,$D3 # h2 -> h3
1422 vpaddq $H0,$D1,$D1 # h0 -> h1
1426 vpaddq $H3,$D4,$D4 # h3 -> h4
1428 vmovd $D0,`4*0-48-64`($ctx) # save partially reduced
1429 vmovd $D1,`4*1-48-64`($ctx)
1430 vmovd $D2,`4*2-48-64`($ctx)
1431 vmovd $D3,`4*3-48-64`($ctx)
1432 vmovd $D4,`4*4-48-64`($ctx)
1449 lea -8(%r10),%rsp
1470 shl \$26,%rcx # base 2^26 -> base 2^64
1490 and \$-4,%rcx
1501 shr \$2,%r10 # did 130-bit value overflow?
1532 and \$-16,$len
1567 ################################# base 2^26 -> base 2^64
1569 and \$`-1*(1<<31)`,$d1
1572 and \$`-1*(1<<31)`,$d2
1588 mov \$-4,$d2 # ... so reduce
1619 ################################# base 2^64 -> base 2^26
1729 ################################# base 2^64 -> base 2^26
1800 test \$`1<<16`,%r9d # check for AVX512F
1816 vmovdqa %xmm6,-0xb0(%r10)
1817 vmovdqa %xmm7,-0xa0(%r10)
1818 vmovdqa %xmm8,-0x90(%r10)
1819 vmovdqa %xmm9,-0x80(%r10)
1820 vmovdqa %xmm10,-0x70(%r10)
1821 vmovdqa %xmm11,-0x60(%r10)
1822 vmovdqa %xmm12,-0x50(%r10)
1823 vmovdqa %xmm13,-0x40(%r10)
1824 vmovdqa %xmm14,-0x30(%r10)
1825 vmovdqa %xmm15,-0x20(%r10)
1833 # expand and copy pre-calculated table to stack
1834 vmovdqu `16*0-64`($ctx),%x#$T2
1835 and \$-512,%rsp
1836 vmovdqu `16*1-64`($ctx),%x#$T3
1837 vmovdqu `16*2-64`($ctx),%x#$T4
1838 vmovdqu `16*3-64`($ctx),%x#$D0
1839 vmovdqu `16*4-64`($ctx),%x#$D1
1840 vmovdqu `16*5-64`($ctx),%x#$D2
1842 vmovdqu `16*6-64`($ctx),%x#$D3
1843 vpermd $T2,$T0,$T2 # 00003412 -> 14243444
1844 vmovdqu `16*7-64`($ctx),%x#$D4
1846 vmovdqu `16*8-64`($ctx),%x#$MASK
1850 vmovdqa $T3,0x20-0x90(%rax)
1852 vmovdqa $T4,0x40-0x90(%rax)
1854 vmovdqa $D0,0x60-0x90(%rax)
1856 vmovdqa $D1,0x80-0x90(%rax)
1858 vmovdqa $D2,0xa0-0x90(%rax)
1860 vmovdqa $D3,0xc0-0x90(%rax)
1861 vmovdqa $D4,0xe0-0x90(%rax)
1862 vmovdqa $MASK,0x100-0x90(%rax)
1911 vmovdqa `32*6-0x90`(%rax),$T3 # s3^4
1912 vmovdqa `32*8-0x90`(%rax),$S4 # s4^4
1943 vmovdqa `32*4-0x90`(%rax),$T1 # s2
1961 vmovdqa `32*5-0x90`(%rax),$H2 # r3
1987 vpmuludq `32*7-0x90`(%rax),$H0,$H4 # h0*r4
1998 vpaddq $D3,$H4,$H4 # h3 -> h4
2002 vpaddq $D0,$D1,$H1 # h0 -> h1
2011 vpaddq $D1,$H2,$H2 # h1 -> h2
2015 vpaddq $D4,$H0,$H0 # h4 -> h0
2022 vpaddq $D2,$H3,$H3 # h2 -> h3
2024 vpaddq $T2,$H2,$H2 # modulo-scheduled
2029 vpaddq $D0,$H1,$H1 # h0 -> h1
2035 vpaddq $D3,$H4,$H4 # h3 -> h4
2061 vmovdqu `32*6+4-0x90`(%rax),$T3 # s3^4
2062 vmovdqu `32*8+4-0x90`(%rax),$S4 # s4^4
2082 vmovdqu `32*4+4-0x90`(%rax),$T1 # s2
2093 vmovdqu `32*5+4-0x90`(%rax),$H2 # r3
2112 vpmuludq `32*7+4-0x90`(%rax),$H0,$H4 # h0*r4
2148 vpaddq $D3,$H4,$H4 # h3 -> h4
2152 vpaddq $D0,$D1,$H1 # h0 -> h1
2159 vpaddq $D1,$H2,$H2 # h1 -> h2
2163 vpaddq $D4,$H0,$H0 # h4 -> h0
2167 vpaddq $D2,$H3,$H3 # h2 -> h3
2171 vpaddq $D0,$H1,$H1 # h0 -> h1
2175 vpaddq $D3,$H4,$H4 # h3 -> h4
2177 vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced
2178 vmovd %x#$H1,`4*1-48-64`($ctx)
2179 vmovd %x#$H2,`4*2-48-64`($ctx)
2180 vmovd %x#$H3,`4*3-48-64`($ctx)
2181 vmovd %x#$H4,`4*4-48-64`($ctx)
2184 vmovdqa -0xb0(%r10),%xmm6
2185 vmovdqa -0xa0(%r10),%xmm7
2186 vmovdqa -0x90(%r10),%xmm8
2187 vmovdqa -0x80(%r10),%xmm9
2188 vmovdqa -0x70(%r10),%xmm10
2189 vmovdqa -0x60(%r10),%xmm11
2190 vmovdqa -0x50(%r10),%xmm12
2191 vmovdqa -0x40(%r10),%xmm13
2192 vmovdqa -0x30(%r10),%xmm14
2193 vmovdqa -0x20(%r10),%xmm15
2194 lea -8(%r10),%rsp
2198 lea -8(%r10),%rsp
2230 vmovdqa %xmm6,-0xb0(%r10)
2231 vmovdqa %xmm7,-0xa0(%r10)
2232 vmovdqa %xmm8,-0x90(%r10)
2233 vmovdqa %xmm9,-0x80(%r10)
2234 vmovdqa %xmm10,-0x70(%r10)
2235 vmovdqa %xmm11,-0x60(%r10)
2236 vmovdqa %xmm12,-0x50(%r10)
2237 vmovdqa %xmm13,-0x40(%r10)
2238 vmovdqa %xmm14,-0x30(%r10)
2239 vmovdqa %xmm15,-0x20(%r10)
2247 # expand pre-calculated table
2248 vmovdqu `16*0-64`($ctx),%x#$D0 # will become expanded ${R0}
2249 and \$-512,%rsp
2250 vmovdqu `16*1-64`($ctx),%x#$D1 # will become ... ${R1}
2252 vmovdqu `16*2-64`($ctx),%x#$T0 # ... ${S1}
2253 vmovdqu `16*3-64`($ctx),%x#$D2 # ... ${R2}
2254 vmovdqu `16*4-64`($ctx),%x#$T1 # ... ${S2}
2255 vmovdqu `16*5-64`($ctx),%x#$D3 # ... ${R3}
2256 vmovdqu `16*6-64`($ctx),%x#$T3 # ... ${S3}
2257 vmovdqu `16*7-64`($ctx),%x#$D4 # ... ${R4}
2258 vmovdqu `16*8-64`($ctx),%x#$T4 # ... ${S4}
2259 vpermd $D0,$T2,$R0 # 00003412 -> 14243444
2265 vpsrlq \$32,$R0,$T0 # 14243444 -> 01020304
2354 vpaddq $M3,$D4,$D4 # d3 -> d4
2358 vpaddq $M0,$D1,$D1 # d0 -> d1
2365 vpaddq $M1,$D2,$D2 # d1 -> d2
2369 vpaddq $M4,$D0,$D0 # d4 -> d0
2373 vpaddq $M2,$D3,$D3 # d2 -> d3
2377 vpaddq $M0,$D1,$D1 # d0 -> d1
2381 vpaddq $M3,$D4,$D4 # d3 -> d4
2384 # at this point we have 14243444 in $R0-$S4 and 05060708 in
2385 # $D0-$D4, ...
2390 # ... since input 64-bit lanes are ordered as 73625140, we could
2392 # we could just flow along, hence the goal for $R0-$S4 is
2399 vpermd $R0,$M0,$R0 # 14243444 -> 1---2---3---4---
2405 vpermd $D0,$M0,${R0}{%k1} # 05060708 -> 1858286838784888
2541 vpaddq $H3,$D4,$H4 # h3 -> h4
2547 vpaddq $D0,$H1,$H1 # h0 -> h1
2556 vpaddq $D1,$H2,$H2 # h1 -> h2
2560 vpaddq $D4,$H0,$H0 # h4 -> h0
2562 vpaddq $T2,$H2,$H2 # modulo-scheduled
2567 vpaddq $D2,$D3,$H3 # h2 -> h3
2573 vpaddq $D0,$H1,$H1 # h0 -> h1
2579 vpaddq $D3,$H4,$H4 # h3 -> h4
2722 vpaddq $D3,$H4,$H4 # h3 -> h4
2728 vpaddq $D0,$H1,$H1 # h0 -> h1
2737 vpaddq $D1,$H2,$H2 # h1 -> h2
2743 vpaddq $D4,$H0,$H0 # h4 -> h0
2749 vpaddq $D2,$H3,$H3 # h2 -> h3
2755 vpaddq $D0,$H1,$H1 # h0 -> h1
2761 vpaddq $D3,$H4,$H4 # h3 -> h4
2768 vmovd %x#$H0,`4*0-48-64`($ctx)# save partially reduced
2769 vmovd %x#$H1,`4*1-48-64`($ctx)
2770 vmovd %x#$H2,`4*2-48-64`($ctx)
2771 vmovd %x#$H3,`4*3-48-64`($ctx)
2772 vmovd %x#$H4,`4*4-48-64`($ctx)
2776 movdqa -0xb0(%r10),%xmm6
2777 movdqa -0xa0(%r10),%xmm7
2778 movdqa -0x90(%r10),%xmm8
2779 movdqa -0x80(%r10),%xmm9
2780 movdqa -0x70(%r10),%xmm10
2781 movdqa -0x60(%r10),%xmm11
2782 movdqa -0x50(%r10),%xmm12
2783 movdqa -0x40(%r10),%xmm13
2784 movdqa -0x30(%r10),%xmm14
2785 movdqa -0x20(%r10),%xmm15
2786 lea -8(%r10),%rsp
2790 lea -8(%r10),%rsp
2833 # at amount of multiply-n-accumulate operations. Secondly, it makes it
2834 # impossible to pre-compute multiples of 5 [referred to as s[]/sN in
2883 movq \$-1,64($ctx) # write impossible value
2896 .size poly1305_init_base2_44,.-poly1305_init_base2_44
2914 # blocks with this single-block subroutine, otherwise ensure that
2953 vmovdqu32 0($inp),%x#$T0 # load input as ----3210
2956 vpermd $T0,$inp_permd,$T0 # ----3210 -> --322110
2987 vpaddq $Dhi,$Dlo,$Dlo # note topmost qword :-)
3003 dec %rax # len-=16
3013 .size poly1305_blocks_vpmadd52,.-poly1305_blocks_vpmadd52
3020 # and is handled in 256-bit %ymm registers.
3075 # at this point 64-bit lanes are ordered as 3-1-2-0
3220 vpsrldq \$8,$R0,$R0 # 0-1-0-2
3245 # at this point 64-bit lanes are ordered as x-1-x-0
3336 sub \$4,$len # len-=64
3444 sub \$2,$len # len-=32
3454 .size poly1305_blocks_vpmadd52_4x,.-poly1305_blocks_vpmadd52_4x
3492 vmovdqu64 128($ctx),$R2 # load 1-3-2-4 powers
3564 # At this point Rx holds 1324 powers, RRx - 5768, and the goal
3609 # at this point 64-bit lanes are ordered as 73625140
3701 sub \$8,$len # len-=128
3827 .size poly1305_blocks_vpmadd52_8x,.-poly1305_blocks_vpmadd52_8x
3854 shr \$2,%r10 # did 130-bit value overflow?
3864 .size poly1305_emit_base2_44,.-poly1305_emit_base2_44
3870 { # chacha20-poly1305 helpers
3875 .type xor128_encrypt_n_pad,\@abi-omnipotent
3919 .size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad
3922 .type xor128_decrypt_n_pad,\@abi-omnipotent
3970 .size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad
3984 .type se_handler,\@abi-omnipotent
3998 mov 120($context),%rax # pull context->Rax
3999 mov 248($context),%rbx # pull context->Rip
4001 mov 8($disp),%rsi # disp->ImageBase
4002 mov 56($disp),%r11 # disp->HandlerData
4006 cmp %r10,%rbx # context->Rip<.Lprologue
4009 mov 152($context),%rax # pull context->Rsp
4013 cmp %r10,%rbx # context->Rip>=.Lepilogue
4018 mov -8(%rax),%rbx
4019 mov -16(%rax),%rbp
4020 mov -24(%rax),%r12
4021 mov -32(%rax),%r13
4022 mov -40(%rax),%r14
4023 mov -48(%rax),%r15
4024 mov %rbx,144($context) # restore context->Rbx
4025 mov %rbp,160($context) # restore context->Rbp
4026 mov %r12,216($context) # restore context->R12
4027 mov %r13,224($context) # restore context->R13
4028 mov %r14,232($context) # restore context->R14
4029 mov %r15,240($context) # restore context->R15
4032 .size se_handler,.-se_handler
4034 .type avx_handler,\@abi-omnipotent
4048 mov 120($context),%rax # pull context->Rax
4049 mov 248($context),%rbx # pull context->Rip
4051 mov 8($disp),%rsi # disp->ImageBase
4052 mov 56($disp),%r11 # disp->HandlerData
4056 cmp %r10,%rbx # context->Rip<prologue label
4059 mov 152($context),%rax # pull context->Rsp
4063 cmp %r10,%rbx # context->Rip>=epilogue label
4066 mov 208($context),%rax # pull context->R11
4077 mov %rax,152($context) # restore context->Rsp
4078 mov %rsi,168($context) # restore context->Rsi
4079 mov %rdi,176($context) # restore context->Rdi
4081 mov 40($disp),%rdi # disp->ContextRecord
4088 mov 8(%rsi),%rdx # arg2, disp->ImageBase
4089 mov 0(%rsi),%r8 # arg3, disp->ControlPc
4090 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
4091 mov 40(%rsi),%r10 # disp->ContextRecord
4092 lea 56(%rsi),%r11 # &disp->HandlerData
4093 lea 24(%rsi),%r12 # &disp->EstablisherFrame
4112 .size avx_handler,.-avx_handler
4236 s/%r([a-z]+)#d/%e$1/g;
4237 s/%r([0-9]+)#d/%r$1d/g;
4241 s/(^\.type.*),[0-9]+$/\1/;
4242 s/(^\.type.*),\@abi-omnipotent+$/\1,\@function/;
4243 next if /^\.cfi.*/;