Opened 7 months ago

Last modified 7 months ago

#16298 new bug

Awful(?) code in AArch64 stg_BLACKHOLE entry code

Reported by: bgamari Owned by:
Priority: normal Milestone:
Component: Compiler Version: 8.7
Keywords: Cc:
Operating System: Unknown/Multiple Architecture: aarch64
Type of failure: Runtime performance bug Test Case:
Blocked By: Blocking:
Related Tickets: Differential Rev(s):
Wiki Page:

Description

It's been a while since I've looked at ARM assembler, but this doesn't look right:

Dump of assembler code for function stg_BLACKHOLE_info$def:
   0x0000ffffb66d6118 <+0>:     mov     x23, x22
   0x0000ffffb66d611c <+4>:     ldr     x22, [x22, #8]
   0x0000ffffb66d6120 <+8>:     tst     x22, #0x7
   0x0000ffffb66d6124 <+12>:    b.ne    0xffffb66d6224 <stg_BLACKHOLE_info$def+268>  // b.any
   0x0000ffffb66d6128 <+16>:    adrp    x25, 0xffffb66fb000
   0x0000ffffb66d612c <+20>:    adrp    x26, 0xffffb66fb000
   0x0000ffffb66d6130 <+24>:    adrp    x27, 0xffffb66fb000
   0x0000ffffb66d6134 <+28>:    adrp    x29, 0xffffb66fb000
   0x0000ffffb66d6138 <+32>:    ldr     x25, [x25, #3880]
   0x0000ffffb66d613c <+36>:    ldr     x26, [x26, #3888]
   0x0000ffffb66d6140 <+40>:    ldr     x27, [x27, #3896]
   0x0000ffffb66d6144 <+44>:    ldr     x29, [x29, #3904]
   0x0000ffffb66d6148 <+48>:    sub     x24, x19, #0x18
   0x0000ffffb66d614c <+52>:    ldr     x8, [x22]
   0x0000ffffb66d6150 <+56>:    cmp     x8, x25
...

Why do we adrp the same page (0xffffb66fb000) four times? Couldn't the page address be computed once and reused as the base for all four loads, with each ldr result going to a separate register?

This was generated by LLVM 7.

Change History (1)

comment:1 Changed 7 months ago by bgamari

I have been staring at this code some more and it's even worse than I thought: there is far more repetition than should be necessary. Here's the full dump:

Dump of assembler code for function stg_BLACKHOLE_info$def:
   0x0000ffffb66d6118 <+0>:     mov     x23, x22          // x23 == x22 == blackhole closure
   0x0000ffffb66d611c <+4>:     ldr     x22, [x22, #8]    // x22 = bh->indirectee
   0x0000ffffb66d6120 <+8>:     tst     x22, #0x7         // GET_TAG(bh->indirectee) == 0?
   0x0000ffffb66d6124 <+12>:    b.ne    0xffffb66d6224 <stg_BLACKHOLE_info$def+268>  // b.any
   0x0000ffffb66d6128 <+16>:    adrp    x25, 0xffffb66fb000
   0x0000ffffb66d612c <+20>:    adrp    x26, 0xffffb66fb000
   0x0000ffffb66d6130 <+24>:    adrp    x27, 0xffffb66fb000         
   0x0000ffffb66d6134 <+28>:    adrp    x29, 0xffffb66fb000            
   0x0000ffffb66d6138 <+32>:    ldr     x25, [x25, #3880]
   0x0000ffffb66d613c <+36>:    ldr     x26, [x26, #3888]                           
   0x0000ffffb66d6140 <+40>:    ldr     x27, [x27, #3896]
   0x0000ffffb66d6144 <+44>:    ldr     x29, [x29, #3904]
   0x0000ffffb66d6148 <+48>:    sub     x24, x19, #0x18
   0x0000ffffb66d614c <+52>:    ldr     x8, [x22] 
   0x0000ffffb66d6150 <+56>:    cmp     x8, x25
   0x0000ffffb66d6154 <+60>:    b.eq    0xffffb66d61a8 <stg_BLACKHOLE_info$def+144>  // b.none
   0x0000ffffb66d6158 <+64>:    cmp     x8, x26
   0x0000ffffb66d615c <+68>:    b.eq    0xffffb66d6178 <stg_BLACKHOLE_info$def+96>  // b.none
   0x0000ffffb66d6160 <+72>:    cmp     x8, x29
   0x0000ffffb66d6164 <+76>:    b.eq    0xffffb66d6178 <stg_BLACKHOLE_info$def+96>  // b.none
   0x0000ffffb66d6168 <+80>:    adrp    x9, 0xffffb66fb000
   0x0000ffffb66d616c <+84>:    ldr     x9, [x9, #3912]
   0x0000ffffb66d6170 <+88>:    cmp     x8, x9
   0x0000ffffb66d6174 <+92>:    b.ne    0xffffb66d61b4 <stg_BLACKHOLE_info$def+156>  // b.any
   0x0000ffffb66d6178 <+96>:    orr     w1, wzr, #0x4
   0x0000ffffb66d617c <+100>:   mov     x0, x24
   0x0000ffffb66d6180 <+104>:   bl      0xffffb66c6fe8 <allocate>
   0x0000ffffb66d6184 <+108>:   str     x27, [x0]
   0x0000ffffb66d6188 <+112>:   ldr     x8, [x19, #872]
   0x0000ffffb66d618c <+116>:   mov     x22, x0
   0x0000ffffb66d6190 <+120>:   mov     x1, x22
   0x0000ffffb66d6194 <+124>:   stp     x8, x23, [x0, #16]
   0x0000ffffb66d6198 <+128>:   mov     x0, x24
   0x0000ffffb66d619c <+132>:   bl      0xffffb66ae648 <messageBlackHole>
   0x0000ffffb66d61a0 <+136>:   cbnz    x0, 0xffffb66d61e4 <stg_BLACKHOLE_info$def+204>
   0x0000ffffb66d61a4 <+140>:   ldr     x22, [x23, #8]
   0x0000ffffb66d61a8 <+144>:   tst     x22, #0x7
   0x0000ffffb66d61ac <+148>:   b.eq    0xffffb66d614c <stg_BLACKHOLE_info$def+52>  // b.none
   0x0000ffffb66d61b0 <+152>:   b       0xffffb66d6224 <stg_BLACKHOLE_info$def+268>
   0x0000ffffb66d61b4 <+156>:   tst     x22, #0x7
   0x0000ffffb66d61b8 <+160>:   b.ne    0xffffb66d6224 <stg_BLACKHOLE_info$def+268>  // b.any
   0x0000ffffb66d61bc <+164>:   b       0xffffb66d61c4 <stg_BLACKHOLE_info$def+172>
   0x0000ffffb66d61c0 <+168>:   ldr     x8, [x22]                                    // $x8 = indirectee->info
=> 0x0000ffffb66d61c4 <+172>:   ldursw  x9, [x8, #-8]
   0x0000ffffb66d61c8 <+176>:   sub     x10, x9, #0x1b
   0x0000ffffb66d61cc <+180>:   cmp     x10, #0x2
   0x0000ffffb66d61d0 <+184>:   b.cs    0xffffb66d6204 <stg_BLACKHOLE_info$def+236>  // b.hs, b.nlast
   0x0000ffffb66d61d4 <+188>:   ldr     x22, [x22, #8]
   0x0000ffffb66d61d8 <+192>:   tst     x22, #0x7
   0x0000ffffb66d61dc <+196>:   b.eq    0xffffb66d61c0 <stg_BLACKHOLE_info$def+168>  // b.none
   0x0000ffffb66d61e0 <+200>:   b       0xffffb66d6224 <stg_BLACKHOLE_info$def+268>
   0x0000ffffb66d61e4 <+204>:   ldr     x8, [x19, #872]
   0x0000ffffb66d61e8 <+208>:   orr     w9, wzr, #0x2
   0x0000ffffb66d61ec <+212>:   strh    w9, [x8, #34]
   0x0000ffffb66d61f0 <+216>:   ldr     x8, [x19, #872]
   0x0000ffffb66d61f4 <+220>:   str     x22, [x8, #40]
   0x0000ffffb66d61f8 <+224>:   mov     x22, x23
   0x0000ffffb66d61fc <+228>:   bl      0xffffb66d2808 <stg_block_blackhole$def>
   0x0000ffffb66d6200 <+232>:   ret
   0x0000ffffb66d6204 <+236>:   cmp     x9, #0x19
   0x0000ffffb66d6208 <+240>:   b.hi    0xffffb66d6230 <stg_BLACKHOLE_info$def+280>  // b.pmore
   0x0000ffffb66d620c <+244>:   orr     w10, wzr, #0x1
   0x0000ffffb66d6210 <+248>:   lsl     x9, x10, x9
   0x0000ffffb66d6214 <+252>:   mov     w10, #0x7f00                    // #32512
   0x0000ffffb66d6218 <+256>:   movk    w10, #0x280, lsl #16
   0x0000ffffb66d621c <+260>:   tst     x9, x10
   0x0000ffffb66d6220 <+264>:   b.eq    0xffffb66d6230 <stg_BLACKHOLE_info$def+280>  // b.none
   0x0000ffffb66d6224 <+268>:   ldr     x8, [x20]
   0x0000ffffb66d6228 <+272>:   blr     x8
   0x0000ffffb66d622c <+276>:   ret
   0x0000ffffb66d6230 <+280>:   blr     x8
   0x0000ffffb66d6234 <+284>:   ret
End of assembler dump.
Note: See TracTickets for help on using tickets.