runtime: Inlined struct copies via params, returns and assignment not elided

As seen in ValueTaskAwaiter in https://github.com/dotnet/coreclr/pull/22735 and https://github.com/dotnet/coreclr/pull/22738. There wasn’t a specific issue for this with a simple repro, so I’m opening this one.

using System.Runtime.CompilerServices;

/// <summary>
/// Minimal repro: each test method obtains a 32-byte struct from a non-inlined
/// factory, passes it through a helper marked for aggressive inlining, and then
/// reads a single field (l3) from the result.
/// </summary>
class Program
{
    // Calls both scenarios; the returned values are discarded.
    static void Main(string[] args)
    {
        InlinedAssignment();
        InlinedCtor();
    }

    /// <summary>
    /// Scenario 1: the struct is passed by value to an identity helper and the
    /// result is assigned back over the same local.
    /// </summary>
    /// <returns>The l3 field of the struct.</returns>
    // NoInlining requests that this method not be inlined into Main.
    [MethodImpl(MethodImplOptions.NoInlining)]
    static long InlinedAssignment()
    {
        var s = CreateLargeStruct();
        // GetLargeStruct returns its argument unchanged, so this assignment
        // leaves the value of s as it was.
        s = GetLargeStruct(s);

        return s.l3;
    }

    /// <summary>
    /// Scenario 2: the struct is passed by value to the LargeStruct2 constructor,
    /// which stores it in a field; the field is then read back.
    /// </summary>
    /// <returns>The l3 field of the wrapped struct.</returns>
    // NoInlining requests that this method not be inlined into Main.
    [MethodImpl(MethodImplOptions.NoInlining)]
    static long InlinedCtor()
    {
        var s = new LargeStruct2(CreateLargeStruct());

        return s._largeStruct.l3;
    }

    /// <summary>Identity helper: takes a LargeStruct by value and returns it.</summary>
    // AggressiveInlining asks the JIT to inline this into its callers if possible.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static LargeStruct GetLargeStruct(LargeStruct l) => l;

    /// <summary>Returns a default-initialized LargeStruct.</summary>
    // NoInlining requests that this stay a real call in the test methods.
    [MethodImpl(MethodImplOptions.NoInlining)]
    static LargeStruct CreateLargeStruct() => new LargeStruct();
}

/// <summary>
/// Immutable struct made of four long fields (4 x 8 = 32 bytes of field data).
/// </summary>
readonly struct LargeStruct
{
    public readonly long l0;
    public readonly long l1;
    public readonly long l2;
    // Last field; this is the one the test methods read back.
    public readonly long l3;
}

/// <summary>
/// Immutable wrapper holding a single LargeStruct field.
/// </summary>
readonly struct LargeStruct2
{
    public readonly LargeStruct _largeStruct;

    /// <summary>Stores the given struct in the wrapper's field.</summary>
    /// <param name="largeStruct">Struct value to wrap; passed by value.</param>
    // AggressiveInlining asks the JIT to inline this constructor into callers if possible.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public LargeStruct2(LargeStruct largeStruct)
    {
        _largeStruct = largeStruct;
    }
}

Produces

; Assembly listing for method Program:InlinedAssignment():long
;  V00 OutArgs      [V00    ] (  1,  1   )  lclBlk (32) [rsp+0x00]   "OutgoingArgSpace"
;  V01 tmp1         [V01    ] (  2,  4   )  struct (32) [rsp+0x48]   do-not-enreg[XSB] addr-exposed "struct address for call/obj"
;* V02 tmp2         [V02    ] (  0,  0   )  struct (32) zero-ref    "struct address for call/obj"
;  V03 tmp3         [V03    ] (  2,  4   )  struct (32) [rsp+0x28]   do-not-enreg[XSB] addr-exposed "Inlining Arg"
;* V04 tmp4         [V04    ] (  0,  0   )    long  ->  zero-ref    V02.l0(offs=0x00) P-INDEP "field V02.l0 (fldOffset=0x0)"
;* V05 tmp5         [V05    ] (  0,  0   )    long  ->  zero-ref    V02.l1(offs=0x08) P-INDEP "field V02.l1 (fldOffset=0x8)"
;* V06 tmp6         [V06    ] (  0,  0   )    long  ->  zero-ref    V02.l2(offs=0x10) P-INDEP "field V02.l2 (fldOffset=0x10)"
;  V07 tmp7         [V07,T01] (  2,  2   )    long  ->  rax         V02.l3(offs=0x18) P-INDEP "field V02.l3 (fldOffset=0x18)"
;  V08 tmp8         [V08,T00] (  3,  6   )   byref  ->  rax         "BlockOp address local"
; Lcl frame size = 104

G_M60975_IG01:
       4883EC68             sub      rsp, 104
       C5F877               vzeroupper 

G_M60975_IG02:
       488D4C2448           lea      rcx, bword ptr [rsp+48H]
       E8BF67FFFF           call     Program:CreateLargeStruct():struct
       C5FA6F442448         vmovdqu  xmm0, qword ptr [rsp+48H]
       C5FA7F442428         vmovdqu  qword ptr [rsp+28H], xmm0
       C5FA6F442458         vmovdqu  xmm0, qword ptr [rsp+58H]
       C5FA7F442438         vmovdqu  qword ptr [rsp+38H], xmm0
       488D442428           lea      rax, bword ptr [rsp+28H]
       488B10               mov      rdx, qword ptr [rax]
       488B4018             mov      rax, qword ptr [rax+24]

G_M60975_IG03:
       4883C468             add      rsp, 104
       C3                   ret      

; Total bytes of code 58, prolog size 7 for method Program:InlinedAssignment():long

and

; Assembly listing for method Program:InlinedCtor():long
;  V00 OutArgs      [V00    ] (  1,  1   )  lclBlk (32) [rsp+0x00]   "OutgoingArgSpace"
;  V01 tmp1         [V01,T00] (  2,  4   )  struct (32) [rsp+0x68]   do-not-enreg[SFB] "NewObj constructor temp"
;  V02 tmp2         [V02    ] (  2,  4   )  struct (32) [rsp+0x48]   do-not-enreg[XSB] addr-exposed "struct address for call/obj"
;  V03 tmp3         [V03,T01] (  2,  4   )  struct (32) [rsp+0x28]   do-not-enreg[SB] "Inlining Arg"
;
; Lcl frame size = 136

G_M31928_IG01:
       4881EC88000000       sub      rsp, 136
       C5F877               vzeroupper 

G_M31928_IG02:
       488D4C2448           lea      rcx, bword ptr [rsp+48H]
       E8BCFFFFFF           call     Program:CreateLargeStruct():struct
       C5FA6F442448         vmovdqu  xmm0, qword ptr [rsp+48H]
       C5FA7F442428         vmovdqu  qword ptr [rsp+28H], xmm0
       C5FA6F442458         vmovdqu  xmm0, qword ptr [rsp+58H]
       C5FA7F442438         vmovdqu  qword ptr [rsp+38H], xmm0
       C5FA6F442428         vmovdqu  xmm0, qword ptr [rsp+28H]
       C5FA7F442468         vmovdqu  qword ptr [rsp+68H], xmm0
       C5FA6F442438         vmovdqu  xmm0, qword ptr [rsp+38H]
       C5FA7F442478         vmovdqu  qword ptr [rsp+78H], xmm0
       488B842480000000     mov      rax, qword ptr [rsp+80H]

G_M31928_IG03:
       4881C488000000       add      rsp, 136
       C3                   ret      

; Total bytes of code 84, prolog size 10 for method Program:InlinedCtor():long

Could some of these copies be skipped?

/cc @AndyAyersMS @mikedn @stephentoub @jkotas

category:cq theme:structs skill-level:expert cost:large

About this issue

  • Original URL
  • State: closed
  • Created 5 years ago
  • Comments: 17 (17 by maintainers)

Most upvoted comments

I believe this was fixed by #64130. Codegen on main is:

; Method Program:InlinedAssignment():long
G_M53179_IG01:
       sub      rsp, 72
						;; size=4 bbWeight=1    PerfScore 0.25

G_M53179_IG02:
       lea      rcx, [rsp+28H]
       call     [Program:CreateLargeStruct():LargeStruct]
       mov      rax, qword ptr [rsp+40H]
						;; size=16 bbWeight=1    PerfScore 4.50

G_M53179_IG03:
       add      rsp, 72
       ret      
						;; size=5 bbWeight=1    PerfScore 1.25
; Total bytes of code: 25
; Method Program:InlinedCtor():long
G_M38338_IG01:
       sub      rsp, 72
						;; size=4 bbWeight=1    PerfScore 0.25

G_M38338_IG02:
       lea      rcx, [rsp+28H]
       call     [Program:CreateLargeStruct():LargeStruct]
       mov      rax, qword ptr [rsp+40H]
						;; size=16 bbWeight=1    PerfScore 4.50

G_M38338_IG03:
       add      rsp, 72
       ret      
						;; size=5 bbWeight=1    PerfScore 1.25
; Total bytes of code: 25