runtime: Task/TaskAwaiter completion makes lots of duplicate memory reads
// Minimal repro for the issue: an async method that awaits Task.CompletedTask
// 100 times in a loop. The JIT inline report and disassembly that follow are for
// the compiler-generated state machine of this method
// (<CompletedTask>d__3:MoveNext()), not for this method body directly.
public async Task CompletedTask()
{
// In the listing below, the value compared against 100 is read from and written
// back to [rcx+4]/[rdx+4] off the state machine's `this` pointer on each iteration.
for (int i = 0; i < 100; i++)
// Task.CompletedTask is already complete, so this await is presumably expected
// to take the synchronous path (no AwaitUnsafeOnCompleted call) — confirm against the listing.
await Task.CompletedTask;
}
Inlines into 06000007 <CompletedTask>d__3:MoveNext():this
[1 IL=0019 TR=000066 06001F0C] [below ALWAYS_INLINE size] Task:get_CompletedTask():ref
[2 IL=0024 TR=000069 06001F2E] [below ALWAYS_INLINE size] Task:GetAwaiter():struct:this
[3 IL=0001 TR=000172 060049ED] [below ALWAYS_INLINE size] TaskAwaiter:.ctor(ref):this
[4 IL=0032 TR=000079 060049EE] [below ALWAYS_INLINE size] TaskAwaiter:get_IsCompleted():bool:this
[5 IL=0006 TR=000194 06001F03] [below ALWAYS_INLINE size] Task:get_IsCompleted():bool:this
[6 IL=0010 TR=000206 06001F04] [below ALWAYS_INLINE size] Task:IsCompletedMethod(int):bool
[7 IL=0064 TR=000109 060048CB] [below ALWAYS_INLINE size] AsyncTaskMethodBuilder:AwaitUnsafeOnCompleted(byref,byref):this
[0 IL=0008 TR=000227 060048D5] [FAILED: has exception handling] AsyncTaskMethodBuilder`1:AwaitUnsafeOnCompleted(byref,byref):this
[8 IL=0101 TR=000044 060049F1] [below ALWAYS_INLINE size] TaskAwaiter:GetResult():this
[9 IL=0006 TR=000239 060049F2] [below ALWAYS_INLINE size] TaskAwaiter:ValidateEnd(ref)
[10 IL=0001 TR=000245 06001EE7] [aggressive inline attribute] Task:get_IsWaitNotificationEnabledOrNotRanToCompletion():bool:this
[11 IL=0009 TR=000255 060049F3] [profitable inline] TaskAwaiter:HandleNonSuccessAndDebuggerNotification(ref)
[12 IL=0001 TR=000272 06001F03] [below ALWAYS_INLINE size] Task:get_IsCompleted():bool:this
[13 IL=0010 TR=000327 06001F04] [below ALWAYS_INLINE size] Task:IsCompletedMethod(int):bool
[14 IL=0019 TR=000310 06001F39] [below ALWAYS_INLINE size] Task:InternalWait(int,struct):bool:this
[0 IL=0026 TR=000281 06001EE5] [FAILED: unprofitable inline] Task:NotifyDebuggerOfWaitCompletionIfNecessary():bool:this
[15 IL=0033 TR=000288 06001F05] [profitable inline] Task:get_IsCompletedSuccessfully():bool:this
[0 IL=0041 TR=000298 060049F4] [FAILED: noinline per IL/cached result] TaskAwaiter:ThrowForNonSuccess(ref)
[0 IL=0150 TR=000151 060048CE] [FAILED: within catch region] AsyncTaskMethodBuilder:SetException(ref):this
[16 IL=0171 TR=000126 060048CD] [profitable inline] AsyncTaskMethodBuilder:SetResult():this
[17 IL=0011 TR=000368 060048DE] [profitable inline] AsyncTaskMethodBuilder`1:SetResult(ref):this
[0 IL=0026 TR=000390 060048DC] [FAILED: noinline per IL/cached result] AsyncTaskMethodBuilder`1:SetExistingTaskResult(struct):this
; V00 this [V00,T01] ( 22, 40 ) byref -> [rbp+0x10] do-not-enreg[H] this
; V01 loc0 [V01,T12] ( 2, 2 ) int -> rcx
; V02 loc1 [V02 ] ( 6, 19 ) struct ( 8) [rbp-0x10] do-not-enreg[XS] must-init addr-exposed ld-addr-op
; V03 loc2 [V03,T04] ( 2, 16 ) int -> rdx
;* V04 loc3 [V04 ] ( 0, 0 ) ref -> zero-ref class-hnd
; V05 OutArgs [V05 ] ( 1, 1 ) lclBlk (32) [rsp+0x00] "OutgoingArgSpace"
;* V06 tmp1 [V06 ] ( 0, 0 ) int -> zero-ref "dup spill"
; V07 tmp2 [V07,T15] ( 2, 0 ) ref -> rdx class-hnd "impSpillSpecialSideEff"
; V08 tmp3 [V08,T02] ( 3, 24 ) ref -> rdx class-hnd "Inlining Arg"
;* V09 tmp4 [V09 ] ( 0, 0 ) struct ( 8) zero-ref "NewObj constructor temp"
; V10 tmp5 [V10,T03] ( 2, 16 ) ref -> rdx class-hnd "Inlining Arg"
; V11 tmp6 [V11,T06] ( 2, 8 ) int -> rdx "Inline stloc first use temp"
; V12 tmp7 [V12,T07] ( 2, 4 ) byref -> rcx "Inlining Arg"
; V13 tmp8 [V13,T00] ( 7, 44 ) ref -> rsi class-hnd "Inlining Arg"
;* V14 tmp9 [V14 ] ( 0, 0 ) struct ( 8) zero-ref ld-addr-op "Inline ldloca(s) first use temp"
;* V15 tmp10 [V15 ] ( 0, 0 ) bool -> zero-ref "Inline stloc first use temp"
; V16 tmp11 [V16,T10] ( 2, 4 ) int -> rcx "Inline stloc first use temp"
;* V17 tmp12 [V17 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
; V18 tmp13 [V18,T08] ( 2, 4 ) byref -> rcx "Inlining Arg"
; V19 tmp14 [V19,T05] ( 4, 8 ) byref -> rcx "Inlining Arg"
; V20 tmp15 [V20,T13] ( 2, 2 ) struct ( 8) [rbp-0x18] do-not-enreg[SF] must-init ld-addr-op "Inline ldloca(s) first use temp"
; V21 tmp16 [V21,T09] ( 2, 4 ) ref -> rdx class-hnd exact "Inlining Arg"
; V22 tmp17 [V22 ] ( 6, 19 ) ref -> [rbp-0x10] do-not-enreg[X] addr-exposed V02.m_task(offs=0x00) P-DEP "field V02.m_task (fldOffset=0x0)"
;* V23 tmp18 [V23 ] ( 0, 0 ) ref -> zero-ref V09.m_task(offs=0x00) P-INDEP "field V09.m_task (fldOffset=0x0)"
;* V24 tmp19 [V24,T11] ( 0, 0 ) ref -> zero-ref V14._source(offs=0x00) P-INDEP "field V14._source (fldOffset=0x0)"
;* V25 tmp20 [V25,T14] ( 0, 0 ) ref -> zero-ref V17._source(offs=0x00) P-INDEP "field V17._source (fldOffset=0x0)"
; V26 PSPSym [V26 ] ( 1, 1 ) long -> [rbp-0x20] do-not-enreg[X] addr-exposed "PSPSym"
;* V27 tmp22 [V27 ] ( 0, 0 ) byref -> zero-ref "optAddCopies"
G_M10160_IG01:
55 push rbp
56 push rsi
4883EC38 sub rsp, 56
488D6C2440 lea rbp, [rsp+40H]
33C0 xor rax, rax
488945F0 mov qword ptr [rbp-10H], rax
488945E8 mov qword ptr [rbp-18H], rax
488965E0 mov qword ptr [rbp-20H], rsp
48894D10 mov bword ptr [rbp+10H], rcx ; set [rbp+10H] from rcx
G_M10160_IG02:
488B4D10 mov rcx, bword ptr [rbp+10H] ; read [rbp+10H] to rcx
8B09 mov ecx, dword ptr [rcx]
G_M10160_IG03:
85C9 test ecx, ecx
0F8486000000 je G_M10160_IG05
488B4D10 mov rcx, bword ptr [rbp+10H] ; read [rbp+10H] to rcx
33D2 xor edx, edx
895104 mov dword ptr [rcx+4], edx
488B4D10 mov rcx, bword ptr [rbp+10H] ; read [rbp+10H] to rcx
83790464 cmp dword ptr [rcx+4], 100
0F8DFF000000 jge G_M10160_IG09
G_M10160_IG04: ; ** for body start
48B9783EECCFFB7F0000 mov rcx, 0x7FFBCFEC3E78
BA40020000 mov edx, 576
E81AACBA5F call CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
48BAF8159DDC14020000 mov rdx, 0x214DC9D15F8
488B12 mov rdx, gword ptr [rdx]
8B0A mov ecx, dword ptr [rdx]
488955F0 mov gword ptr [rbp-10H], rdx
488B55F0 mov rdx, gword ptr [rbp-10H]
8B5234 mov edx, dword ptr [rdx+52]
F7C200006001 test edx, 0x1600000
755A jne SHORT G_M10160_IG06
488B5510 mov rdx, bword ptr [rbp+10H] ; read [rbp+10H] to rdx
33C9 xor ecx, ecx
890A mov dword ptr [rdx], ecx
488B5510 mov rdx, bword ptr [rbp+10H] ; read [rbp+10H] to rdx
488D4A10 lea rcx, bword ptr [rdx+16]
488B55F0 mov rdx, gword ptr [rbp-10H]
E8EF93BA5F call CORINFO_HELP_CHECKED_ASSIGN_REF
488B5510 mov rdx, bword ptr [rbp+10H] ; read [rbp+10H] to rdx
3912 cmp dword ptr [rdx], edx
488B5510 mov rdx, bword ptr [rbp+10H] ; read [rbp+10H] to rdx
488D4A08 lea rcx, bword ptr [rdx+8]
488D55F0 lea rdx, bword ptr [rbp-10H]
4C8B4510 mov r8, bword ptr [rbp+10H]
E8D4F5FFFF call AsyncTaskMethodBuilder`1:AwaitUnsafeOnCompleted(byref,byref):this
E9D0000000 jmp G_M10160_IG11
G_M10160_IG05:
488B4D10 mov rcx, bword ptr [rbp+10H] ; read [rbp+10H] to rcx
488B4910 mov rcx, gword ptr [rcx+16]
48894DF0 mov gword ptr [rbp-10H], rcx
488B4D10 mov rcx, bword ptr [rbp+10H] ; read [rbp+10H] to rcx
4533C0 xor r8, r8
4C894110 mov qword ptr [rcx+16], r8
488B4D10 mov rcx, bword ptr [rbp+10H] ; read [rbp+10H] to rcx
C701FFFFFFFF mov dword ptr [rcx], -1
G_M10160_IG06:
488B75F0 mov rsi, gword ptr [rbp-10H]
8B4E34 mov ecx, dword ptr [rsi+52]
81E100000011 and ecx, 0x11000000
81F900000001 cmp ecx, 0x1000000
743C je SHORT G_M10160_IG08
8B4E34 mov ecx, dword ptr [rsi+52]
F7C100006001 test ecx, 0x1600000
7510 jne SHORT G_M10160_IG07
488BCE mov rcx, rsi
4533C0 xor r8, r8
BAFFFFFFFF mov edx, -1
E83E27AC5A call Task:InternalWaitCore(int,struct):bool:this
G_M10160_IG07:
488BCE mov rcx, rsi
E8B600AC5A call Task:NotifyDebuggerOfWaitCompletionIfNecessary():bool:this
8B4E34 mov ecx, dword ptr [rsi+52]
81E100006001 and ecx, 0x1600000
81F900000001 cmp ecx, 0x1000000
7408 je SHORT G_M10160_IG08
488BCE mov rcx, rsi
E8DDEBB05A call TaskAwaiter:ThrowForNonSuccess(ref)
G_M10160_IG08:
488B5510 mov rdx, bword ptr [rbp+10H] ; read [rbp+10H] to rdx
8B5204 mov edx, dword ptr [rdx+4]
FFC2 inc edx
488B4D10 mov rcx, bword ptr [rbp+10H] ; read [rbp+10H] to rcx
895104 mov dword ptr [rcx+4], edx
488B5510 mov rdx, bword ptr [rbp+10H] ; read [rbp+10H] to rdx
837A0464 cmp dword ptr [rdx+4], 100
0F8C01FFFFFF jl G_M10160_IG04 ; ** for body end
G_M10160_IG09:
488B5510 mov rdx, bword ptr [rbp+10H] ; read [rbp+10H] to rdx
C702FEFFFFFF mov dword ptr [rdx], -2
488B5510 mov rdx, bword ptr [rbp+10H] ; read [rbp+10H] to rdx
3912 cmp dword ptr [rdx], edx
488B5510 mov rdx, bword ptr [rbp+10H] ; read [rbp+10H] to rdx
488D4A08 lea rcx, bword ptr [rdx+8]
48BA88259DDC14020000 mov rdx, 0x214DC9D2588
488B12 mov rdx, gword ptr [rdx]
48833900 cmp gword ptr [rcx], 0
7507 jne SHORT G_M10160_IG10
E80F93BA5F call CORINFO_HELP_CHECKED_ASSIGN_REF
EB0E jmp SHORT G_M10160_IG11
G_M10160_IG10:
C645E800 mov byte ptr [rbp-18H], 0
480FBE55E8 movsx rdx, byte ptr [rbp-18H]
E8FFCCB05A call AsyncTaskMethodBuilder`1:SetExistingTaskResult(struct):this
G_M10160_IG11:
90 nop
G_M10160_IG12:
488D65F8 lea rsp, [rbp-08H]
5E pop rsi
5D pop rbp
C3 ret
G_M10160_IG13:
55 push rbp
56 push rsi
4883EC28 sub rsp, 40
488B6920 mov rbp, qword ptr [rcx+32]
48896C2420 mov qword ptr [rsp+20H], rbp
488D6D40 lea rbp, [rbp+40H]
G_M10160_IG14:
488B4D10 mov rcx, bword ptr [rbp+10H] ; read [rbp+10H] to rcx
C701FEFFFFFF mov dword ptr [rcx], -2
488B4D10 mov rcx, bword ptr [rbp+10H] ; read [rbp+10H] to rcx
3909 cmp dword ptr [rcx], ecx
488B4D10 mov rcx, bword ptr [rbp+10H] ; read [rbp+10H] to rcx
4883C108 add rcx, 8
E8A716B35A call AsyncTaskMethodBuilder:SetException(ref):this
488D05C1FFFFFF lea rax, G_M10160_IG11
G_M10160_IG15:
4883C428 add rsp, 40
5E pop rsi
5D pop rbp
C3 ret
; Total bytes of code 455, prolog size 29 for method <CompletedTask>d__3:MoveNext():this
Can these repeated reloads of [rbp+10H] be elided where the register has not changed (since it was loaded just a line or so before)?
category:cq theme:cse skill-level:expert cost:extra-large impact:large
About this issue
- Original URL
- State: open
- Created 5 years ago
- Comments: 31 (31 by maintainers)
Commits related to this issue
- Put single-def CSEs into SSA. If we create a single-def temp during CSE, add it into SSA so uses of the temp can benefit from assertion prop and similar. Follow-up from some work proposed for #21973. — committed to AndyAyersMS/coreclr by AndyAyersMS 5 years ago
- Put single-def CSEs into SSA (#26986). If we create a single-def temp during CSE, add it into SSA so uses of the temp can benefit from assertion prop and similar. Follow-up from some work proposed... — committed to dotnet/coreclr by AndyAyersMS 5 years ago
- Put single-def CSEs into SSA (#26986). If we create a single-def temp during CSE, add it into SSA so uses of the temp can benefit from assertion prop and similar. Follow-up from some work proposed... — committed to SrivastavaAnubhav/coreclr by AndyAyersMS 5 years ago
- - Change `gtIsLikelyRegVar` to return false for defs of EHvars, since they always go to memory. - Never split a block if the only resolution moves are for EH vars. - Add a test case to enable diff tra... — committed to CarolEidt/coreclr by CarolEidt 5 years ago
This shows some promise – on the example above (and with dotnet/coreclr#21944):
and more widely there are some nice wins and a net win, but also some regressions to investigate: