runtime: Regressions in System.Memory.Span
Found as part of 5.0 -> 6.0 comparison.
Run Information
Architecture | x64 |
---|---|
OS | Windows 10.0.18362 |
Baseline | a1fd4e98c87300110a3bdf55c400ec133e9202da |
Compare | 84bf5bcc43fe7a95f73f4b7cee9c0e23af248dd7 |
Regressions in System.Memory.Span<Int32>
Benchmark | Baseline | Test | Test/Base | Baseline IR | Compare IR | IR Ratio | Baseline ETL | Compare ETL |
---|---|---|---|---|---|---|---|---|
SequenceCompareTo | 655.53 ns | 796.30 ns | 1.21 |
Historical Data in Reporting System
Repro
git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f netcoreapp5.0 --filter 'System.Memory.Span<Int32>*'
Payloads
Histogram
System.Memory.Span<Int32>.SequenceCompareTo(Size: 512)
Docs
Profiling workflow for dotnet/runtime repository Benchmarking workflow for dotnet/runtime repository
Run Information
Architecture | x64 |
---|---|
OS | Windows 10.0.18362 |
Baseline | a1fd4e98c87300110a3bdf55c400ec133e9202da |
Compare | 84bf5bcc43fe7a95f73f4b7cee9c0e23af248dd7 |
Regressions in System.Collections.Sort<BigStruct>
Benchmark | Baseline | Test | Test/Base | Baseline IR | Compare IR | IR Ratio | Baseline ETL | Compare ETL |
---|---|---|---|---|---|---|---|---|
List | 9.80 μs | 12.16 μs | 1.24 |
Historical Data in Reporting System
Repro
git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f netcoreapp5.0 --filter 'System.Collections.Sort<BigStruct>*'
Payloads
Histogram
System.Collections.Sort<BigStruct>.List(Size: 512)
Docs
Profiling workflow for dotnet/runtime repository Benchmarking workflow for dotnet/runtime repository
Run Information
Architecture | x64 |
---|---|
OS | Windows 10.0.18362 |
Baseline | a1fd4e98c87300110a3bdf55c400ec133e9202da |
Compare | 84bf5bcc43fe7a95f73f4b7cee9c0e23af248dd7 |
Regressions in System.Net.Tests.Perf_WebUtility
Benchmark | Baseline | Test | Test/Base | Baseline IR | Compare IR | IR Ratio | Baseline ETL | Compare ETL |
---|---|---|---|---|---|---|---|---|
Decode_NoDecodingRequired | 62.76 ns | 72.79 ns | 1.16 | Trace | Trace |
Historical Data in Reporting System
Repro
git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f netcoreapp5.0 --filter 'System.Net.Tests.Perf_WebUtility*'
Payloads
Histogram
System.Net.Tests.Perf_WebUtility.Decode_NoDecodingRequired
Baseline Jit Disasm
; System.Net.Tests.Perf_WebUtility.Decode_NoDecodingRequired()
mov rcx,29B98006708
mov rcx,[rcx]
mov rdx,29B98001B08
mov rdx,[rdx]
jmp near ptr System.Net.WebUtility.UrlDecodeInternal(System.String, System.Text.Encoding)
; Total bytes of code 31
; System.Net.WebUtility.UrlDecodeInternal(System.String, System.Text.Encoding)
push r15
push r14
push rdi
push rsi
push rbp
push rbx
sub rsp,48
vxorps xmm4,xmm4,xmm4
vmovdqa xmmword ptr [rsp+20],xmm4
vmovdqa xmmword ptr [rsp+30],xmm4
xor eax,eax
mov [rsp+40],rax
mov rsi,rcx
test rsi,rsi
je short M01_L00
cmp dword ptr [rsi+8],0
jne short M01_L01
M01_L00:
mov rax,rsi
add rsp,48
pop rbx
pop rbp
pop rsi
pop rdi
pop r14
pop r15
ret
M01_L01:
mov edi,[rsi+8]
mov [rsp+38],edi
mov [rsp+30],rdx
xor edx,edx
mov [rsp+20],rdx
mov [rsp+3C],edx
mov [rsp+40],edx
mov [rsp+28],rdx
xor ebx,ebx
xor ebp,ebp
xor r14d,r14d
test edi,edi
jle near ptr M01_L15
M01_L02:
movsxd rdx,r14d
movzx r15d,word ptr [rsi+rdx*2+0C]
cmp r15d,2B
jne short M01_L03
mov ebp,1
mov r15d,20
jmp near ptr M01_L09
M01_L03:
cmp r15d,25
jne near ptr M01_L09
lea edx,[rdi+0FFFE]
cmp r14d,edx
jge near ptr M01_L09
lea edx,[r14+1]
movsxd rdx,edx
movzx edx,word ptr [rsi+rdx*2+0C]
cmp edx,100
jge short M01_L04
movsxd rdx,edx
mov rcx,7FF83D2D0C88
movzx edx,byte ptr [rdx+rcx]
jmp short M01_L05
M01_L04:
mov edx,0FF
M01_L05:
lea ecx,[r14+2]
movsxd rax,ecx
movzx eax,word ptr [rsi+rax*2+0C]
cmp eax,100
jge short M01_L06
movsxd rax,eax
mov r8,7FF83D2D0C88
movzx eax,byte ptr [rax+r8]
jmp short M01_L07
M01_L06:
mov eax,0FF
M01_L07:
mov r8d,edx
or r8d,eax
cmp r8d,0FF
je short M01_L09
shl edx,4
or edx,eax
movzx ebx,dl
mov r14d,ecx
cmp qword ptr [rsp+28],0
jne short M01_L08
mov edx,[rsp+38]
movsxd rdx,edx
mov rcx,offset MT_System.Byte[]
call CORINFO_HELP_NEWARR_1_VC
mov [rsp+28],rax
M01_L08:
mov edx,[rsp+40]
mov rcx,[rsp+28]
lea eax,[rdx+1]
mov [rsp+40],eax
cmp edx,[rcx+8]
jae near ptr M01_L18
movsxd rdx,edx
mov [rcx+rdx+10],bl
mov ebx,1
jmp near ptr M01_L14
M01_L09:
test r15d,0FF80
jne short M01_L11
movzx r15d,r15b
cmp qword ptr [rsp+28],0
jne short M01_L10
mov edx,[rsp+38]
movsxd rdx,edx
mov rcx,offset MT_System.Byte[]
call CORINFO_HELP_NEWARR_1_VC
mov [rsp+28],rax
M01_L10:
mov ecx,[rsp+40]
mov rax,[rsp+28]
lea edx,[rcx+1]
mov [rsp+40],edx
cmp ecx,[rax+8]
jae near ptr M01_L18
movsxd rcx,ecx
mov [rax+rcx+10],r15b
jmp short M01_L14
M01_L11:
cmp dword ptr [rsp+40],0
jle short M01_L12
lea rcx,[rsp+20]
call System.Net.WebUtility+UrlDecoder.FlushBytes()
M01_L12:
cmp qword ptr [rsp+20],0
jne short M01_L13
mov edx,[rsp+38]
movsxd rdx,edx
mov rcx,offset MT_System.Char[]
call CORINFO_HELP_NEWARR_1_VC
mov [rsp+20],rax
M01_L13:
mov ecx,[rsp+3C]
mov rdx,[rsp+20]
lea r8d,[rcx+1]
mov [rsp+3C],r8d
cmp ecx,[rdx+8]
jae short M01_L18
movsxd rcx,ecx
mov [rdx+rcx*2+10],r15w
M01_L14:
inc r14d
cmp r14d,edi
jl near ptr M01_L02
M01_L15:
test ebx,ebx
jne short M01_L17
test ebp,ebp
je short M01_L16
mov rcx,rsi
mov edx,2B
mov r8d,20
call System.String.Replace(Char, Char)
nop
add rsp,48
pop rbx
pop rbp
pop rsi
pop rdi
pop r14
pop r15
ret
M01_L16:
mov rax,rsi
add rsp,48
pop rbx
pop rbp
pop rsi
pop rdi
pop r14
pop r15
ret
M01_L17:
lea rcx,[rsp+20]
call System.Net.WebUtility+UrlDecoder.GetString()
nop
add rsp,48
pop rbx
pop rbp
pop rsi
pop rdi
pop r14
pop r15
ret
M01_L18:
call CORINFO_HELP_RNGCHKFAIL
int 3
; Total bytes of code 622
Compare Jit Disasm
; System.Net.Tests.Perf_WebUtility.Decode_NoDecodingRequired()
mov rcx,1B19ECE6708
mov rcx,[rcx]
mov rdx,1B19ECE1B10
mov rdx,[rdx]
jmp near ptr System.Net.WebUtility.UrlDecodeInternal(System.String, System.Text.Encoding)
; Total bytes of code 31
; System.Net.WebUtility.UrlDecodeInternal(System.String, System.Text.Encoding)
push r15
push r14
push r13
push r12
push rdi
push rsi
push rbp
push rbx
sub rsp,48
vxorps xmm4,xmm4,xmm4
vmovdqa xmmword ptr [rsp+20],xmm4
vmovdqa xmmword ptr [rsp+30],xmm4
xor eax,eax
mov [rsp+40],rax
mov rsi,rcx
test rsi,rsi
je short M01_L00
mov edi,[rsi+8]
test edi,edi
jne short M01_L01
M01_L00:
mov rax,rsi
add rsp,48
pop rbx
pop rbp
pop rsi
pop rdi
pop r12
pop r13
pop r14
pop r15
ret
M01_L01:
mov edi,[rsi+8]
mov [rsp+38],edi
mov [rsp+30],rdx
xor edx,edx
mov [rsp+20],rdx
mov [rsp+3C],edx
mov [rsp+40],edx
mov [rsp+28],rdx
xor ebx,ebx
xor ebp,ebp
xor r14d,r14d
test edi,edi
jle near ptr M01_L13
M01_L02:
movsxd rdx,r14d
movzx r15d,word ptr [rsi+rdx*2+0C]
cmp r15d,2B
jne short M01_L03
mov ebp,1
mov r15d,20
jmp near ptr M01_L07
M01_L03:
cmp r15d,25
jne near ptr M01_L07
lea edx,[rdi+0FFFE]
cmp r14d,edx
jge near ptr M01_L07
lea edx,[r14+1]
movsxd rdx,edx
movzx edx,word ptr [rsi+rdx*2+0C]
cmp edx,100
jge near ptr M01_L16
movsxd rdx,edx
mov rcx,7FF828D6E728
movzx r12d,byte ptr [rdx+rcx]
M01_L04:
lea r13d,[r14+2]
movsxd rdx,r13d
movzx edx,word ptr [rsi+rdx*2+0C]
cmp edx,100
jge near ptr M01_L17
movsxd rdx,edx
mov rcx,7FF828D6E728
movzx edx,byte ptr [rdx+rcx]
M01_L05:
mov ecx,r12d
or ecx,edx
cmp ecx,0FF
je short M01_L07
shl r12d,4
or edx,r12d
movzx ebx,dl
mov r14d,r13d
cmp qword ptr [rsp+28],0
jne short M01_L06
mov edx,[rsp+38]
movsxd rdx,edx
mov rcx,offset MT_System.Byte[]
call CORINFO_HELP_NEWARR_1_VC
mov [rsp+28],rax
M01_L06:
mov edx,[rsp+40]
mov rcx,[rsp+28]
lea eax,[rdx+1]
mov [rsp+40],eax
cmp edx,[rcx+8]
jae near ptr M01_L18
movsxd rdx,edx
mov [rcx+rdx+10],bl
mov ebx,1
jmp near ptr M01_L12
M01_L07:
test r15d,0FF80
jne short M01_L09
movzx r15d,r15b
cmp qword ptr [rsp+28],0
jne short M01_L08
mov edx,[rsp+38]
movsxd rdx,edx
mov rcx,offset MT_System.Byte[]
call CORINFO_HELP_NEWARR_1_VC
mov [rsp+28],rax
M01_L08:
mov ecx,[rsp+40]
mov rax,[rsp+28]
lea edx,[rcx+1]
mov [rsp+40],edx
cmp ecx,[rax+8]
jae near ptr M01_L18
movsxd rcx,ecx
mov [rax+rcx+10],r15b
jmp short M01_L12
M01_L09:
cmp dword ptr [rsp+40],0
jle short M01_L10
lea rcx,[rsp+20]
call System.Net.WebUtility+UrlDecoder.FlushBytes()
M01_L10:
cmp qword ptr [rsp+20],0
jne short M01_L11
mov edx,[rsp+38]
movsxd rdx,edx
mov rcx,offset MT_System.Char[]
call CORINFO_HELP_NEWARR_1_VC
mov [rsp+20],rax
M01_L11:
mov ecx,[rsp+3C]
mov rdx,[rsp+20]
lea r8d,[rcx+1]
mov [rsp+3C],r8d
cmp ecx,[rdx+8]
jae near ptr M01_L18
movsxd rcx,ecx
mov [rdx+rcx*2+10],r15w
M01_L12:
inc r14d
cmp r14d,edi
jl near ptr M01_L02
M01_L13:
test ebx,ebx
jne short M01_L15
test ebp,ebp
je short M01_L14
mov rcx,rsi
mov edx,2B
mov r8d,20
call System.String.Replace(Char, Char)
nop
add rsp,48
pop rbx
pop rbp
pop rsi
pop rdi
pop r12
pop r13
pop r14
pop r15
ret
M01_L14:
mov rax,rsi
add rsp,48
pop rbx
pop rbp
pop rsi
pop rdi
pop r12
pop r13
pop r14
pop r15
ret
M01_L15:
lea rcx,[rsp+20]
call System.Net.WebUtility+UrlDecoder.GetString()
nop
add rsp,48
pop rbx
pop rbp
pop rsi
pop rdi
pop r12
pop r13
pop r14
pop r15
ret
M01_L16:
mov r12d,0FF
jmp near ptr M01_L04
M01_L17:
mov edx,0FF
jmp near ptr M01_L05
M01_L18:
call CORINFO_HELP_RNGCHKFAIL
int 3
; Total bytes of code 663
Docs
Profiling workflow for dotnet/runtime repository Benchmarking workflow for dotnet/runtime repository
category:performance theme:benchmarks
About this issue
- Original URL
- State: open
- Created 3 years ago
- Comments: 19 (19 by maintainers)
One other trick I found useful. I have before and after jits and they’re compatible. So I can run using one build variant (say before) and load up the other variant’s jit as the altjit, and experiment with fine-grained control over which jit handles which methods.
So if I allow the after jit to compile
PickPivotAndPartition
in the before build, I get slow runs; if I allow the before jit to compile PickPivotAndPartition
I get fast runs. To make this work you have to copy the respective checked jits into the release test roots, because selective alt-jitting is only possible with checked jits, and you have to copy in the other build’s checked jit under a different name.
E.g. (here “2” is after, “3” is before)
Agree – I need to drill in.
Interesting that the Byte and Char versions of CompareTo were unaffected.