Untitled diff

Created Diff never expires
32 removals
Words removed: 48
Total words: 976
Words removed (%): 4.92
112 lines
31 additions
Words added: 34
Total words: 962
Words added (%): 3.53
111 lines
; Assembly listing for method System.HexConverter:EncodeToUtf16_Vector128(System.ReadOnlySpan`1[ubyte],System.Span`1[ushort],uint) (FullOpts)
; Assembly listing for method System.HexConverter:EncodeToUtf16_Vector128(System.ReadOnlySpan`1[ubyte],System.Span`1[ushort],uint) (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; FullOpts code
; optimized code
; optimized code
; rbp based frame
; rbp based frame
; fully interruptible
; fully interruptible
; No PGO data
; No PGO data
; 0 inlinees with PGO data; 7 single block inlinees; 4 inlinees without PGO data
; 0 inlinees with PGO data; 7 single block inlinees; 4 inlinees without PGO data
; Final local variable assignments
; Final local variable assignments
;
;
;* V00 arg0 [V00 ] ( 0, 0 ) struct (16) zero-ref multireg-arg ld-addr-op single-def <System.ReadOnlySpan`1[ubyte]>
;* V00 arg0 [V00 ] ( 0, 0 ) struct (16) zero-ref multireg-arg ld-addr-op single-def <System.ReadOnlySpan`1[ubyte]>
;* V01 arg1 [V01 ] ( 0, 0 ) struct (16) zero-ref multireg-arg single-def <System.Span`1[ushort]>
;* V01 arg1 [V01 ] ( 0, 0 ) struct (16) zero-ref multireg-arg single-def <System.Span`1[ushort]>
; V02 arg2 [V02,T05] ( 3, 3 ) int -> r8 single-def
; V02 arg2 [V02,T04] ( 3, 3 ) int -> r8 single-def
; V03 loc0 [V03,T03] ( 2, 9 ) byref -> rdi single-def
; V03 loc0 [V03,T02] ( 2, 9 ) byref -> rdi single-def
; V04 loc1 [V04,T04] ( 2, 9 ) byref -> rdx single-def
; V04 loc1 [V04,T03] ( 2, 9 ) byref -> rdx single-def
; V05 loc2 [V05,T15] ( 2, 9 ) simd16 -> mm0 <System.Runtime.Intrinsics.Vector128`1[ubyte]>
; V05 loc2 [V05,T14] ( 2, 9 ) simd16 -> mm0 <System.Runtime.Intrinsics.Vector128`1[ubyte]>
; V06 loc3 [V06,T00] ( 8, 49 ) long -> rax
; V06 loc3 [V06,T00] ( 8, 41 ) long -> rax
; V07 loc4 [V07,T02] ( 3, 9 ) long -> rsi
; V07 loc4 [V07,T01] ( 4, 17 ) long -> rcx
;# V08 OutArgs [V08 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;# V08 OutArgs [V08 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
; V09 tmp1 [V09,T17] ( 3, 2 ) simd16 -> mm0
; V09 tmp1 [V09,T16] ( 3, 2 ) simd16 -> mm0
;* V10 tmp2 [V10 ] ( 0, 0 ) struct (32) zero-ref "location for address-of(RValue)" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]>
;* V10 tmp2 [V10 ] ( 0, 0 ) struct (32) zero-ref "location for address-of(RValue)" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]>
;* V11 tmp3 [V11 ] ( 0, 0 ) struct (32) zero-ref "location for address-of(RValue)" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ushort],System.Runtime.Intrinsics.Vector128`1[ushort]]>
;* V11 tmp3 [V11 ] ( 0, 0 ) struct (32) zero-ref "location for address-of(RValue)" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ushort],System.Runtime.Intrinsics.Vector128`1[ushort]]>
;* V12 tmp4 [V12 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg" <System.ReadOnlySpan`1[ubyte]>
;* V12 tmp4 [V12 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg" <System.ReadOnlySpan`1[ubyte]>
;* V13 tmp5 [V13 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg" <System.Span`1[ushort]>
;* V13 tmp5 [V13 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg" <System.Span`1[ushort]>
; V14 tmp6 [V14,T10] ( 3, 48 ) simd16 -> mm2 "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
; V14 tmp6 [V14,T09] ( 3, 48 ) simd16 -> mm2 "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
; V15 tmp7 [V15,T11] ( 2, 32 ) simd16 -> mm3 "spilled call-like call argument"
; V15 tmp7 [V15,T10] ( 2, 32 ) simd16 -> mm3 "spilled call-like call argument"
; V16 tmp8 [V16,T14] ( 2, 16 ) simd16 -> mm2 "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
; V16 tmp8 [V16,T13] ( 2, 16 ) simd16 -> mm2 "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;* V17 tmp9 [V17 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;* V17 tmp9 [V17 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
; V18 tmp10 [V18,T12] ( 2, 32 ) simd16 -> mm2 "impAppendStmt"
; V18 tmp10 [V18,T11] ( 2, 32 ) simd16 -> mm2 "impAppendStmt"
;* V19 tmp11 [V19 ] ( 0, 0 ) struct (32) zero-ref ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]>
;* V19 tmp11 [V19 ] ( 0, 0 ) struct (32) zero-ref ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte]]>
;* V20 tmp12 [V20 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
;* V20 tmp12 [V20 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
;* V21 tmp13 [V21 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;* V21 tmp13 [V21 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;* V22 tmp14 [V22 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;* V22 tmp14 [V22 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;* V23 tmp15 [V23 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;* V23 tmp15 [V23 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;* V24 tmp16 [V24 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;* V24 tmp16 [V24 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;* V25 tmp17 [V25 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;* V25 tmp17 [V25 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;* V26 tmp18 [V26 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;* V26 tmp18 [V26 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ubyte]>
;* V27 tmp19 [V27 ] ( 0, 0 ) struct (32) zero-ref ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ushort],System.Runtime.Intrinsics.Vector128`1[ushort]]>
;* V27 tmp19 [V27 ] ( 0, 0 ) struct (32) zero-ref ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[System.Runtime.Intrinsics.Vector128`1[ushort],System.Runtime.Intrinsics.Vector128`1[ushort]]>
; V28 tmp20 [V28,T13] ( 2, 32 ) simd16 -> mm2 "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ushort]>
; V28 tmp20 [V28,T12] ( 2, 32 ) simd16 -> mm2 "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ushort]>
;* V29 tmp21 [V29 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ushort]>
;* V29 tmp21 [V29 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg" <System.Runtime.Intrinsics.Vector128`1[ushort]>
; V30 tmp22 [V30,T06] ( 2, 2 ) byref -> rdi single-def "field V00._reference (fldOffset=0x0)" P-INDEP
; V30 tmp22 [V30,T05] ( 2, 2 ) byref -> rdi single-def "field V00._reference (fldOffset=0x0)" P-INDEP
; V31 tmp23 [V31,T08] ( 2, 2 ) int -> rsi single-def "field V00._length (fldOffset=0x8)" P-INDEP
; V31 tmp23 [V31,T07] ( 2, 2 ) int -> rsi single-def "field V00._length (fldOffset=0x8)" P-INDEP
; V32 tmp24 [V32,T07] ( 2, 2 ) byref -> rdx single-def "field V01._reference (fldOffset=0x0)" P-INDEP
; V32 tmp24 [V32,T06] ( 2, 2 ) byref -> rdx single-def "field V01._reference (fldOffset=0x0)" P-INDEP
; V33 tmp25 [V33,T09] ( 1, 1 ) int -> rcx single-def "field V01._length (fldOffset=0x8)" P-INDEP
; V33 tmp25 [V33,T08] ( 1, 1 ) int -> rcx single-def "field V01._length (fldOffset=0x8)" P-INDEP
;* V34 tmp26 [V34 ] ( 0, 0 ) simd16 -> zero-ref "field V10.Item1 (fldOffset=0x0)" P-INDEP
;* V34 tmp26 [V34 ] ( 0, 0 ) simd16 -> zero-ref "field V10.Item1 (fldOffset=0x0)" P-INDEP
;* V35 tmp27 [V35 ] ( 0, 0 ) simd16 -> zero-ref "field V10.Item2 (fldOffset=0x10)" P-INDEP
;* V35 tmp27 [V35 ] ( 0, 0 ) simd16 -> zero-ref "field V10.Item2 (fldOffset=0x10)" P-INDEP
;* V36 tmp28 [V36 ] ( 0, 0 ) simd16 -> zero-ref "field V11.Item1 (fldOffset=0x0)" P-INDEP
;* V36 tmp28 [V36 ] ( 0, 0 ) simd16 -> zero-ref "field V11.Item1 (fldOffset=0x0)" P-INDEP
;* V37 tmp29 [V37 ] ( 0, 0 ) simd16 -> zero-ref "field V11.Item2 (fldOffset=0x10)" P-INDEP
;* V37 tmp29 [V37 ] ( 0, 0 ) simd16 -> zero-ref "field V11.Item2 (fldOffset=0x10)" P-INDEP
;* V38 tmp30 [V38 ] ( 0, 0 ) byref -> zero-ref single-def "field V12._reference (fldOffset=0x0)" P-INDEP
;* V38 tmp30 [V38 ] ( 0, 0 ) byref -> zero-ref single-def "field V12._reference (fldOffset=0x0)" P-INDEP
;* V39 tmp31 [V39 ] ( 0, 0 ) int -> zero-ref "field V12._length (fldOffset=0x8)" P-INDEP
;* V39 tmp31 [V39 ] ( 0, 0 ) int -> zero-ref "field V12._length (fldOffset=0x8)" P-INDEP
;* V40 tmp32 [V40 ] ( 0, 0 ) byref -> zero-ref single-def "field V13._reference (fldOffset=0x0)" P-INDEP
;* V40 tmp32 [V40 ] ( 0, 0 ) byref -> zero-ref single-def "field V13._reference (fldOffset=0x0)" P-INDEP
;* V41 tmp33 [V41 ] ( 0, 0 ) int -> zero-ref "field V13._length (fldOffset=0x8)" P-INDEP
;* V41 tmp33 [V41 ] ( 0, 0 ) int -> zero-ref "field V13._length (fldOffset=0x8)" P-INDEP
;* V42 tmp34 [V42 ] ( 0, 0 ) simd16 -> zero-ref "field V19.Item1 (fldOffset=0x0)" P-INDEP
;* V42 tmp34 [V42 ] ( 0, 0 ) simd16 -> zero-ref "field V19.Item1 (fldOffset=0x0)" P-INDEP
;* V43 tmp35 [V43 ] ( 0, 0 ) simd16 -> zero-ref "field V19.Item2 (fldOffset=0x10)" P-INDEP
;* V43 tmp35 [V43 ] ( 0, 0 ) simd16 -> zero-ref "field V19.Item2 (fldOffset=0x10)" P-INDEP
;* V44 tmp36 [V44 ] ( 0, 0 ) simd16 -> zero-ref "field V27.Item1 (fldOffset=0x0)" P-INDEP
;* V44 tmp36 [V44 ] ( 0, 0 ) simd16 -> zero-ref "field V27.Item1 (fldOffset=0x0)" P-INDEP
;* V45 tmp37 [V45 ] ( 0, 0 ) simd16 -> zero-ref "field V27.Item2 (fldOffset=0x10)" P-INDEP
;* V45 tmp37 [V45 ] ( 0, 0 ) simd16 -> zero-ref "field V27.Item2 (fldOffset=0x10)" P-INDEP
; V46 cse0 [V46,T16] ( 2, 9 ) simd16 -> mm1 hoist "CSE #02: aggressive"
; V46 cse0 [V46,T15] ( 2, 9 ) simd16 -> mm1 hoist "CSE #01: aggressive"
; V47 cse1 [V47,T01] ( 3, 10 ) long -> rcx "CSE #01: aggressive"
;
;
; Lcl frame size = 0
; Lcl frame size = 0


; ----------------------------------------------------------------------------
; Machine code for EncodeToUtf16_Vector128, shown as a flattened diff: almost
; every line appears twice, once per compared listing. Where two neighbouring
; lines differ they belong to different listings.
; NOTE(review): the "first listing" / "second listing" attribution in the
; comments below is inferred from the per-group size= footers and from the
; register assignment tables earlier in this file -- confirm against the
; original diff before relying on it.
;
; Register roles as reported by the local variable tables in this file:
;   rdi = V00._reference (input bytes)    rsi = V00._length on entry
;   rdx = V01._reference (output chars)   r8  = arg2
;   rax = loc3 (current input offset)     xmm0 = lookup table, xmm1 = 0x0F mask
; ----------------------------------------------------------------------------
; Prolog: establish the rbp-based frame.
G_M10695_IG01:
G_M10695_IG01:
push rbp
push rbp
mov rbp, rsp
mov rbp, rsp
;; size=4 bbWeight=1 PerfScore 1.25
;; size=4 bbWeight=1 PerfScore 1.25
; Select the 16-byte digit table based on arg2 (r8d):
; zero falls through to RWD00, non-zero jumps to RWD16.
G_M10695_IG02:
G_M10695_IG02:
test r8d, r8d
test r8d, r8d
jne SHORT G_M10695_IG04
jne SHORT G_M10695_IG04
;; size=5 bbWeight=1 PerfScore 1.25
;; size=5 bbWeight=1 PerfScore 1.25
; arg2 == 0: xmm0 = RWD00 (digits '0'-'9' followed by upper-case 'A'-'F').
G_M10695_IG03:
G_M10695_IG03:
vmovups xmm0, xmmword ptr [reloc @RWD00]
vmovups xmm0, xmmword ptr [reloc @RWD00]
jmp SHORT G_M10695_IG05
jmp SHORT G_M10695_IG05
align [0 bytes for IG06]
align [0 bytes for IG06]
;; size=10 bbWeight=0.50 PerfScore 2.50
;; size=10 bbWeight=0.50 PerfScore 2.50
; arg2 != 0: xmm0 = RWD16 (digits '0'-'9' followed by lower-case 'a'-'f').
G_M10695_IG04:
G_M10695_IG04:
vmovups xmm0, xmmword ptr [reloc @RWD16]
vmovups xmm0, xmmword ptr [reloc @RWD16]
;; size=8 bbWeight=0.50 PerfScore 1.50
;; size=8 bbWeight=0.50 PerfScore 1.50
; Loop setup: offset = 0, widen the 32-bit input length, compute length - 4
; (the last offset at which a 4-byte load still fits), load the nibble mask,
; then enter the loop body directly (IG06 is skipped on the first pass).
G_M10695_IG05:
G_M10695_IG05:
xor eax, eax
xor eax, eax
; Zero-extend the input length from esi into rcx.
mov ecx, esi
mov ecx, esi
; First listing: keeps the length in rcx and puts length - 4 in rsi.
lea rsi, [rcx-0x04]
; Second listing: overwrites rcx with length - 4 instead.
add rcx, -4
vmovups xmm1, xmmword ptr [reloc @RWD32]
vmovups xmm1, xmmword ptr [reloc @RWD32]
jmp SHORT G_M10695_IG07
jmp SHORT G_M10695_IG07
;; size=18 bbWeight=1 PerfScore 6.00
;; size=18 bbWeight=1 PerfScore 5.75
; Loop back-edge block: clamp the offset (unsigned) to length - 4 so the final
; iteration re-reads the last 4 input bytes instead of running past the end.
; First listing (size=8):  cmp rax, rsi / jbe / mov rax, rsi
;   -- the offset was already advanced at the bottom of IG07.
; Second listing (size=12): add rax, 4 / cmp rax, rcx / jbe / mov rax, rcx
;   -- the advance by 4 has been moved into this block.
G_M10695_IG06:
G_M10695_IG06:
cmp rax, rsi
add rax, 4
cmp rax, rcx
jbe SHORT G_M10695_IG07
jbe SHORT G_M10695_IG07
mov rax, rsi
mov rax, rcx
;; size=8 bbWeight=4 PerfScore 6.00
;; size=12 bbWeight=4 PerfScore 7.00
; Loop body: convert 4 input bytes into 8 UTF-16 hex characters.
G_M10695_IG07:
G_M10695_IG07:
; Load 4 input bytes at offset rax into the low dword of xmm2.
vmovd xmm2, dword ptr [rdi+rax]
vmovd xmm2, dword ptr [rdi+rax]
; xmm3 = input shifted right by 4 bits; after the mask below, each byte of
; xmm3 contributes the high nibble of the corresponding input byte.
vpsrlq xmm3, xmm2, 4
vpsrlq xmm3, xmm2, 4
; Interleave bytes: shifted byte first, original byte second, so each input
; byte yields the pair (high-nibble source, low-nibble source).
vpunpcklbw xmm2, xmm3, xmm2
vpunpcklbw xmm2, xmm3, xmm2
; Keep only the low 4 bits of every byte -> nibble values 0..15.
vpand xmm2, xmm1, xmm2
vpand xmm2, xmm1, xmm2
; Table lookup: replace each nibble value with the matching byte of xmm0.
vpshufb xmm2, xmm0, xmm2
vpshufb xmm2, xmm0, xmm2
; Zero-extend the 8 low bytes to 8 16-bit elements.
vpmovzxbw xmm2, xmm2
vpmovzxbw xmm2, xmm2
; Store 16 bytes at rdx + 4*rax (each input byte produces 4 output bytes).
vmovups xmmword ptr [rdx+4*rax], xmm2
vmovups xmmword ptr [rdx+4*rax], xmm2
; First listing only: advance the offset by 4 before the exit test.
add rax, 4
; Exit test against rcx. NOTE(review): rcx appears to hold the full length in
; the first listing and length - 4 in the second, so the second listing exits
; once the block starting at length - 4 has been written -- confirm.
cmp rax, rcx
cmp rax, rcx
jne SHORT G_M10695_IG06
jne SHORT G_M10695_IG06
;; size=42 bbWeight=8 PerfScore 94.67
;; size=38 bbWeight=8 PerfScore 92.67
; Epilog: restore rbp and return.
G_M10695_IG08:
G_M10695_IG08:
pop rbp
pop rbp
ret
ret
;; size=2 bbWeight=1 PerfScore 1.50
;; size=2 bbWeight=1 PerfScore 1.50
; Read-only data. Read as little-endian bytes:
;   RWD00 = ASCII "0123456789ABCDEF"
;   RWD16 = ASCII "0123456789abcdef"
;   RWD32 = 0x0F in each of the 16 bytes (low-nibble mask)
RWD00 dq 3736353433323130h, 4645444342413938h
RWD00 dq 3736353433323130h, 4645444342413938h
RWD16 dq 3736353433323130h, 6665646362613938h
RWD16 dq 3736353433323130h, 6665646362613938h
RWD32 dq 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh
RWD32 dq 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh




; Total bytes of code 97, prolog size 4, PerfScore 114.67, instruction count 28, allocated bytes for code 97 (MethodHash=925ed638) for method System.HexConverter:EncodeToUtf16_Vector128(System.ReadOnlySpan`1[ubyte],System.Span`1[ushort],uint) (FullOpts)
; Total bytes of code 97, prolog size 4, PerfScore 113.42, instruction count 28, allocated bytes for code 97 (MethodHash=925ed638) for method System.HexConverter:EncodeToUtf16_Vector128(System.ReadOnlySpan`1[ubyte],System.Span`1[ushort],uint) (FullOpts)
; ============================================================
; ============================================================