77841-CopyMatrix-diffs

Created Diff never expires
31 removals
275 lines
30 additions
274 lines
; Assembly listing for method PimpMyNet.Program:CopyMatrix(double[][],double[][])
; Assembly listing for method PimpMyNet.Program:CopyMatrix(double[][],double[][])
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; Emitting BLENDED_CODE for X64 CPU with AVX - Windows
; optimized code
; optimized code
; rsp based frame
; rsp based frame
; fully interruptible
; fully interruptible
; No PGO data
; No PGO data


; Prologue. Saves r15, r14, r12, rdi, rsi, rbp and rbx (all callee-saved in
; the Windows x64 convention named in the listing header), then reserves
; 32 bytes of stack.
; NOTE(review): the 32 bytes are presumably the outgoing shadow space for the
; helper call in IG19 -- confirm.
; Every line occurs twice: this appears to be a two-column diff of two
; listings flattened into one stream, so identical lines show up as pairs.
G_M000_IG01: ;; offset=0000H
G_M000_IG01: ;; offset=0000H
4157 push r15
4157 push r15
4156 push r14
4156 push r14
4154 push r12
4154 push r12
57 push rdi
57 push rdi
56 push rsi
56 push rsi
55 push rbp
55 push rbp
53 push rbx
53 push rbx
4883EC20 sub rsp, 32
4883EC20 sub rsp, 32
C5F877 vzeroupper
C5F877 vzeroupper


; Entry checks and loop setup. rcx and rdx are presumably the first and second
; double[][] arguments (Windows x64 argument registers). Rows are later loaded
; from rdx and stored through rcx, so rdx acts as the source and rcx as the
; destination.
;   eax = r8d = dword at [rdx+08H], used as the row count
;   r9d       = dword at [[rdx+10H]+08H], the count of the source's first row
;   r10d      = r9d & 3, trip count of the one-element-at-a-time loops
;   r11d      = row index, starts at 0
; A zero row count goes to IG19 (range-check failure), because element 0 of
; the source is read unconditionally right after. A non-positive r8d returns
; through IG18. If rcx is null or its count is below r8d, control goes to the
; checked row loop at IG13; otherwise it falls into the row loop at IG03.
; Where a pair differs only in its rel32 bytes (58/52, 2E/28, 1F/19, 15/0F)
; the target label is the same: the second listing is 6 bytes shorter further
; down, so these forward displacements shrink by 6.
G_M000_IG02: ;; offset=0011H
G_M000_IG02: ;; offset=0011H
8B4208 mov eax, dword ptr [rdx+08H]
8B4208 mov eax, dword ptr [rdx+08H]
448BC0 mov r8d, eax
448BC0 mov r8d, eax
85C0 test eax, eax
85C0 test eax, eax
0F8458030000 je G_M000_IG19
0F8452030000 je G_M000_IG19
4C8B4A10 mov r9, gword ptr [rdx+10H]
4C8B4A10 mov r9, gword ptr [rdx+10H]
458B4908 mov r9d, dword ptr [r9+08H]
458B4908 mov r9d, dword ptr [r9+08H]
458BD1 mov r10d, r9d
458BD1 mov r10d, r9d
4183E203 and r10d, 3
4183E203 and r10d, 3
4533DB xor r11d, r11d
4533DB xor r11d, r11d
4585C0 test r8d, r8d
4585C0 test r8d, r8d
0F8E2E030000 jle G_M000_IG18
0F8E28030000 jle G_M000_IG18
4885C9 test rcx, rcx
4885C9 test rcx, rcx
0F841F020000 je G_M000_IG13
0F8419020000 je G_M000_IG13
44394108 cmp dword ptr [rcx+08H], r8d
44394108 cmp dword ptr [rcx+08H], r8d
0F8C15020000 jl G_M000_IG13
0F8C0F020000 jl G_M000_IG13


; Row loop, fast variant: reached only after IG02 found rcx non-null with a
; count >= r8d, so the row index itself is not range-checked here.
; Per row r11d:
;   rsi = destination row (qword at [rcx+8*r11+10H])
;   rax = source row      (qword at [rdx+8*r11+10H])
;   edi = element index, reset to 0
; If r10d <= 0 the single-element loop is skipped (IG07). Otherwise the
; unchecked loop IG04 is used only when both rows are non-null and both row
; counts are >= r10d; any failed test selects the checked loop IG06 instead.
; The trailing 90 (nop) is padding emitted to align IG04.
G_M000_IG03: ;; offset=004DH
G_M000_IG03: ;; offset=004DH
418BC3 mov eax, r11d
418BC3 mov eax, r11d
488B74C110 mov rsi, gword ptr [rcx+8*rax+10H]
488B74C110 mov rsi, gword ptr [rcx+8*rax+10H]
488B44C210 mov rax, gword ptr [rdx+8*rax+10H]
488B44C210 mov rax, gword ptr [rdx+8*rax+10H]
33FF xor edi, edi
33FF xor edi, edi
4585D2 test r10d, r10d
4585D2 test r10d, r10d
7E67 jle SHORT G_M000_IG07
7E67 jle SHORT G_M000_IG07
4885C0 test rax, rax
4885C0 test rax, rax
742F je SHORT G_M000_IG06
742F je SHORT G_M000_IG06
4885F6 test rsi, rsi
4885F6 test rsi, rsi
742A je SHORT G_M000_IG06
742A je SHORT G_M000_IG06
8B5808 mov ebx, dword ptr [rax+08H]
8B5808 mov ebx, dword ptr [rax+08H]
413BDA cmp ebx, r10d
413BDA cmp ebx, r10d
7C22 jl SHORT G_M000_IG06
7C22 jl SHORT G_M000_IG06
8B6E08 mov ebp, dword ptr [rsi+08H]
8B6E08 mov ebp, dword ptr [rsi+08H]
413BEA cmp ebp, r10d
413BEA cmp ebp, r10d
7C1A jl SHORT G_M000_IG06
7C1A jl SHORT G_M000_IG06
90 align [1 bytes for IG04]
90 align [1 bytes for IG04]


; Unchecked single-element loop: copies one 8-byte double per iteration from
; [rax+8*edi+10H] to [rsi+8*edi+10H] for edi = 0 .. r10d-1. There are no range
; checks in the loop body; IG03 compared both row counts with r10d before
; falling in here.
G_M000_IG04: ;; offset=0080H
G_M000_IG04: ;; offset=0080H
8BDF mov ebx, edi
8BDF mov ebx, edi
C5FB1044D810 vmovsd xmm0, qword ptr [rax+8*rbx+10H]
C5FB1044D810 vmovsd xmm0, qword ptr [rax+8*rbx+10H]
C5FB1144DE10 vmovsd qword ptr [rsi+8*rbx+10H], xmm0
C5FB1144DE10 vmovsd qword ptr [rsi+8*rbx+10H], xmm0
FFC7 inc edi
FFC7 inc edi
413BFA cmp edi, r10d
413BFA cmp edi, r10d
7CEB jl SHORT G_M000_IG04
7CEB jl SHORT G_M000_IG04


; Exit of the unchecked loop IG04: jumps over its checked twin (IG06) to IG07.
; The align line records that no padding bytes were needed for IG08.
G_M000_IG05: ;; offset=0095H
G_M000_IG05: ;; offset=0095H
EB2F jmp SHORT G_M000_IG07
EB2F jmp SHORT G_M000_IG07
align [0 bytes for IG08]
align [0 bytes for IG08]


; Checked single-element loop (the fallback for IG04). Every iteration
; re-reads the source row count ([rax+08H]) and the destination row count
; ([rsi+08H]) and compares edi against each as an unsigned value (jae); a
; failed comparison branches to IG19. Runs while edi < r10d.
; The jae pairs differ only in rel32 (D3/CD, BF/B9): same target, 6 bytes
; closer in the second listing.
G_M000_IG06: ;; offset=0097H
G_M000_IG06: ;; offset=0097H
8B5808 mov ebx, dword ptr [rax+08H]
8B5808 mov ebx, dword ptr [rax+08H]
3BFB cmp edi, ebx
3BFB cmp edi, ebx
0F83D3020000 jae G_M000_IG19
0F83CD020000 jae G_M000_IG19
8BEF mov ebp, edi
8BEF mov ebp, edi
C5FB1044E810 vmovsd xmm0, qword ptr [rax+8*rbp+10H]
C5FB1044E810 vmovsd xmm0, qword ptr [rax+8*rbp+10H]
8B6E08 mov ebp, dword ptr [rsi+08H]
8B6E08 mov ebp, dword ptr [rsi+08H]
3BFD cmp edi, ebp
3BFD cmp edi, ebp
0F83BF020000 jae G_M000_IG19
0F83B9020000 jae G_M000_IG19
8BDF mov ebx, edi
8BDF mov ebx, edi
C5FB1144DE10 vmovsd qword ptr [rsi+8*rbx+10H], xmm0
C5FB1144DE10 vmovsd qword ptr [rsi+8*rbx+10H], xmm0
FFC7 inc edi
FFC7 inc edi
413BFA cmp edi, r10d
413BFA cmp edi, r10d
7CD3 jl SHORT G_M000_IG06
7CD3 jl SHORT G_M000_IG06


; Start of the remaining elements of the row: edi = r10d. If r10d >= r9d there
; is nothing left and control goes to IG11. The tests that follow choose
; between the unrolled loop IG08 and its fully checked twin IG10: IG08 is
; used only when both rows are non-null, r10d and r9d are non-negative, and
; both row counts are >= r9d (ebx and ebp are left holding those counts).
;
; This block contains the only instruction-level difference between the two
; listings. The first listing (882 bytes) tests the two signs separately:
;     test r10d, r10d / jl IG10 / test r9d, r9d / jl IG10     (18 bytes)
; The second listing (876 bytes) ORs the values and tests the sign once:
;     mov ebp, r10d / or ebp, r9d / jl IG10                   (12 bytes)
; The two forms are interleaved below, so the lines from "test r10d, r10d"
; through the two "0F8CAF000000 jl" lines are NOT one executable sequence.
; This 6-byte saving is what shifts every later offset and displacement.
G_M000_IG07: ;; offset=00C4H
G_M000_IG07: ;; offset=00C4H
418BFA mov edi, r10d
418BFA mov edi, r10d
453BD1 cmp r10d, r9d
453BD1 cmp r10d, r9d
0F8D7D010000 jge G_M000_IG11
0F8D77010000 jge G_M000_IG11
4885C0 test rax, rax
4885C0 test rax, rax
0F84CA000000 je G_M000_IG10
0F84C4000000 je G_M000_IG10
4885F6 test rsi, rsi
4885F6 test rsi, rsi
0F84C1000000 je G_M000_IG10
0F84BB000000 je G_M000_IG10
4585D2 test r10d, r10d
418BEA mov ebp, r10d
0F8CB8000000 jl G_M000_IG10
410BE9 or ebp, r9d
4585C9 test r9d, r9d
0F8CAF000000 jl G_M000_IG10
0F8CAF000000 jl G_M000_IG10
8B5808 mov ebx, dword ptr [rax+08H]
8B5808 mov ebx, dword ptr [rax+08H]
413BD9 cmp ebx, r9d
413BD9 cmp ebx, r9d
0F8CA3000000 jl G_M000_IG10
0F8CA3000000 jl G_M000_IG10
8B6E08 mov ebp, dword ptr [rsi+08H]
8B6E08 mov ebp, dword ptr [rsi+08H]
413BE9 cmp ebp, r9d
413BE9 cmp ebp, r9d
0F8C97000000 jl G_M000_IG10
0F8C97000000 jl G_M000_IG10


; Unrolled loop: four doubles per iteration, using the row counts cached by
; IG07 (ebx = source row count, ebp = destination row count).
; The element at edi is copied without any check. Each of edi+1, edi+2 and
; edi+3 is compared (unsigned, jae) against ebx before its load and against
; ebp before its store, branching to IG19 on failure.
; After the four copies edi += 4; the loop repeats while edi < r9d.
; From here on the two listings differ only in the block offsets shown on the
; label lines (010CH vs 0106H).
G_M000_IG08: ;; offset=010CH
G_M000_IG08: ;; offset=0106H
448BF7 mov r14d, edi
448BF7 mov r14d, edi
C4A17B104CF010 vmovsd xmm1, qword ptr [rax+8*r14+10H]
C4A17B104CF010 vmovsd xmm1, qword ptr [rax+8*r14+10H]
C4A17B114CF610 vmovsd qword ptr [rsi+8*r14+10H], xmm1
C4A17B114CF610 vmovsd qword ptr [rsi+8*r14+10H], xmm1
448D7701 lea r14d, [rdi+01H]
448D7701 lea r14d, [rdi+01H]
443BF3 cmp r14d, ebx
443BF3 cmp r14d, ebx
0F8349020000 jae G_M000_IG19
0F8349020000 jae G_M000_IG19
458BFE mov r15d, r14d
458BFE mov r15d, r14d
C4A17B1054F810 vmovsd xmm2, qword ptr [rax+8*r15+10H]
C4A17B1054F810 vmovsd xmm2, qword ptr [rax+8*r15+10H]
443BF5 cmp r14d, ebp
443BF5 cmp r14d, ebp
0F8336020000 jae G_M000_IG19
0F8336020000 jae G_M000_IG19
C4A17B1154FE10 vmovsd qword ptr [rsi+8*r15+10H], xmm2
C4A17B1154FE10 vmovsd qword ptr [rsi+8*r15+10H], xmm2
448D7702 lea r14d, [rdi+02H]
448D7702 lea r14d, [rdi+02H]
443BF3 cmp r14d, ebx
443BF3 cmp r14d, ebx
0F8322020000 jae G_M000_IG19
0F8322020000 jae G_M000_IG19
458BFE mov r15d, r14d
458BFE mov r15d, r14d
C4A17B105CF810 vmovsd xmm3, qword ptr [rax+8*r15+10H]
C4A17B105CF810 vmovsd xmm3, qword ptr [rax+8*r15+10H]
443BF5 cmp r14d, ebp
443BF5 cmp r14d, ebp
0F830F020000 jae G_M000_IG19
0F830F020000 jae G_M000_IG19
C4A17B115CFE10 vmovsd qword ptr [rsi+8*r15+10H], xmm3
C4A17B115CFE10 vmovsd qword ptr [rsi+8*r15+10H], xmm3
448D7703 lea r14d, [rdi+03H]
448D7703 lea r14d, [rdi+03H]
443BF3 cmp r14d, ebx
443BF3 cmp r14d, ebx
0F83FB010000 jae G_M000_IG19
0F83FB010000 jae G_M000_IG19
458BFE mov r15d, r14d
458BFE mov r15d, r14d
C4A17B1064F810 vmovsd xmm4, qword ptr [rax+8*r15+10H]
C4A17B1064F810 vmovsd xmm4, qword ptr [rax+8*r15+10H]
443BF5 cmp r14d, ebp
443BF5 cmp r14d, ebp
0F83E8010000 jae G_M000_IG19
0F83E8010000 jae G_M000_IG19
C4A17B1164FE10 vmovsd qword ptr [rsi+8*r15+10H], xmm4
C4A17B1164FE10 vmovsd qword ptr [rsi+8*r15+10H], xmm4
83C704 add edi, 4
83C704 add edi, 4
413BF9 cmp edi, r9d
413BF9 cmp edi, r9d
0F8C6EFFFFFF jl G_M000_IG08
0F8C6EFFFFFF jl G_M000_IG08


; Exit of the unrolled loop IG08: jumps over its checked twin (IG10) to the
; row increment at IG11.
G_M000_IG09: ;; offset=019EH
G_M000_IG09: ;; offset=0198H
E9AA000000 jmp G_M000_IG11
E9AA000000 jmp G_M000_IG11


; Fully checked unrolled loop (the fallback for IG08). At the top of every
; iteration ebx is reloaded from [rax+08H] (source row count) and ebp from
; [rsi+08H] (destination row count). All four indices edi .. edi+3 are then
; range-checked (unsigned, jae -> IG19) before both the load and the store.
; The fourth copy reuses ebx as its index register, overwriting the cached
; count; ebx is reloaded at the top of the next iteration.
; After the four copies edi += 4; the loop repeats while edi < r9d.
G_M000_IG10: ;; offset=01A3H
G_M000_IG10: ;; offset=019DH
8B5808 mov ebx, dword ptr [rax+08H]
8B5808 mov ebx, dword ptr [rax+08H]
3BFB cmp edi, ebx
3BFB cmp edi, ebx
0F83C5010000 jae G_M000_IG19
0F83C5010000 jae G_M000_IG19
8BEF mov ebp, edi
8BEF mov ebp, edi
C5FB104CE810 vmovsd xmm1, qword ptr [rax+8*rbp+10H]
C5FB104CE810 vmovsd xmm1, qword ptr [rax+8*rbp+10H]
8B6E08 mov ebp, dword ptr [rsi+08H]
8B6E08 mov ebp, dword ptr [rsi+08H]
3BFD cmp edi, ebp
3BFD cmp edi, ebp
0F83B1010000 jae G_M000_IG19
0F83B1010000 jae G_M000_IG19
448BF7 mov r14d, edi
448BF7 mov r14d, edi
C4A17B114CF610 vmovsd qword ptr [rsi+8*r14+10H], xmm1
C4A17B114CF610 vmovsd qword ptr [rsi+8*r14+10H], xmm1
448D7701 lea r14d, [rdi+01H]
448D7701 lea r14d, [rdi+01H]
443BF3 cmp r14d, ebx
443BF3 cmp r14d, ebx
0F839A010000 jae G_M000_IG19
0F839A010000 jae G_M000_IG19
458BFE mov r15d, r14d
458BFE mov r15d, r14d
C4A17B1054F810 vmovsd xmm2, qword ptr [rax+8*r15+10H]
C4A17B1054F810 vmovsd xmm2, qword ptr [rax+8*r15+10H]
443BF5 cmp r14d, ebp
443BF5 cmp r14d, ebp
0F8387010000 jae G_M000_IG19
0F8387010000 jae G_M000_IG19
C4A17B1154FE10 vmovsd qword ptr [rsi+8*r15+10H], xmm2
C4A17B1154FE10 vmovsd qword ptr [rsi+8*r15+10H], xmm2
448D7702 lea r14d, [rdi+02H]
448D7702 lea r14d, [rdi+02H]
443BF3 cmp r14d, ebx
443BF3 cmp r14d, ebx
0F8373010000 jae G_M000_IG19
0F8373010000 jae G_M000_IG19
458BFE mov r15d, r14d
458BFE mov r15d, r14d
C4A17B105CF810 vmovsd xmm3, qword ptr [rax+8*r15+10H]
C4A17B105CF810 vmovsd xmm3, qword ptr [rax+8*r15+10H]
443BF5 cmp r14d, ebp
443BF5 cmp r14d, ebp
0F8360010000 jae G_M000_IG19
0F8360010000 jae G_M000_IG19
C4A17B115CFE10 vmovsd qword ptr [rsi+8*r15+10H], xmm3
C4A17B115CFE10 vmovsd qword ptr [rsi+8*r15+10H], xmm3
448D7703 lea r14d, [rdi+03H]
448D7703 lea r14d, [rdi+03H]
443BF3 cmp r14d, ebx
443BF3 cmp r14d, ebx
0F834C010000 jae G_M000_IG19
0F834C010000 jae G_M000_IG19
418BDE mov ebx, r14d
418BDE mov ebx, r14d
C5FB1064D810 vmovsd xmm4, qword ptr [rax+8*rbx+10H]
C5FB1064D810 vmovsd xmm4, qword ptr [rax+8*rbx+10H]
443BF5 cmp r14d, ebp
443BF5 cmp r14d, ebp
0F8339010000 jae G_M000_IG19
0F8339010000 jae G_M000_IG19
C5FB1164DE10 vmovsd qword ptr [rsi+8*rbx+10H], xmm4
C5FB1164DE10 vmovsd qword ptr [rsi+8*rbx+10H], xmm4
83C704 add edi, 4
83C704 add edi, 4
413BF9 cmp edi, r9d
413BF9 cmp edi, r9d
0F8C59FFFFFF jl G_M000_IG10
0F8C59FFFFFF jl G_M000_IG10


; Row increment for the fast row loop: r11d += 1, then back to IG03 while
; r11d < r8d. The backward displacement differs between the listings (F7 vs
; FD in the low byte) because the code between IG03 and here is 6 bytes
; shorter in the second listing.
G_M000_IG11: ;; offset=024AH
G_M000_IG11: ;; offset=0244H
41FFC3 inc r11d
41FFC3 inc r11d
453BD8 cmp r11d, r8d
453BD8 cmp r11d, r8d
0F8CF7FDFFFF jl G_M000_IG03
0F8CFDFDFFFF jl G_M000_IG03


; Fast row loop finished: jumps over the checked row loop (IG13-IG17) to the
; epilogue at IG18.
G_M000_IG12: ;; offset=0256H
G_M000_IG12: ;; offset=0250H
E906010000 jmp G_M000_IG18
E906010000 jmp G_M000_IG18


; Row loop, checked variant (selected in IG02 when rcx is null or its count
; is below r8d). Before each row load the row index r11d is range-checked
; (unsigned, jae -> IG19): against the dword at [rcx+08H] for the destination
; and against eax, the source count loaded in IG02, for the source.
;   rsi = destination row (qword at [rcx+8*r11+10H])
;   rdi = source row      (qword at [rdx+8*r11+10H])
;   ebx = element index, reset to 0
; The single-element loop IG14 is skipped when r10d <= 0.
G_M000_IG13: ;; offset=025BH
G_M000_IG13: ;; offset=0255H
443B5908 cmp r11d, dword ptr [rcx+08H]
443B5908 cmp r11d, dword ptr [rcx+08H]
0F830B010000 jae G_M000_IG19
0F830B010000 jae G_M000_IG19
418BF3 mov esi, r11d
418BF3 mov esi, r11d
488B74F110 mov rsi, gword ptr [rcx+8*rsi+10H]
488B74F110 mov rsi, gword ptr [rcx+8*rsi+10H]
443BD8 cmp r11d, eax
443BD8 cmp r11d, eax
0F83FA000000 jae G_M000_IG19
0F83FA000000 jae G_M000_IG19
418BFB mov edi, r11d
418BFB mov edi, r11d
488B7CFA10 mov rdi, gword ptr [rdx+8*rdi+10H]
488B7CFA10 mov rdi, gword ptr [rdx+8*rdi+10H]
33DB xor ebx, ebx
33DB xor ebx, ebx
4585D2 test r10d, r10d
4585D2 test r10d, r10d
7E2F jle SHORT G_M000_IG15
7E2F jle SHORT G_M000_IG15


; Checked single-element loop of the checked row loop. Each iteration loads
; the source row count ([rdi+08H]) and the destination row count ([rsi+08H])
; into ebp in turn, range-checks ebx against each (unsigned, jae -> IG19),
; and copies one double from [rdi+8*ebx+10H] to [rsi+8*ebx+10H].
; Runs while ebx < r10d.
G_M000_IG14: ;; offset=0285H
G_M000_IG14: ;; offset=027FH
8B6F08 mov ebp, dword ptr [rdi+08H]
8B6F08 mov ebp, dword ptr [rdi+08H]
3BDD cmp ebx, ebp
3BDD cmp ebx, ebp
0F83E0000000 jae G_M000_IG19
0F83E0000000 jae G_M000_IG19
8BEB mov ebp, ebx
8BEB mov ebp, ebx
C5FB1044EF10 vmovsd xmm0, qword ptr [rdi+8*rbp+10H]
C5FB1044EF10 vmovsd xmm0, qword ptr [rdi+8*rbp+10H]
8B6E08 mov ebp, dword ptr [rsi+08H]
8B6E08 mov ebp, dword ptr [rsi+08H]
3BDD cmp ebx, ebp
3BDD cmp ebx, ebp
0F83CC000000 jae G_M000_IG19
0F83CC000000 jae G_M000_IG19
8BEB mov ebp, ebx
8BEB mov ebp, ebx
C5FB1144EE10 vmovsd qword ptr [rsi+8*rbp+10H], xmm0
C5FB1144EE10 vmovsd qword ptr [rsi+8*rbp+10H], xmm0
FFC3 inc ebx
FFC3 inc ebx
413BDA cmp ebx, r10d
413BDA cmp ebx, r10d
7CD3 jl SHORT G_M000_IG14
7CD3 jl SHORT G_M000_IG14


; Start of the remaining elements in the checked row loop: ebx = r10d. If
; r10d >= r9d nothing is left in this row and control goes to IG17.
G_M000_IG15: ;; offset=02B2H
G_M000_IG15: ;; offset=02ACH
418BDA mov ebx, r10d
418BDA mov ebx, r10d
453BD1 cmp r10d, r9d
453BD1 cmp r10d, r9d
0F8D95000000 jge G_M000_IG17
0F8D95000000 jge G_M000_IG17


; Checked unrolled loop of the checked row loop: four doubles per iteration.
; At the top of every iteration ebp is reloaded from [rdi+08H] (source row
; count) and r14d from [rsi+08H] (destination row count). Each index
; ebx .. ebx+3 is range-checked (unsigned, jae -> IG19) against ebp before
; its load and against r14d before its store.
; The fourth copy reuses ebp as its index register; ebp is reloaded at the
; top of the next iteration.
; After the four copies ebx += 4; the loop repeats while ebx < r9d.
G_M000_IG16: ;; offset=02BEH
G_M000_IG16: ;; offset=02B8H
8B6F08 mov ebp, dword ptr [rdi+08H]
8B6F08 mov ebp, dword ptr [rdi+08H]
3BDD cmp ebx, ebp
3BDD cmp ebx, ebp
0F83A5000000 jae G_M000_IG19
0F83A5000000 jae G_M000_IG19
448BF3 mov r14d, ebx
448BF3 mov r14d, ebx
C4A17B104CF710 vmovsd xmm1, qword ptr [rdi+8*r14+10H]
C4A17B104CF710 vmovsd xmm1, qword ptr [rdi+8*r14+10H]
448B7608 mov r14d, dword ptr [rsi+08H]
448B7608 mov r14d, dword ptr [rsi+08H]
413BDE cmp ebx, r14d
413BDE cmp ebx, r14d
0F838E000000 jae G_M000_IG19
0F838E000000 jae G_M000_IG19
448BFB mov r15d, ebx
448BFB mov r15d, ebx
C4A17B114CFE10 vmovsd qword ptr [rsi+8*r15+10H], xmm1
C4A17B114CFE10 vmovsd qword ptr [rsi+8*r15+10H], xmm1
448D7B01 lea r15d, [rbx+01H]
448D7B01 lea r15d, [rbx+01H]
443BFD cmp r15d, ebp
443BFD cmp r15d, ebp
737B jae SHORT G_M000_IG19
737B jae SHORT G_M000_IG19
458BE7 mov r12d, r15d
458BE7 mov r12d, r15d
C4A17B1054E710 vmovsd xmm2, qword ptr [rdi+8*r12+10H]
C4A17B1054E710 vmovsd xmm2, qword ptr [rdi+8*r12+10H]
453BFE cmp r15d, r14d
453BFE cmp r15d, r14d
736C jae SHORT G_M000_IG19
736C jae SHORT G_M000_IG19
C4A17B1154E610 vmovsd qword ptr [rsi+8*r12+10H], xmm2
C4A17B1154E610 vmovsd qword ptr [rsi+8*r12+10H], xmm2
448D7B02 lea r15d, [rbx+02H]
448D7B02 lea r15d, [rbx+02H]
443BFD cmp r15d, ebp
443BFD cmp r15d, ebp
735C jae SHORT G_M000_IG19
735C jae SHORT G_M000_IG19
458BE7 mov r12d, r15d
458BE7 mov r12d, r15d
C4A17B105CE710 vmovsd xmm3, qword ptr [rdi+8*r12+10H]
C4A17B105CE710 vmovsd xmm3, qword ptr [rdi+8*r12+10H]
453BFE cmp r15d, r14d
453BFE cmp r15d, r14d
734D jae SHORT G_M000_IG19
734D jae SHORT G_M000_IG19
C4A17B115CE610 vmovsd qword ptr [rsi+8*r12+10H], xmm3
C4A17B115CE610 vmovsd qword ptr [rsi+8*r12+10H], xmm3
448D7B03 lea r15d, [rbx+03H]
448D7B03 lea r15d, [rbx+03H]
443BFD cmp r15d, ebp
443BFD cmp r15d, ebp
733D jae SHORT G_M000_IG19
733D jae SHORT G_M000_IG19
418BEF mov ebp, r15d
418BEF mov ebp, r15d
C5FB1064EF10 vmovsd xmm4, qword ptr [rdi+8*rbp+10H]
C5FB1064EF10 vmovsd xmm4, qword ptr [rdi+8*rbp+10H]
453BFE cmp r15d, r14d
453BFE cmp r15d, r14d
732E jae SHORT G_M000_IG19
732E jae SHORT G_M000_IG19
C5FB1164EE10 vmovsd qword ptr [rsi+8*rbp+10H], xmm4
C5FB1164EE10 vmovsd qword ptr [rsi+8*rbp+10H], xmm4
83C304 add ebx, 4
83C304 add ebx, 4
413BD9 cmp ebx, r9d
413BD9 cmp ebx, r9d
0F8C6DFFFFFF jl G_M000_IG16
0F8C6DFFFFFF jl G_M000_IG16


; Row increment for the checked row loop: r11d += 1, then back to IG13 while
; r11d < r8d; otherwise falls through into the epilogue.
G_M000_IG17: ;; offset=0351H
G_M000_IG17: ;; offset=034BH
41FFC3 inc r11d
41FFC3 inc r11d
453BD8 cmp r11d, r8d
453BD8 cmp r11d, r8d
0F8CFEFEFFFF jl G_M000_IG13
0F8CFEFEFFFF jl G_M000_IG13


; Epilogue: releases the 32 bytes reserved in IG01, restores the saved
; registers in the reverse of the order they were pushed, and returns.
G_M000_IG18: ;; offset=035DH
G_M000_IG18: ;; offset=0357H
4883C420 add rsp, 32
4883C420 add rsp, 32
5B pop rbx
5B pop rbx
5D pop rbp
5D pop rbp
5E pop rsi
5E pop rsi
5F pop rdi
5F pop rdi
415C pop r12
415C pop r12
415E pop r14
415E pop r14
415F pop r15
415F pop r15
C3 ret
C3 ret


; Shared failure target for every range check in this method: calls
; CORINFO_HELP_RNGCHKFAIL.
; NOTE(review): the int3 placed after the call suggests the helper is not
; expected to return -- confirm against the runtime documentation.
; The rel32 bytes differ (5F vs 65 in the low byte) because the call sits
; 6 bytes earlier in the second listing, so its displacement is 6 larger.
; NOTE(review): this implies the helper is at the same absolute distance from
; the method start in both listings -- confirm.
G_M000_IG19: ;; offset=036CH
G_M000_IG19: ;; offset=0366H
E85F20AB5F call CORINFO_HELP_RNGCHKFAIL
E86520AB5F call CORINFO_HELP_RNGCHKFAIL
CC int3
CC int3


; Total bytes of code 882
; Total bytes of code 876