Import OpenSSL 1.0.2k (as of svn r86089)

This commit is contained in:
Zachary Ware
2017-05-22 14:30:47 -05:00
parent d239d63057
commit ccd3ab4aff
2486 changed files with 943951 additions and 0 deletions

2864
tmp64/aes-x86_64.asm Normal file

File diff suppressed because it is too large Load Diff

945
tmp64/aesni-gcm-x86_64.asm Normal file
View File

@@ -0,0 +1,945 @@
default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64
; ----------------------------------------------------------------------
; _aesni_ctr32_ghash_6x -- file-local helper, reached via `call` from
; aesni_gcm_decrypt and aesni_gcm_encrypt.
;
; Each pass of $L$oop6x runs the AES rounds (vaesenc) on six counter
; blocks held in xmm9..xmm14 and, interleaved with them, carry-less
; multiplications (vpclmulqdq) over six previously saved blocks whose
; products are folded into xmm8.
;
; Register roles, as set up by the two callers in this file:
;   rdi   input pointer, advanced by 96 per pass
;   rsi   output pointer, advanced by 96 per pass
;   rdx   remaining length in 16-byte blocks (callers do `shr rdx,4`)
;   rcx   key schedule pointer biased by +128 (hence the (N-128) offsets)
;   r8    counter block; the next counter value is written back here
;   r9    table pointer biased by +64 (hence the (N-32) offsets);
;         presumably the precomputed hash-key table -- TODO confirm
;   r10   running byte count, +0x60 per pass
;   r11   -> $L$bswap_mask (+16 $L$poly, +32 $L$one_msb,
;         +48 $L$two_lsb, +64 $L$one_lsb)
;   r14   pointer from which the next six blocks to hash are fetched
;   r15   upper bound for r14
;   ebx   dword read from counter+12 by the caller
;   ebp   value the caller loaded from [(240-128)+rcx]; selects how many
;         AES rounds are executed (see the `cmp ebp,11` below)
;   xmm1  current counter block, xmm8 hash accumulator
;   xmm7 and the caller's stack slots 48..127: blocks waiting to be hashed
;
; Stack offsets are written (N+8)+rsp: N is the offset inside the
; caller's frame and the extra 8 skips the return address pushed by call.
;
; On return xmm9..xmm14 hold the last six output blocks, which are NOT
; stored here (both callers store them at (-96..-16)+rsi), and xmm8
; holds the accumulator after the final XORs at $L$6x_done.
; ----------------------------------------------------------------------
ALIGN 32
_aesni_ctr32_ghash_6x:
vmovdqu xmm2,XMMWORD[32+r11] ; xmm2 = $L$one_msb (byte 15 = 1)
sub rdx,6
vpxor xmm4,xmm4,xmm4
vmovdqu xmm15,XMMWORD[((0-128))+rcx] ; round key 0
; Derive counters +1..+5 with byte-wise adds (only byte 15 changes).
vpaddb xmm10,xmm1,xmm2
vpaddb xmm11,xmm10,xmm2
vpaddb xmm12,xmm11,xmm2
vpaddb xmm13,xmm12,xmm2
vpaddb xmm14,xmm13,xmm2
vpxor xmm9,xmm1,xmm15
vmovdqu XMMWORD[(16+8)+rsp],xmm4 ; clear the carried-over partial result slot
jmp NEAR $L$oop6x
ALIGN 32
$L$oop6x:
add ebx,100663296 ; 100663296 = 6<<24: advances counter byte 15 by 6
jc NEAR $L$handle_ctr32 ; byte 15 wrapped: recompute counters with 32-bit adds
vmovdqu xmm3,XMMWORD[((0-32))+r9]
vpaddb xmm1,xmm14,xmm2 ; counter for the following pass
vpxor xmm10,xmm10,xmm15
vpxor xmm11,xmm11,xmm15
$L$resume_ctr32:
vmovdqu XMMWORD[r8],xmm1 ; write next counter value back
vpclmulqdq xmm5,xmm7,xmm3,0x10
vpxor xmm12,xmm12,xmm15
vmovups xmm2,XMMWORD[((16-128))+rcx]
vpclmulqdq xmm6,xmm7,xmm3,0x01
; r12 = (r15 >= r14) ? 0x60 : 0  (xor/cmp/setnc/neg/and below), then
; r14 += r12: the fetch pointer only advances while it has not passed r15.
xor r12,r12
cmp r15,r14
vaesenc xmm9,xmm9,xmm2
vmovdqu xmm0,XMMWORD[((48+8))+rsp]
vpxor xmm13,xmm13,xmm15
vpclmulqdq xmm1,xmm7,xmm3,0x00
vaesenc xmm10,xmm10,xmm2
vpxor xmm14,xmm14,xmm15
setnc r12b
vpclmulqdq xmm7,xmm7,xmm3,0x11
vaesenc xmm11,xmm11,xmm2
vmovdqu xmm3,XMMWORD[((16-32))+r9]
neg r12
vaesenc xmm12,xmm12,xmm2
vpxor xmm6,xmm6,xmm5
vpclmulqdq xmm5,xmm0,xmm3,0x00
vpxor xmm8,xmm8,xmm4
vaesenc xmm13,xmm13,xmm2
vpxor xmm4,xmm1,xmm5
and r12,0x60
vmovups xmm15,XMMWORD[((32-128))+rcx]
vpclmulqdq xmm1,xmm0,xmm3,0x10
vaesenc xmm14,xmm14,xmm2
vpclmulqdq xmm2,xmm0,xmm3,0x01
lea r14,[r12*1+r14]
vaesenc xmm9,xmm9,xmm15
vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp]
vpclmulqdq xmm3,xmm0,xmm3,0x11
vmovdqu xmm0,XMMWORD[((64+8))+rsp]
vaesenc xmm10,xmm10,xmm15
; movbe loads byte-reversed qwords from [r14]; they are parked in the
; stack slots that the next pass reads as its blocks to hash.
movbe r13,QWORD[88+r14]
vaesenc xmm11,xmm11,xmm15
movbe r12,QWORD[80+r14]
vaesenc xmm12,xmm12,xmm15
mov QWORD[((32+8))+rsp],r13
vaesenc xmm13,xmm13,xmm15
mov QWORD[((40+8))+rsp],r12
vmovdqu xmm5,XMMWORD[((48-32))+r9]
vaesenc xmm14,xmm14,xmm15
vmovups xmm15,XMMWORD[((48-128))+rcx]
vpxor xmm6,xmm6,xmm1
vpclmulqdq xmm1,xmm0,xmm5,0x00
vaesenc xmm9,xmm9,xmm15
vpxor xmm6,xmm6,xmm2
vpclmulqdq xmm2,xmm0,xmm5,0x10
vaesenc xmm10,xmm10,xmm15
vpxor xmm7,xmm7,xmm3
vpclmulqdq xmm3,xmm0,xmm5,0x01
vaesenc xmm11,xmm11,xmm15
vpclmulqdq xmm5,xmm0,xmm5,0x11
vmovdqu xmm0,XMMWORD[((80+8))+rsp]
vaesenc xmm12,xmm12,xmm15
vaesenc xmm13,xmm13,xmm15
vpxor xmm4,xmm4,xmm1
vmovdqu xmm1,XMMWORD[((64-32))+r9]
vaesenc xmm14,xmm14,xmm15
vmovups xmm15,XMMWORD[((64-128))+rcx]
vpxor xmm6,xmm6,xmm2
vpclmulqdq xmm2,xmm0,xmm1,0x00
vaesenc xmm9,xmm9,xmm15
vpxor xmm6,xmm6,xmm3
vpclmulqdq xmm3,xmm0,xmm1,0x10
vaesenc xmm10,xmm10,xmm15
movbe r13,QWORD[72+r14]
vpxor xmm7,xmm7,xmm5
vpclmulqdq xmm5,xmm0,xmm1,0x01
vaesenc xmm11,xmm11,xmm15
movbe r12,QWORD[64+r14]
vpclmulqdq xmm1,xmm0,xmm1,0x11
vmovdqu xmm0,XMMWORD[((96+8))+rsp]
vaesenc xmm12,xmm12,xmm15
mov QWORD[((48+8))+rsp],r13
vaesenc xmm13,xmm13,xmm15
mov QWORD[((56+8))+rsp],r12
vpxor xmm4,xmm4,xmm2
vmovdqu xmm2,XMMWORD[((96-32))+r9]
vaesenc xmm14,xmm14,xmm15
vmovups xmm15,XMMWORD[((80-128))+rcx]
vpxor xmm6,xmm6,xmm3
vpclmulqdq xmm3,xmm0,xmm2,0x00
vaesenc xmm9,xmm9,xmm15
vpxor xmm6,xmm6,xmm5
vpclmulqdq xmm5,xmm0,xmm2,0x10
vaesenc xmm10,xmm10,xmm15
movbe r13,QWORD[56+r14]
vpxor xmm7,xmm7,xmm1
vpclmulqdq xmm1,xmm0,xmm2,0x01
vpxor xmm8,xmm8,XMMWORD[((112+8))+rsp] ; fold the accumulator into the last pending block
vaesenc xmm11,xmm11,xmm15
movbe r12,QWORD[48+r14]
vpclmulqdq xmm2,xmm0,xmm2,0x11
vaesenc xmm12,xmm12,xmm15
mov QWORD[((64+8))+rsp],r13
vaesenc xmm13,xmm13,xmm15
mov QWORD[((72+8))+rsp],r12
vpxor xmm4,xmm4,xmm3
vmovdqu xmm3,XMMWORD[((112-32))+r9]
vaesenc xmm14,xmm14,xmm15
vmovups xmm15,XMMWORD[((96-128))+rcx]
vpxor xmm6,xmm6,xmm5
vpclmulqdq xmm5,xmm8,xmm3,0x10
vaesenc xmm9,xmm9,xmm15
vpxor xmm6,xmm6,xmm1
vpclmulqdq xmm1,xmm8,xmm3,0x01
vaesenc xmm10,xmm10,xmm15
movbe r13,QWORD[40+r14]
vpxor xmm7,xmm7,xmm2
vpclmulqdq xmm2,xmm8,xmm3,0x00
vaesenc xmm11,xmm11,xmm15
movbe r12,QWORD[32+r14]
vpclmulqdq xmm8,xmm8,xmm3,0x11
vaesenc xmm12,xmm12,xmm15
mov QWORD[((80+8))+rsp],r13
vaesenc xmm13,xmm13,xmm15
mov QWORD[((88+8))+rsp],r12
vpxor xmm6,xmm6,xmm5
vaesenc xmm14,xmm14,xmm15
vpxor xmm6,xmm6,xmm1
vmovups xmm15,XMMWORD[((112-128))+rcx]
vpslldq xmm5,xmm6,8
vpxor xmm4,xmm4,xmm2
vmovdqu xmm3,XMMWORD[16+r11] ; xmm3 = $L$poly, used for the reduction multiplies
vaesenc xmm9,xmm9,xmm15
vpxor xmm7,xmm7,xmm8
vaesenc xmm10,xmm10,xmm15
vpxor xmm4,xmm4,xmm5
movbe r13,QWORD[24+r14]
vaesenc xmm11,xmm11,xmm15
movbe r12,QWORD[16+r14]
vpalignr xmm0,xmm4,xmm4,8
vpclmulqdq xmm4,xmm4,xmm3,0x10
mov QWORD[((96+8))+rsp],r13
vaesenc xmm12,xmm12,xmm15
mov QWORD[((104+8))+rsp],r12
vaesenc xmm13,xmm13,xmm15
vmovups xmm1,XMMWORD[((128-128))+rcx]
vaesenc xmm14,xmm14,xmm15
vaesenc xmm9,xmm9,xmm1
vmovups xmm15,XMMWORD[((144-128))+rcx]
vaesenc xmm10,xmm10,xmm1
vpsrldq xmm6,xmm6,8
vaesenc xmm11,xmm11,xmm1
vpxor xmm7,xmm7,xmm6
vaesenc xmm12,xmm12,xmm1
vpxor xmm4,xmm4,xmm0
movbe r13,QWORD[8+r14]
vaesenc xmm13,xmm13,xmm1
movbe r12,QWORD[r14]
vaesenc xmm14,xmm14,xmm1
vmovups xmm1,XMMWORD[((160-128))+rcx]
; Round-count dispatch. The flags set by this cmp are still live at the
; `je` further down: only AVX instructions execute in between.
cmp ebp,11
jb NEAR $L$enc_tail ; ebp < 11: no extra rounds
vaesenc xmm9,xmm9,xmm15
vaesenc xmm10,xmm10,xmm15
vaesenc xmm11,xmm11,xmm15
vaesenc xmm12,xmm12,xmm15
vaesenc xmm13,xmm13,xmm15
vaesenc xmm14,xmm14,xmm15
vaesenc xmm9,xmm9,xmm1
vaesenc xmm10,xmm10,xmm1
vaesenc xmm11,xmm11,xmm1
vaesenc xmm12,xmm12,xmm1
vaesenc xmm13,xmm13,xmm1
vmovups xmm15,XMMWORD[((176-128))+rcx]
vaesenc xmm14,xmm14,xmm1
vmovups xmm1,XMMWORD[((192-128))+rcx]
je NEAR $L$enc_tail ; ebp == 11: two extra rounds were enough
vaesenc xmm9,xmm9,xmm15
vaesenc xmm10,xmm10,xmm15
vaesenc xmm11,xmm11,xmm15
vaesenc xmm12,xmm12,xmm15
vaesenc xmm13,xmm13,xmm15
vaesenc xmm14,xmm14,xmm15
vaesenc xmm9,xmm9,xmm1
vaesenc xmm10,xmm10,xmm1
vaesenc xmm11,xmm11,xmm1
vaesenc xmm12,xmm12,xmm1
vaesenc xmm13,xmm13,xmm1
vmovups xmm15,XMMWORD[((208-128))+rcx]
vaesenc xmm14,xmm14,xmm1
vmovups xmm1,XMMWORD[((224-128))+rcx]
jmp NEAR $L$enc_tail
ALIGN 32
; Slow path: byte 15 of the counter wrapped. Byte-reverse the counter
; (mask at [r11]), add with 32-bit lanes using $L$one_lsb / $L$two_lsb,
; and reverse each result back before rejoining the main loop.
$L$handle_ctr32:
vmovdqu xmm0,XMMWORD[r11]
vpshufb xmm6,xmm1,xmm0
vmovdqu xmm5,XMMWORD[48+r11]
vpaddd xmm10,xmm6,XMMWORD[64+r11]
vpaddd xmm11,xmm6,xmm5
vmovdqu xmm3,XMMWORD[((0-32))+r9]
vpaddd xmm12,xmm10,xmm5
vpshufb xmm10,xmm10,xmm0
vpaddd xmm13,xmm11,xmm5
vpshufb xmm11,xmm11,xmm0
vpxor xmm10,xmm10,xmm15
vpaddd xmm14,xmm12,xmm5
vpshufb xmm12,xmm12,xmm0
vpxor xmm11,xmm11,xmm15
vpaddd xmm1,xmm13,xmm5
vpshufb xmm13,xmm13,xmm0
vpshufb xmm14,xmm14,xmm0
vpshufb xmm1,xmm1,xmm0
jmp NEAR $L$resume_ctr32
ALIGN 32
; Final AES round. The six input blocks are XORed with the last round
; key (xmm1) first, so each vaesenclast yields keystream XOR input
; directly. The counters for the next pass are prepared alongside.
$L$enc_tail:
vaesenc xmm9,xmm9,xmm15
vmovdqu XMMWORD[(16+8)+rsp],xmm7 ; park partial result; XORed into xmm8 later
vpalignr xmm8,xmm4,xmm4,8
vaesenc xmm10,xmm10,xmm15
vpclmulqdq xmm4,xmm4,xmm3,0x10
vpxor xmm2,xmm1,XMMWORD[rdi]
vaesenc xmm11,xmm11,xmm15
vpxor xmm0,xmm1,XMMWORD[16+rdi]
vaesenc xmm12,xmm12,xmm15
vpxor xmm5,xmm1,XMMWORD[32+rdi]
vaesenc xmm13,xmm13,xmm15
vpxor xmm6,xmm1,XMMWORD[48+rdi]
vaesenc xmm14,xmm14,xmm15
vpxor xmm7,xmm1,XMMWORD[64+rdi]
vpxor xmm3,xmm1,XMMWORD[80+rdi]
vmovdqu xmm1,XMMWORD[r8] ; reload the counter saved at $L$resume_ctr32
vaesenclast xmm9,xmm9,xmm2
vmovdqu xmm2,XMMWORD[32+r11]
vaesenclast xmm10,xmm10,xmm0
vpaddb xmm0,xmm1,xmm2
mov QWORD[((112+8))+rsp],r13
lea rdi,[96+rdi]
vaesenclast xmm11,xmm11,xmm5
vpaddb xmm5,xmm0,xmm2
mov QWORD[((120+8))+rsp],r12
lea rsi,[96+rsi]
vmovdqu xmm15,XMMWORD[((0-128))+rcx]
vaesenclast xmm12,xmm12,xmm6
vpaddb xmm6,xmm5,xmm2
vaesenclast xmm13,xmm13,xmm7
vpaddb xmm7,xmm6,xmm2
vaesenclast xmm14,xmm14,xmm3
vpaddb xmm3,xmm7,xmm2
add r10,0x60 ; 96 more bytes processed
sub rdx,0x6
jc NEAR $L$6x_done ; fewer than six blocks left: leave results in xmm9..xmm14
; More work: store this pass's output and install the next counters.
vmovups XMMWORD[(-96)+rsi],xmm9
vpxor xmm9,xmm1,xmm15
vmovups XMMWORD[(-80)+rsi],xmm10
vmovdqa xmm10,xmm0
vmovups XMMWORD[(-64)+rsi],xmm11
vmovdqa xmm11,xmm5
vmovups XMMWORD[(-48)+rsi],xmm12
vmovdqa xmm12,xmm6
vmovups XMMWORD[(-32)+rsi],xmm13
vmovdqa xmm13,xmm7
vmovups XMMWORD[(-16)+rsi],xmm14
vmovdqa xmm14,xmm3
vmovdqu xmm7,XMMWORD[((32+8))+rsp] ; first block to hash on the next pass
jmp NEAR $L$oop6x
$L$6x_done:
vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp]
vpxor xmm8,xmm8,xmm4
DB 0F3h,0C3h ;repret
; ----------------------------------------------------------------------
; aesni_gcm_decrypt -- exported entry point, Win64 calling convention.
;
; Incoming arguments (rcx, rdx, r8, r9, then the stack slots at 40+rsp
; and 48+rsp) are moved into rdi, rsi, rdx, rcx, r8, r9 respectively.
; As used below and in _aesni_ctr32_ghash_6x:
;   rdi = input, rsi = output, rdx = length in bytes,
;   rcx = key schedule (a dword is read at offset 240 from it),
;   r8  = counter block, r9 = 16-byte hash value (read, then rewritten).
; Returns in rax the byte count accumulated in r10 by the helper; 0 if
; the length is below 0x60, in which case nothing is touched.
; xmm6..xmm15 and rbx, rbp, r12..r15 are saved/restored; rdi and rsi are
; kept in the caller-provided home slots at 8+rsp / 16+rsp.
; ----------------------------------------------------------------------
global aesni_gcm_decrypt
ALIGN 32
aesni_gcm_decrypt:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_aesni_gcm_decrypt:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD[40+rsp]
mov r9,QWORD[48+rsp]
xor r10,r10 ; return value = 0 on the abort path
cmp rdx,0x60
jb NEAR $L$gcm_dec_abort ; need at least 96 bytes (six blocks)
lea rax,[rsp] ; rax = entry rsp; frame base for the rest of the function
push rbx
push rbp
push r12
push r13
push r14
push r15
lea rsp,[((-168))+rsp]
; Six pushes (48 bytes) + 168 puts rsp at rax-216, where xmm6 goes.
movaps XMMWORD[(-216)+rax],xmm6
movaps XMMWORD[(-200)+rax],xmm7
movaps XMMWORD[(-184)+rax],xmm8
movaps XMMWORD[(-168)+rax],xmm9
movaps XMMWORD[(-152)+rax],xmm10
movaps XMMWORD[(-136)+rax],xmm11
movaps XMMWORD[(-120)+rax],xmm12
movaps XMMWORD[(-104)+rax],xmm13
movaps XMMWORD[(-88)+rax],xmm14
movaps XMMWORD[(-72)+rax],xmm15
$L$gcm_dec_body:
vzeroupper
vmovdqu xmm1,XMMWORD[r8] ; counter block
add rsp,-128
mov ebx,DWORD[12+r8] ; last dword of the counter block
lea r11,[$L$bswap_mask]
lea r14,[((-128))+rcx]
mov r15,0xf80
vmovdqu xmm8,XMMWORD[r9] ; current hash value
and rsp,-128 ; 128-byte aligned scratch area
vmovdqu xmm0,XMMWORD[r11]
lea rcx,[128+rcx] ; bias key pointer (helper uses (N-128)+rcx)
lea r9,[((32+32))+r9] ; bias table pointer by 64 (helper uses (N-32)+r9)
mov ebp,DWORD[((240-128))+rcx] ; dword at key+240
vpshufb xmm8,xmm8,xmm0 ; byte-reverse the hash value
; Compare bits 7..11 of the stack address with those of (key-128); if
; the stack sits 0..767 bytes above, lower rsp by that distance.
; Going by the label name this avoids stack/key aliasing -- the exact
; hardware effect being avoided is not visible here.
and r14,r15
and r15,rsp
sub r15,r14
jc NEAR $L$dec_no_key_aliasing
cmp r15,768
jnc NEAR $L$dec_no_key_aliasing
sub rsp,r15
$L$dec_no_key_aliasing:
; Load the first six input blocks, byte-reverse them and hand them to
; the helper: block at +80 stays in xmm7, the others go to 48..112+rsp.
vmovdqu xmm7,XMMWORD[80+rdi]
lea r14,[rdi] ; r14 = fetch pointer for blocks to hash
vmovdqu xmm4,XMMWORD[64+rdi]
lea r15,[((-192))+rdx*1+rdi] ; r15 = input + len - 192, bound for r14
vmovdqu xmm5,XMMWORD[48+rdi]
shr rdx,4 ; bytes -> 16-byte blocks
xor r10,r10
vmovdqu xmm6,XMMWORD[32+rdi]
vpshufb xmm7,xmm7,xmm0
vmovdqu xmm2,XMMWORD[16+rdi]
vpshufb xmm4,xmm4,xmm0
vmovdqu xmm3,XMMWORD[rdi]
vpshufb xmm5,xmm5,xmm0
vmovdqu XMMWORD[48+rsp],xmm4
vpshufb xmm6,xmm6,xmm0
vmovdqu XMMWORD[64+rsp],xmm5
vpshufb xmm2,xmm2,xmm0
vmovdqu XMMWORD[80+rsp],xmm6
vpshufb xmm3,xmm3,xmm0
vmovdqu XMMWORD[96+rsp],xmm2
vmovdqu XMMWORD[112+rsp],xmm3
call _aesni_ctr32_ghash_6x
; The helper leaves its final six output blocks in registers.
vmovups XMMWORD[(-96)+rsi],xmm9
vmovups XMMWORD[(-80)+rsi],xmm10
vmovups XMMWORD[(-64)+rsi],xmm11
vmovups XMMWORD[(-48)+rsi],xmm12
vmovups XMMWORD[(-32)+rsi],xmm13
vmovups XMMWORD[(-16)+rsi],xmm14
vpshufb xmm8,xmm8,XMMWORD[r11] ; back to memory byte order
vmovdqu XMMWORD[(-64)+r9],xmm8 ; r9 was advanced by 64: this is the original hash slot
vzeroupper
movaps xmm6,XMMWORD[((-216))+rax]
movaps xmm7,XMMWORD[((-200))+rax]
movaps xmm8,XMMWORD[((-184))+rax]
movaps xmm9,XMMWORD[((-168))+rax]
movaps xmm10,XMMWORD[((-152))+rax]
movaps xmm11,XMMWORD[((-136))+rax]
movaps xmm12,XMMWORD[((-120))+rax]
movaps xmm13,XMMWORD[((-104))+rax]
movaps xmm14,XMMWORD[((-88))+rax]
movaps xmm15,XMMWORD[((-72))+rax]
mov r15,QWORD[((-48))+rax]
mov r14,QWORD[((-40))+rax]
mov r13,QWORD[((-32))+rax]
mov r12,QWORD[((-24))+rax]
mov rbp,QWORD[((-16))+rax]
mov rbx,QWORD[((-8))+rax]
lea rsp,[rax] ; drop the whole frame in one step
$L$gcm_dec_abort:
mov rax,r10 ; return number of bytes processed
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_aesni_gcm_decrypt:
; ----------------------------------------------------------------------
; _aesni_ctr32_6x -- file-local helper, called twice by
; aesni_gcm_encrypt.
;
; Runs the AES rounds on six consecutive counter blocks, XORs the result
; with the six 16-byte blocks at [rdi] and stores 96 bytes at [rsi].
; In:   rdi input, rsi output, rcx key pointer biased by +128,
;       r11 -> $L$bswap_mask, xmm0 = byte-reverse mask (used on the
;       wrap path only), xmm1 = counter block, ebx = dword at counter+12,
;       ebp = value loaded by the caller from [(240-128)+rcx]
; Out:  rdi and rsi advanced by 96, xmm1 = counter for the next call,
;       xmm9..xmm14 = the six output blocks just stored
; Uses: r12, r13, xmm2..xmm6, xmm8, xmm15, flags
; ----------------------------------------------------------------------
ALIGN 32
_aesni_ctr32_6x:
vmovdqu xmm4,XMMWORD[((0-128))+rcx] ; round key 0
vmovdqu xmm2,XMMWORD[32+r11] ; $L$one_msb
lea r13,[((-1))+rbp] ; loop count = ebp - 1
vmovups xmm15,XMMWORD[((16-128))+rcx] ; round key 1
lea r12,[((32-128))+rcx] ; r12 -> round key 2
vpxor xmm9,xmm1,xmm4
add ebx,100663296 ; 100663296 = 6<<24: advances counter byte 15 by 6
jc NEAR $L$handle_ctr32_2 ; byte 15 wrapped: use 32-bit adds instead
vpaddb xmm10,xmm1,xmm2
vpaddb xmm11,xmm10,xmm2
vpxor xmm10,xmm10,xmm4
vpaddb xmm12,xmm11,xmm2
vpxor xmm11,xmm11,xmm4
vpaddb xmm13,xmm12,xmm2
vpxor xmm12,xmm12,xmm4
vpaddb xmm14,xmm13,xmm2
vpxor xmm13,xmm13,xmm4
vpaddb xmm1,xmm14,xmm2
vpxor xmm14,xmm14,xmm4
jmp NEAR $L$oop_ctr32
ALIGN 16
; One AES round per iteration on all six blocks; the next round key is
; fetched at the bottom so it is ready for the following iteration.
$L$oop_ctr32:
vaesenc xmm9,xmm9,xmm15
vaesenc xmm10,xmm10,xmm15
vaesenc xmm11,xmm11,xmm15
vaesenc xmm12,xmm12,xmm15
vaesenc xmm13,xmm13,xmm15
vaesenc xmm14,xmm14,xmm15
vmovups xmm15,XMMWORD[r12]
lea r12,[16+r12]
dec r13d
jnz NEAR $L$oop_ctr32
; Last two rounds. xmm3 = final round key; it is XORed with the input
; blocks so that vaesenclast produces keystream XOR input directly.
vmovdqu xmm3,XMMWORD[r12]
vaesenc xmm9,xmm9,xmm15
vpxor xmm4,xmm3,XMMWORD[rdi]
vaesenc xmm10,xmm10,xmm15
vpxor xmm5,xmm3,XMMWORD[16+rdi]
vaesenc xmm11,xmm11,xmm15
vpxor xmm6,xmm3,XMMWORD[32+rdi]
vaesenc xmm12,xmm12,xmm15
vpxor xmm8,xmm3,XMMWORD[48+rdi]
vaesenc xmm13,xmm13,xmm15
vpxor xmm2,xmm3,XMMWORD[64+rdi]
vaesenc xmm14,xmm14,xmm15
vpxor xmm3,xmm3,XMMWORD[80+rdi]
lea rdi,[96+rdi]
vaesenclast xmm9,xmm9,xmm4
vaesenclast xmm10,xmm10,xmm5
vaesenclast xmm11,xmm11,xmm6
vaesenclast xmm12,xmm12,xmm8
vaesenclast xmm13,xmm13,xmm2
vaesenclast xmm14,xmm14,xmm3
vmovups XMMWORD[rsi],xmm9
vmovups XMMWORD[16+rsi],xmm10
vmovups XMMWORD[32+rsi],xmm11
vmovups XMMWORD[48+rsi],xmm12
vmovups XMMWORD[64+rsi],xmm13
vmovups XMMWORD[80+rsi],xmm14
lea rsi,[96+rsi]
DB 0F3h,0C3h ;repret
ALIGN 32
; Wrap path: byte-reverse the counter with xmm0, add with 32-bit lanes
; ($L$one_lsb at 64+r11, $L$two_lsb at 48+r11), reverse back and apply
; round key 0 before joining the round loop.
$L$handle_ctr32_2:
vpshufb xmm6,xmm1,xmm0
vmovdqu xmm5,XMMWORD[48+r11]
vpaddd xmm10,xmm6,XMMWORD[64+r11]
vpaddd xmm11,xmm6,xmm5
vpaddd xmm12,xmm10,xmm5
vpshufb xmm10,xmm10,xmm0
vpaddd xmm13,xmm11,xmm5
vpshufb xmm11,xmm11,xmm0
vpxor xmm10,xmm10,xmm4
vpaddd xmm14,xmm12,xmm5
vpshufb xmm12,xmm12,xmm0
vpxor xmm11,xmm11,xmm4
vpaddd xmm1,xmm13,xmm5
vpshufb xmm13,xmm13,xmm0
vpxor xmm12,xmm12,xmm4
vpshufb xmm14,xmm14,xmm0
vpxor xmm13,xmm13,xmm4
vpshufb xmm1,xmm1,xmm0
vpxor xmm14,xmm14,xmm4
jmp NEAR $L$oop_ctr32
; ----------------------------------------------------------------------
; aesni_gcm_encrypt -- exported entry point, Win64 calling convention.
;
; Argument handling, register saving and stack set-up mirror
; aesni_gcm_decrypt: incoming rcx, rdx, r8, r9, [40+rsp], [48+rsp] become
; rdi (input), rsi (output), rdx (length in bytes), rcx (key schedule),
; r8 (counter block), r9 (16-byte hash value).
; Returns in rax the byte count held in r10; 0 if the length is below
; 0x60*3 (288 bytes), in which case nothing is touched.
;
; Flow: two calls to _aesni_ctr32_6x produce the first twelve output
; blocks; _aesni_ctr32_ghash_6x then processes the rest while hashing
; earlier output; the straight-line tail after that call hashes the
; blocks that are still pending and writes the hash value back.
; ----------------------------------------------------------------------
global aesni_gcm_encrypt
ALIGN 32
aesni_gcm_encrypt:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_aesni_gcm_encrypt:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
mov r8,QWORD[40+rsp]
mov r9,QWORD[48+rsp]
xor r10,r10 ; return value = 0 on the abort path
cmp rdx,0x60*3
jb NEAR $L$gcm_enc_abort ; need at least 288 bytes (eighteen blocks)
lea rax,[rsp] ; rax = entry rsp; frame base for the rest of the function
push rbx
push rbp
push r12
push r13
push r14
push r15
lea rsp,[((-168))+rsp]
movaps XMMWORD[(-216)+rax],xmm6
movaps XMMWORD[(-200)+rax],xmm7
movaps XMMWORD[(-184)+rax],xmm8
movaps XMMWORD[(-168)+rax],xmm9
movaps XMMWORD[(-152)+rax],xmm10
movaps XMMWORD[(-136)+rax],xmm11
movaps XMMWORD[(-120)+rax],xmm12
movaps XMMWORD[(-104)+rax],xmm13
movaps XMMWORD[(-88)+rax],xmm14
movaps XMMWORD[(-72)+rax],xmm15
$L$gcm_enc_body:
vzeroupper
vmovdqu xmm1,XMMWORD[r8] ; counter block
add rsp,-128
mov ebx,DWORD[12+r8] ; last dword of the counter block
lea r11,[$L$bswap_mask]
lea r14,[((-128))+rcx]
mov r15,0xf80
lea rcx,[128+rcx] ; bias key pointer (helpers use (N-128)+rcx)
vmovdqu xmm0,XMMWORD[r11] ; byte-reverse mask
and rsp,-128 ; 128-byte aligned scratch area
mov ebp,DWORD[((240-128))+rcx] ; dword at key+240
; Same stack-vs-key address adjustment as in aesni_gcm_decrypt.
and r14,r15
and r15,rsp
sub r15,r14
jc NEAR $L$enc_no_key_aliasing
cmp r15,768
jnc NEAR $L$enc_no_key_aliasing
sub rsp,r15
$L$enc_no_key_aliasing:
lea r14,[rsi] ; r14 = start of output: blocks to hash are fetched from here
lea r15,[((-192))+rdx*1+rsi] ; r15 = output + len - 192, bound for r14
shr rdx,4 ; bytes -> 16-byte blocks
call _aesni_ctr32_6x
; Byte-reverse the first six output blocks and queue them for hashing:
; five go to 48..112+rsp, the sixth stays in xmm7.
vpshufb xmm8,xmm9,xmm0
vpshufb xmm2,xmm10,xmm0
vmovdqu XMMWORD[112+rsp],xmm8
vpshufb xmm4,xmm11,xmm0
vmovdqu XMMWORD[96+rsp],xmm2
vpshufb xmm5,xmm12,xmm0
vmovdqu XMMWORD[80+rsp],xmm4
vpshufb xmm6,xmm13,xmm0
vmovdqu XMMWORD[64+rsp],xmm5
vpshufb xmm7,xmm14,xmm0
vmovdqu XMMWORD[48+rsp],xmm6
call _aesni_ctr32_6x
vmovdqu xmm8,XMMWORD[r9] ; current hash value
lea r9,[((32+32))+r9] ; bias table pointer by 64
sub rdx,12 ; twelve blocks already produced
mov r10,0x60*2 ; ...which is 192 bytes towards the return value
vpshufb xmm8,xmm8,xmm0
call _aesni_ctr32_ghash_6x
; Tail: store the final six output blocks (left in xmm9..xmm14 by the
; helper) and byte-reverse them for hashing. Two groups remain to be
; hashed: the six blocks parked at 32..112+rsp and these final six.
vmovdqu xmm7,XMMWORD[32+rsp]
vmovdqu xmm0,XMMWORD[r11]
vmovdqu xmm3,XMMWORD[((0-32))+r9]
vpunpckhqdq xmm1,xmm7,xmm7
vmovdqu xmm15,XMMWORD[((32-32))+r9]
vmovups XMMWORD[(-96)+rsi],xmm9
vpshufb xmm9,xmm9,xmm0
vpxor xmm1,xmm1,xmm7 ; high half XOR low half, input to the middle-term multiply
vmovups XMMWORD[(-80)+rsi],xmm10
vpshufb xmm10,xmm10,xmm0
vmovups XMMWORD[(-64)+rsi],xmm11
vpshufb xmm11,xmm11,xmm0
vmovups XMMWORD[(-48)+rsi],xmm12
vpshufb xmm12,xmm12,xmm0
vmovups XMMWORD[(-32)+rsi],xmm13
vpshufb xmm13,xmm13,xmm0
vmovups XMMWORD[(-16)+rsi],xmm14
vpshufb xmm14,xmm14,xmm0
vmovdqu XMMWORD[16+rsp],xmm9
; First group: multiply-accumulate the parked blocks against the table
; entries at (0..128-32)+r9; xmm8 is XORed into the block at 112+rsp.
vmovdqu xmm6,XMMWORD[48+rsp]
vmovdqu xmm0,XMMWORD[((16-32))+r9]
vpunpckhqdq xmm2,xmm6,xmm6
vpclmulqdq xmm5,xmm7,xmm3,0x00
vpxor xmm2,xmm2,xmm6
vpclmulqdq xmm7,xmm7,xmm3,0x11
vpclmulqdq xmm1,xmm1,xmm15,0x00
vmovdqu xmm9,XMMWORD[64+rsp]
vpclmulqdq xmm4,xmm6,xmm0,0x00
vmovdqu xmm3,XMMWORD[((48-32))+r9]
vpxor xmm4,xmm4,xmm5
vpunpckhqdq xmm5,xmm9,xmm9
vpclmulqdq xmm6,xmm6,xmm0,0x11
vpxor xmm5,xmm5,xmm9
vpxor xmm6,xmm6,xmm7
vpclmulqdq xmm2,xmm2,xmm15,0x10
vmovdqu xmm15,XMMWORD[((80-32))+r9]
vpxor xmm2,xmm2,xmm1
vmovdqu xmm1,XMMWORD[80+rsp]
vpclmulqdq xmm7,xmm9,xmm3,0x00
vmovdqu xmm0,XMMWORD[((64-32))+r9]
vpxor xmm7,xmm7,xmm4
vpunpckhqdq xmm4,xmm1,xmm1
vpclmulqdq xmm9,xmm9,xmm3,0x11
vpxor xmm4,xmm4,xmm1
vpxor xmm9,xmm9,xmm6
vpclmulqdq xmm5,xmm5,xmm15,0x00
vpxor xmm5,xmm5,xmm2
vmovdqu xmm2,XMMWORD[96+rsp]
vpclmulqdq xmm6,xmm1,xmm0,0x00
vmovdqu xmm3,XMMWORD[((96-32))+r9]
vpxor xmm6,xmm6,xmm7
vpunpckhqdq xmm7,xmm2,xmm2
vpclmulqdq xmm1,xmm1,xmm0,0x11
vpxor xmm7,xmm7,xmm2
vpxor xmm1,xmm1,xmm9
vpclmulqdq xmm4,xmm4,xmm15,0x10
vmovdqu xmm15,XMMWORD[((128-32))+r9]
vpxor xmm4,xmm4,xmm5
vpxor xmm8,xmm8,XMMWORD[112+rsp]
vpclmulqdq xmm5,xmm2,xmm3,0x00
vmovdqu xmm0,XMMWORD[((112-32))+r9]
vpunpckhqdq xmm9,xmm8,xmm8
vpxor xmm5,xmm5,xmm6
vpclmulqdq xmm2,xmm2,xmm3,0x11
vpxor xmm9,xmm9,xmm8
vpxor xmm2,xmm2,xmm1
vpclmulqdq xmm7,xmm7,xmm15,0x00
vpxor xmm4,xmm7,xmm4
; Second group starts here (xmm14 first), overlapped with combining and
; reducing the first group's sums via $L$poly at 16+r11.
vpclmulqdq xmm6,xmm8,xmm0,0x00
vmovdqu xmm3,XMMWORD[((0-32))+r9]
vpunpckhqdq xmm1,xmm14,xmm14
vpclmulqdq xmm8,xmm8,xmm0,0x11
vpxor xmm1,xmm1,xmm14
vpxor xmm5,xmm6,xmm5
vpclmulqdq xmm9,xmm9,xmm15,0x10
vmovdqu xmm15,XMMWORD[((32-32))+r9]
vpxor xmm7,xmm8,xmm2
vpxor xmm6,xmm9,xmm4
vmovdqu xmm0,XMMWORD[((16-32))+r9]
vpxor xmm9,xmm7,xmm5
vpclmulqdq xmm4,xmm14,xmm3,0x00
vpxor xmm6,xmm6,xmm9
vpunpckhqdq xmm2,xmm13,xmm13
vpclmulqdq xmm14,xmm14,xmm3,0x11
vpxor xmm2,xmm2,xmm13
vpslldq xmm9,xmm6,8
vpclmulqdq xmm1,xmm1,xmm15,0x00
vpxor xmm8,xmm5,xmm9
vpsrldq xmm6,xmm6,8
vpxor xmm7,xmm7,xmm6
vpclmulqdq xmm5,xmm13,xmm0,0x00
vmovdqu xmm3,XMMWORD[((48-32))+r9]
vpxor xmm5,xmm5,xmm4
vpunpckhqdq xmm9,xmm12,xmm12
vpclmulqdq xmm13,xmm13,xmm0,0x11
vpxor xmm9,xmm9,xmm12
vpxor xmm13,xmm13,xmm14
vpalignr xmm14,xmm8,xmm8,8
vpclmulqdq xmm2,xmm2,xmm15,0x10
vmovdqu xmm15,XMMWORD[((80-32))+r9]
vpxor xmm2,xmm2,xmm1
vpclmulqdq xmm4,xmm12,xmm3,0x00
vmovdqu xmm0,XMMWORD[((64-32))+r9]
vpxor xmm4,xmm4,xmm5
vpunpckhqdq xmm1,xmm11,xmm11
vpclmulqdq xmm12,xmm12,xmm3,0x11
vpxor xmm1,xmm1,xmm11
vpxor xmm12,xmm12,xmm13
vxorps xmm7,xmm7,XMMWORD[16+rsp] ; mix in the block saved at 16+rsp above
vpclmulqdq xmm9,xmm9,xmm15,0x00
vpxor xmm9,xmm9,xmm2
vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10
vxorps xmm8,xmm8,xmm14
vpclmulqdq xmm5,xmm11,xmm0,0x00
vmovdqu xmm3,XMMWORD[((96-32))+r9]
vpxor xmm5,xmm5,xmm4
vpunpckhqdq xmm2,xmm10,xmm10
vpclmulqdq xmm11,xmm11,xmm0,0x11
vpxor xmm2,xmm2,xmm10
vpalignr xmm14,xmm8,xmm8,8
vpxor xmm11,xmm11,xmm12
vpclmulqdq xmm1,xmm1,xmm15,0x10
vmovdqu xmm15,XMMWORD[((128-32))+r9]
vpxor xmm1,xmm1,xmm9
vxorps xmm14,xmm14,xmm7
vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10
vxorps xmm8,xmm8,xmm14
vpclmulqdq xmm4,xmm10,xmm3,0x00
vmovdqu xmm0,XMMWORD[((112-32))+r9]
vpxor xmm4,xmm4,xmm5
vpunpckhqdq xmm9,xmm8,xmm8
vpclmulqdq xmm10,xmm10,xmm3,0x11
vpxor xmm9,xmm9,xmm8
vpxor xmm10,xmm10,xmm11
vpclmulqdq xmm2,xmm2,xmm15,0x00
vpxor xmm2,xmm2,xmm1
vpclmulqdq xmm5,xmm8,xmm0,0x00
vpclmulqdq xmm7,xmm8,xmm0,0x11
vpxor xmm5,xmm5,xmm4
vpclmulqdq xmm6,xmm9,xmm15,0x10
vpxor xmm7,xmm7,xmm10
vpxor xmm6,xmm6,xmm2
; Combine low/middle/high sums of the second group and reduce with two
; multiplications by $L$poly.
vpxor xmm4,xmm7,xmm5
vpxor xmm6,xmm6,xmm4
vpslldq xmm1,xmm6,8
vmovdqu xmm3,XMMWORD[16+r11]
vpsrldq xmm6,xmm6,8
vpxor xmm8,xmm5,xmm1
vpxor xmm7,xmm7,xmm6
vpalignr xmm2,xmm8,xmm8,8
vpclmulqdq xmm8,xmm8,xmm3,0x10
vpxor xmm8,xmm8,xmm2
vpalignr xmm2,xmm8,xmm8,8
vpclmulqdq xmm8,xmm8,xmm3,0x10
vpxor xmm2,xmm2,xmm7
vpxor xmm8,xmm8,xmm2
vpshufb xmm8,xmm8,XMMWORD[r11] ; back to memory byte order
vmovdqu XMMWORD[(-64)+r9],xmm8 ; r9 was advanced by 64: this is the original hash slot
vzeroupper
movaps xmm6,XMMWORD[((-216))+rax]
movaps xmm7,XMMWORD[((-200))+rax]
movaps xmm8,XMMWORD[((-184))+rax]
movaps xmm9,XMMWORD[((-168))+rax]
movaps xmm10,XMMWORD[((-152))+rax]
movaps xmm11,XMMWORD[((-136))+rax]
movaps xmm12,XMMWORD[((-120))+rax]
movaps xmm13,XMMWORD[((-104))+rax]
movaps xmm14,XMMWORD[((-88))+rax]
movaps xmm15,XMMWORD[((-72))+rax]
mov r15,QWORD[((-48))+rax]
mov r14,QWORD[((-40))+rax]
mov r13,QWORD[((-32))+rax]
mov r12,QWORD[((-24))+rax]
mov rbp,QWORD[((-16))+rax]
mov rbx,QWORD[((-8))+rax]
lea rsp,[rax] ; drop the whole frame in one step
$L$gcm_enc_abort:
mov rax,r10 ; return number of bytes processed
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_aesni_gcm_encrypt:
; Constant table. The code keeps r11 = $L$bswap_mask and reaches the
; other entries by fixed offset, so the order and the 16-byte size of
; each entry must not change:
;   +0  $L$bswap_mask  vpshufb mask that reverses the 16 bytes of a block
;   +16 $L$poly        multiplier used by the vpclmulqdq reduction steps
;   +32 $L$one_msb     adds 1 to byte 15 (vpaddb fast-path counter step)
;   +48 $L$two_lsb     adds 2 to dword 0 (vpaddd, byte-reversed counter)
;   +64 $L$one_lsb     adds 1 to dword 0 (vpaddd, byte-reversed counter)
ALIGN 64
$L$bswap_mask:
DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
$L$poly:
DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
$L$one_msb:
DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
$L$two_lsb:
DB 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
$L$one_lsb:
DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
; NUL-terminated ASCII identification string:
; "AES-NI GCM module for x86_64, CRYPTOGAMS by <appro@openssl.org>"
DB 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108
DB 101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
DB 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
DB 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
ALIGN 64
EXTERN __imp_RtlVirtualUnwind
; ----------------------------------------------------------------------
; gcm_se_handler -- Win64 language-specific exception handler shared by
; aesni_gcm_decrypt and aesni_gcm_encrypt (registered in .xdata below).
;
; In: r8 = context record, r9 = dispatcher context (third and fourth
; handler arguments). Field names in the comments follow the Win64
; CONTEXT / DISPATCHER_CONTEXT layouts; the numeric offsets are exactly
; what the code uses.
;
; The handler data ([56+r9]) holds two image-relative addresses: the
; function's body label and its abort label. If the faulting Rip lies
; between them, the callee-saved registers and xmm6..xmm15 are recovered
; from the frame the function built below its entry rsp (kept in rax).
; In every case Rsp/Rsi/Rdi are fixed up, the context is copied to the
; dispatcher's context record and RtlVirtualUnwind is called.
; Returns eax = 1.
; ----------------------------------------------------------------------
ALIGN 16
gcm_se_handler:
push rsi
push rdi
push rbx
push rbp
push r12
push r13
push r14
push r15
pushfq
sub rsp,64 ; shadow space + four stack arguments for the call below
mov rax,QWORD[120+r8] ; context->Rax (the functions keep their entry rsp in rax)
mov rbx,QWORD[248+r8] ; context->Rip
mov rsi,QWORD[8+r9] ; disp->ImageBase
mov r11,QWORD[56+r9] ; disp->HandlerData
mov r10d,DWORD[r11] ; HandlerData[0]: RVA of the body label
lea r10,[r10*1+rsi]
cmp rbx,r10
jb NEAR $L$common_seh_tail ; Rip before body label: frame not built yet
mov rax,QWORD[152+r8] ; context->Rsp
mov r10d,DWORD[4+r11] ; HandlerData[1]: RVA of the abort label
lea r10,[r10*1+rsi]
cmp rbx,r10
jae NEAR $L$common_seh_tail ; Rip at/after abort label: frame already released
mov rax,QWORD[120+r8] ; inside the body: rax again = entry rsp
mov r15,QWORD[((-48))+rax]
mov r14,QWORD[((-40))+rax]
mov r13,QWORD[((-32))+rax]
mov r12,QWORD[((-24))+rax]
mov rbp,QWORD[((-16))+rax]
mov rbx,QWORD[((-8))+rax]
mov QWORD[240+r8],r15 ; context->R15
mov QWORD[232+r8],r14 ; context->R14
mov QWORD[224+r8],r13 ; context->R13
mov QWORD[216+r8],r12 ; context->R12
mov QWORD[160+r8],rbp ; context->Rbp
mov QWORD[144+r8],rbx ; context->Rbx
lea rsi,[((-216))+rax] ; source: xmm6..xmm15 save area in the frame
lea rdi,[512+r8] ; destination: context->Xmm6
mov ecx,20 ; 20 qwords = 10 XMM registers
DD 0xa548f3fc ; bytes FC F3 48 A5 = cld; rep movsq
$L$common_seh_tail:
mov rdi,QWORD[8+rax] ; rdi/rsi were parked in the home slots by the prologue
mov rsi,QWORD[16+rax]
mov QWORD[152+r8],rax ; context->Rsp
mov QWORD[168+r8],rsi ; context->Rsi
mov QWORD[176+r8],rdi ; context->Rdi
mov rdi,QWORD[40+r9] ; disp->ContextRecord
mov rsi,r8
mov ecx,154 ; 154 qwords = 1232 bytes copied
DD 0xa548f3fc ; bytes FC F3 48 A5 = cld; rep movsq
; Call RtlVirtualUnwind: rcx = 0 (handler type), rdx/r8/r9 and the four
; stack slots are taken from the dispatcher context at rsi.
mov rsi,r9
xor rcx,rcx
mov rdx,QWORD[8+rsi]
mov r8,QWORD[rsi]
mov r9,QWORD[16+rsi]
mov r10,QWORD[40+rsi]
lea r11,[56+rsi]
lea r12,[24+rsi]
mov QWORD[32+rsp],r10
mov QWORD[40+rsp],r11
mov QWORD[48+rsp],r12
mov QWORD[56+rsp],rcx
call QWORD[__imp_RtlVirtualUnwind]
mov eax,1 ; handler result 1 (ExceptionContinueSearch in the Win64 disposition enum)
add rsp,64
popfq
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
pop rdi
pop rsi
DB 0F3h,0C3h ;repret
; .pdata: one function-table entry per exported routine, each made of
; three image-relative addresses: begin, end, unwind info.
section .pdata rdata align=4
ALIGN 4
DD $L$SEH_begin_aesni_gcm_decrypt wrt ..imagebase
DD $L$SEH_end_aesni_gcm_decrypt wrt ..imagebase
DD $L$SEH_gcm_dec_info wrt ..imagebase
DD $L$SEH_begin_aesni_gcm_encrypt wrt ..imagebase
DD $L$SEH_end_aesni_gcm_encrypt wrt ..imagebase
DD $L$SEH_gcm_enc_info wrt ..imagebase
; .xdata: unwind info records. First byte 9 = version 1 with the
; "has exception handler" flag; no unwind codes follow. Then comes the
; handler address and the two label addresses gcm_se_handler reads as
; its handler data (body start, abort label).
section .xdata rdata align=8
ALIGN 8
$L$SEH_gcm_dec_info:
DB 9,0,0,0
DD gcm_se_handler wrt ..imagebase
DD $L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase
$L$SEH_gcm_enc_info:
DB 9,0,0,0
DD gcm_se_handler wrt ..imagebase
DD $L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase

1693
tmp64/aesni-mb-x86_64.asm Normal file

File diff suppressed because it is too large Load Diff

3229
tmp64/aesni-sha1-x86_64.asm Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

4027
tmp64/aesni-x86_64.asm Normal file

File diff suppressed because it is too large Load Diff

2733
tmp64/bsaes-x86_64.asm Normal file

File diff suppressed because it is too large Load Diff

2094
tmp64/cmll-x86_64.asm Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

2018
tmp64/ghash-x86_64.asm Normal file

File diff suppressed because it is too large Load Diff

776
tmp64/md5-x86_64.asm Normal file
View File

@@ -0,0 +1,776 @@
default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64
; ----------------------------------------------------------------------
; md5_block_asm_data_order -- exported, Win64 calling convention.
;
; Incoming rcx, rdx, r8 are moved to rdi, rsi, rdx:
;   rdi -> four-dword state (read on entry, written back at $L$end)
;   rsi -> input data
;   rdx  = number of 64-byte blocks (shifted left by 6 to get bytes)
; With zero blocks the state is simply written back unchanged.
;
; Inside the loop: eax, ebx, ecx, edx = working state A, B, C, D;
; r8d, r9d, r14d, r15d = copy of the state at block start;
; r10d = next message dword, r11d / r12d = scratch for the round
; function; rbp = state pointer, rdi = end-of-input pointer.
;
; Every step has the shape
;     a = b + rol(a + f(b,c,d) + X[k] + constant, s)
; where `lea` adds X[k] and the constant in one instruction, and the
; message dword for the following step is loaded early into r10d.
; The load issued at the very end of each round is overwritten by the
; first load of the next round (or of the next block) before being used.
; ----------------------------------------------------------------------
ALIGN 16
global md5_block_asm_data_order
md5_block_asm_data_order:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_md5_block_asm_data_order:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
push rbp
push rbx
push r12
push r14
push r15
$L$prologue:
mov rbp,rdi ; rbp = state pointer
shl rdx,6 ; blocks -> bytes
lea rdi,[rdx*1+rsi] ; rdi = end of input
mov eax,DWORD[rbp]
mov ebx,DWORD[4+rbp]
mov ecx,DWORD[8+rbp]
mov edx,DWORD[12+rbp]
cmp rsi,rdi
je NEAR $L$end ; nothing to process
$L$loop:
; Keep the incoming state for the final addition.
mov r8d,eax
mov r9d,ebx
mov r14d,ecx
mov r15d,edx
; ---- Round 1: f = ((c ^ d) & b) ^ d, rotates 7/12/17/22,
; ---- message dwords taken in order 0..15
mov r10d,DWORD[rsi]
mov r11d,edx
xor r11d,ecx
lea eax,[((-680876936))+r10*1+rax]
and r11d,ebx
xor r11d,edx
mov r10d,DWORD[4+rsi]
add eax,r11d
rol eax,7
mov r11d,ecx
add eax,ebx
xor r11d,ebx
lea edx,[((-389564586))+r10*1+rdx]
and r11d,eax
xor r11d,ecx
mov r10d,DWORD[8+rsi]
add edx,r11d
rol edx,12
mov r11d,ebx
add edx,eax
xor r11d,eax
lea ecx,[606105819+r10*1+rcx]
and r11d,edx
xor r11d,ebx
mov r10d,DWORD[12+rsi]
add ecx,r11d
rol ecx,17
mov r11d,eax
add ecx,edx
xor r11d,edx
lea ebx,[((-1044525330))+r10*1+rbx]
and r11d,ecx
xor r11d,eax
mov r10d,DWORD[16+rsi]
add ebx,r11d
rol ebx,22
mov r11d,edx
add ebx,ecx
xor r11d,ecx
lea eax,[((-176418897))+r10*1+rax]
and r11d,ebx
xor r11d,edx
mov r10d,DWORD[20+rsi]
add eax,r11d
rol eax,7
mov r11d,ecx
add eax,ebx
xor r11d,ebx
lea edx,[1200080426+r10*1+rdx]
and r11d,eax
xor r11d,ecx
mov r10d,DWORD[24+rsi]
add edx,r11d
rol edx,12
mov r11d,ebx
add edx,eax
xor r11d,eax
lea ecx,[((-1473231341))+r10*1+rcx]
and r11d,edx
xor r11d,ebx
mov r10d,DWORD[28+rsi]
add ecx,r11d
rol ecx,17
mov r11d,eax
add ecx,edx
xor r11d,edx
lea ebx,[((-45705983))+r10*1+rbx]
and r11d,ecx
xor r11d,eax
mov r10d,DWORD[32+rsi]
add ebx,r11d
rol ebx,22
mov r11d,edx
add ebx,ecx
xor r11d,ecx
lea eax,[1770035416+r10*1+rax]
and r11d,ebx
xor r11d,edx
mov r10d,DWORD[36+rsi]
add eax,r11d
rol eax,7
mov r11d,ecx
add eax,ebx
xor r11d,ebx
lea edx,[((-1958414417))+r10*1+rdx]
and r11d,eax
xor r11d,ecx
mov r10d,DWORD[40+rsi]
add edx,r11d
rol edx,12
mov r11d,ebx
add edx,eax
xor r11d,eax
lea ecx,[((-42063))+r10*1+rcx]
and r11d,edx
xor r11d,ebx
mov r10d,DWORD[44+rsi]
add ecx,r11d
rol ecx,17
mov r11d,eax
add ecx,edx
xor r11d,edx
lea ebx,[((-1990404162))+r10*1+rbx]
and r11d,ecx
xor r11d,eax
mov r10d,DWORD[48+rsi]
add ebx,r11d
rol ebx,22
mov r11d,edx
add ebx,ecx
xor r11d,ecx
lea eax,[1804603682+r10*1+rax]
and r11d,ebx
xor r11d,edx
mov r10d,DWORD[52+rsi]
add eax,r11d
rol eax,7
mov r11d,ecx
add eax,ebx
xor r11d,ebx
lea edx,[((-40341101))+r10*1+rdx]
and r11d,eax
xor r11d,ecx
mov r10d,DWORD[56+rsi]
add edx,r11d
rol edx,12
mov r11d,ebx
add edx,eax
xor r11d,eax
lea ecx,[((-1502002290))+r10*1+rcx]
and r11d,edx
xor r11d,ebx
mov r10d,DWORD[60+rsi]
add ecx,r11d
rol ecx,17
mov r11d,eax
add ecx,edx
xor r11d,edx
lea ebx,[1236535329+r10*1+rbx]
and r11d,ecx
xor r11d,eax
mov r10d,DWORD[rsi]
add ebx,r11d
rol ebx,22
mov r11d,edx
add ebx,ecx
; ---- Round 2: f = (d & b) | (~d & c), built in r12d/r11d,
; ---- rotates 5/9/14/20, message dwords 1,6,11,0,5,10,15,4,...
mov r10d,DWORD[4+rsi]
mov r11d,edx
mov r12d,edx
not r11d
lea eax,[((-165796510))+r10*1+rax]
and r12d,ebx
and r11d,ecx
mov r10d,DWORD[24+rsi]
or r12d,r11d
mov r11d,ecx
add eax,r12d
mov r12d,ecx
rol eax,5
add eax,ebx
not r11d
lea edx,[((-1069501632))+r10*1+rdx]
and r12d,eax
and r11d,ebx
mov r10d,DWORD[44+rsi]
or r12d,r11d
mov r11d,ebx
add edx,r12d
mov r12d,ebx
rol edx,9
add edx,eax
not r11d
lea ecx,[643717713+r10*1+rcx]
and r12d,edx
and r11d,eax
mov r10d,DWORD[rsi]
or r12d,r11d
mov r11d,eax
add ecx,r12d
mov r12d,eax
rol ecx,14
add ecx,edx
not r11d
lea ebx,[((-373897302))+r10*1+rbx]
and r12d,ecx
and r11d,edx
mov r10d,DWORD[20+rsi]
or r12d,r11d
mov r11d,edx
add ebx,r12d
mov r12d,edx
rol ebx,20
add ebx,ecx
not r11d
lea eax,[((-701558691))+r10*1+rax]
and r12d,ebx
and r11d,ecx
mov r10d,DWORD[40+rsi]
or r12d,r11d
mov r11d,ecx
add eax,r12d
mov r12d,ecx
rol eax,5
add eax,ebx
not r11d
lea edx,[38016083+r10*1+rdx]
and r12d,eax
and r11d,ebx
mov r10d,DWORD[60+rsi]
or r12d,r11d
mov r11d,ebx
add edx,r12d
mov r12d,ebx
rol edx,9
add edx,eax
not r11d
lea ecx,[((-660478335))+r10*1+rcx]
and r12d,edx
and r11d,eax
mov r10d,DWORD[16+rsi]
or r12d,r11d
mov r11d,eax
add ecx,r12d
mov r12d,eax
rol ecx,14
add ecx,edx
not r11d
lea ebx,[((-405537848))+r10*1+rbx]
and r12d,ecx
and r11d,edx
mov r10d,DWORD[36+rsi]
or r12d,r11d
mov r11d,edx
add ebx,r12d
mov r12d,edx
rol ebx,20
add ebx,ecx
not r11d
lea eax,[568446438+r10*1+rax]
and r12d,ebx
and r11d,ecx
mov r10d,DWORD[56+rsi]
or r12d,r11d
mov r11d,ecx
add eax,r12d
mov r12d,ecx
rol eax,5
add eax,ebx
not r11d
lea edx,[((-1019803690))+r10*1+rdx]
and r12d,eax
and r11d,ebx
mov r10d,DWORD[12+rsi]
or r12d,r11d
mov r11d,ebx
add edx,r12d
mov r12d,ebx
rol edx,9
add edx,eax
not r11d
lea ecx,[((-187363961))+r10*1+rcx]
and r12d,edx
and r11d,eax
mov r10d,DWORD[32+rsi]
or r12d,r11d
mov r11d,eax
add ecx,r12d
mov r12d,eax
rol ecx,14
add ecx,edx
not r11d
lea ebx,[1163531501+r10*1+rbx]
and r12d,ecx
and r11d,edx
mov r10d,DWORD[52+rsi]
or r12d,r11d
mov r11d,edx
add ebx,r12d
mov r12d,edx
rol ebx,20
add ebx,ecx
not r11d
lea eax,[((-1444681467))+r10*1+rax]
and r12d,ebx
and r11d,ecx
mov r10d,DWORD[8+rsi]
or r12d,r11d
mov r11d,ecx
add eax,r12d
mov r12d,ecx
rol eax,5
add eax,ebx
not r11d
lea edx,[((-51403784))+r10*1+rdx]
and r12d,eax
and r11d,ebx
mov r10d,DWORD[28+rsi]
or r12d,r11d
mov r11d,ebx
add edx,r12d
mov r12d,ebx
rol edx,9
add edx,eax
not r11d
lea ecx,[1735328473+r10*1+rcx]
and r12d,edx
and r11d,eax
mov r10d,DWORD[48+rsi]
or r12d,r11d
mov r11d,eax
add ecx,r12d
mov r12d,eax
rol ecx,14
add ecx,edx
not r11d
lea ebx,[((-1926607734))+r10*1+rbx]
and r12d,ecx
and r11d,edx
mov r10d,DWORD[rsi]
or r12d,r11d
mov r11d,edx
add ebx,r12d
mov r12d,edx
rol ebx,20
add ebx,ecx
; ---- Round 3: f = b ^ c ^ d, rotates 4/11/16/23,
; ---- message dwords 5,8,11,14,1,4,7,10,...
mov r10d,DWORD[20+rsi]
mov r11d,ecx
lea eax,[((-378558))+r10*1+rax]
mov r10d,DWORD[32+rsi]
xor r11d,edx
xor r11d,ebx
add eax,r11d
rol eax,4
mov r11d,ebx
add eax,ebx
lea edx,[((-2022574463))+r10*1+rdx]
mov r10d,DWORD[44+rsi]
xor r11d,ecx
xor r11d,eax
add edx,r11d
rol edx,11
mov r11d,eax
add edx,eax
lea ecx,[1839030562+r10*1+rcx]
mov r10d,DWORD[56+rsi]
xor r11d,ebx
xor r11d,edx
add ecx,r11d
rol ecx,16
mov r11d,edx
add ecx,edx
lea ebx,[((-35309556))+r10*1+rbx]
mov r10d,DWORD[4+rsi]
xor r11d,eax
xor r11d,ecx
add ebx,r11d
rol ebx,23
mov r11d,ecx
add ebx,ecx
lea eax,[((-1530992060))+r10*1+rax]
mov r10d,DWORD[16+rsi]
xor r11d,edx
xor r11d,ebx
add eax,r11d
rol eax,4
mov r11d,ebx
add eax,ebx
lea edx,[1272893353+r10*1+rdx]
mov r10d,DWORD[28+rsi]
xor r11d,ecx
xor r11d,eax
add edx,r11d
rol edx,11
mov r11d,eax
add edx,eax
lea ecx,[((-155497632))+r10*1+rcx]
mov r10d,DWORD[40+rsi]
xor r11d,ebx
xor r11d,edx
add ecx,r11d
rol ecx,16
mov r11d,edx
add ecx,edx
lea ebx,[((-1094730640))+r10*1+rbx]
mov r10d,DWORD[52+rsi]
xor r11d,eax
xor r11d,ecx
add ebx,r11d
rol ebx,23
mov r11d,ecx
add ebx,ecx
lea eax,[681279174+r10*1+rax]
mov r10d,DWORD[rsi]
xor r11d,edx
xor r11d,ebx
add eax,r11d
rol eax,4
mov r11d,ebx
add eax,ebx
lea edx,[((-358537222))+r10*1+rdx]
mov r10d,DWORD[12+rsi]
xor r11d,ecx
xor r11d,eax
add edx,r11d
rol edx,11
mov r11d,eax
add edx,eax
lea ecx,[((-722521979))+r10*1+rcx]
mov r10d,DWORD[24+rsi]
xor r11d,ebx
xor r11d,edx
add ecx,r11d
rol ecx,16
mov r11d,edx
add ecx,edx
lea ebx,[76029189+r10*1+rbx]
mov r10d,DWORD[36+rsi]
xor r11d,eax
xor r11d,ecx
add ebx,r11d
rol ebx,23
mov r11d,ecx
add ebx,ecx
lea eax,[((-640364487))+r10*1+rax]
mov r10d,DWORD[48+rsi]
xor r11d,edx
xor r11d,ebx
add eax,r11d
rol eax,4
mov r11d,ebx
add eax,ebx
lea edx,[((-421815835))+r10*1+rdx]
mov r10d,DWORD[60+rsi]
xor r11d,ecx
xor r11d,eax
add edx,r11d
rol edx,11
mov r11d,eax
add edx,eax
lea ecx,[530742520+r10*1+rcx]
mov r10d,DWORD[8+rsi]
xor r11d,ebx
xor r11d,edx
add ecx,r11d
rol ecx,16
mov r11d,edx
add ecx,edx
lea ebx,[((-995338651))+r10*1+rbx]
mov r10d,DWORD[rsi]
xor r11d,eax
xor r11d,ecx
add ebx,r11d
rol ebx,23
mov r11d,ecx
add ebx,ecx
; ---- Round 4: f = (~d | b) ^ c, with ~d formed as 0xffffffff ^ d,
; ---- rotates 6/10/15/21, message dwords 0,7,14,5,12,3,10,1,...
mov r10d,DWORD[rsi]
mov r11d,0xffffffff
xor r11d,edx
lea eax,[((-198630844))+r10*1+rax]
or r11d,ebx
xor r11d,ecx
add eax,r11d
mov r10d,DWORD[28+rsi]
mov r11d,0xffffffff
rol eax,6
xor r11d,ecx
add eax,ebx
lea edx,[1126891415+r10*1+rdx]
or r11d,eax
xor r11d,ebx
add edx,r11d
mov r10d,DWORD[56+rsi]
mov r11d,0xffffffff
rol edx,10
xor r11d,ebx
add edx,eax
lea ecx,[((-1416354905))+r10*1+rcx]
or r11d,edx
xor r11d,eax
add ecx,r11d
mov r10d,DWORD[20+rsi]
mov r11d,0xffffffff
rol ecx,15
xor r11d,eax
add ecx,edx
lea ebx,[((-57434055))+r10*1+rbx]
or r11d,ecx
xor r11d,edx
add ebx,r11d
mov r10d,DWORD[48+rsi]
mov r11d,0xffffffff
rol ebx,21
xor r11d,edx
add ebx,ecx
lea eax,[1700485571+r10*1+rax]
or r11d,ebx
xor r11d,ecx
add eax,r11d
mov r10d,DWORD[12+rsi]
mov r11d,0xffffffff
rol eax,6
xor r11d,ecx
add eax,ebx
lea edx,[((-1894986606))+r10*1+rdx]
or r11d,eax
xor r11d,ebx
add edx,r11d
mov r10d,DWORD[40+rsi]
mov r11d,0xffffffff
rol edx,10
xor r11d,ebx
add edx,eax
lea ecx,[((-1051523))+r10*1+rcx]
or r11d,edx
xor r11d,eax
add ecx,r11d
mov r10d,DWORD[4+rsi]
mov r11d,0xffffffff
rol ecx,15
xor r11d,eax
add ecx,edx
lea ebx,[((-2054922799))+r10*1+rbx]
or r11d,ecx
xor r11d,edx
add ebx,r11d
mov r10d,DWORD[32+rsi]
mov r11d,0xffffffff
rol ebx,21
xor r11d,edx
add ebx,ecx
lea eax,[1873313359+r10*1+rax]
or r11d,ebx
xor r11d,ecx
add eax,r11d
mov r10d,DWORD[60+rsi]
mov r11d,0xffffffff
rol eax,6
xor r11d,ecx
add eax,ebx
lea edx,[((-30611744))+r10*1+rdx]
or r11d,eax
xor r11d,ebx
add edx,r11d
mov r10d,DWORD[24+rsi]
mov r11d,0xffffffff
rol edx,10
xor r11d,ebx
add edx,eax
lea ecx,[((-1560198380))+r10*1+rcx]
or r11d,edx
xor r11d,eax
add ecx,r11d
mov r10d,DWORD[52+rsi]
mov r11d,0xffffffff
rol ecx,15
xor r11d,eax
add ecx,edx
lea ebx,[1309151649+r10*1+rbx]
or r11d,ecx
xor r11d,edx
add ebx,r11d
mov r10d,DWORD[16+rsi]
mov r11d,0xffffffff
rol ebx,21
xor r11d,edx
add ebx,ecx
lea eax,[((-145523070))+r10*1+rax]
or r11d,ebx
xor r11d,ecx
add eax,r11d
mov r10d,DWORD[44+rsi]
mov r11d,0xffffffff
rol eax,6
xor r11d,ecx
add eax,ebx
lea edx,[((-1120210379))+r10*1+rdx]
or r11d,eax
xor r11d,ebx
add edx,r11d
mov r10d,DWORD[8+rsi]
mov r11d,0xffffffff
rol edx,10
xor r11d,ebx
add edx,eax
lea ecx,[718787259+r10*1+rcx]
or r11d,edx
xor r11d,eax
add ecx,r11d
mov r10d,DWORD[36+rsi]
mov r11d,0xffffffff
rol ecx,15
xor r11d,eax
add ecx,edx
lea ebx,[((-343485551))+r10*1+rbx]
or r11d,ecx
xor r11d,edx
add ebx,r11d
mov r10d,DWORD[rsi]
mov r11d,0xffffffff
rol ebx,21
xor r11d,edx
add ebx,ecx
; Add the state saved at block start, then move on to the next block.
add eax,r8d
add ebx,r9d
add ecx,r14d
add edx,r15d
add rsi,64
cmp rsi,rdi
jb NEAR $L$loop
$L$end:
; Write the state back and restore the five pushed registers
; (read in reverse push order from the stack).
mov DWORD[rbp],eax
mov DWORD[4+rbp],ebx
mov DWORD[8+rbp],ecx
mov DWORD[12+rbp],edx
mov r15,QWORD[rsp]
mov r14,QWORD[8+rsp]
mov r12,QWORD[16+rsp]
mov rbx,QWORD[24+rsp]
mov rbp,QWORD[32+rsp]
add rsp,40
$L$epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_md5_block_asm_data_order:
EXTERN __imp_RtlVirtualUnwind
; ----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler for
; md5_block_asm_data_order (registered in .xdata below).
;
; In: r8 = context record, r9 = dispatcher context. Field names in the
; comments follow the Win64 CONTEXT / DISPATCHER_CONTEXT layouts; the
; numeric offsets are exactly what the code uses.
;
; If the faulting Rip lies in [$L$prologue, $L$epilogue) the five pushed
; registers are still on the stack and are recovered from it. Otherwise
; only Rsp/Rsi/Rdi are fixed up. The context is then copied to the
; dispatcher's context record and RtlVirtualUnwind is called.
; Returns eax = 1.
; ----------------------------------------------------------------------
ALIGN 16
se_handler:
push rsi
push rdi
push rbx
push rbp
push r12
push r13
push r14
push r15
pushfq
sub rsp,64 ; shadow space + four stack arguments for the call below
mov rax,QWORD[120+r8] ; context->Rax (function entry copies rsp into rax)
mov rbx,QWORD[248+r8] ; context->Rip
lea r10,[$L$prologue]
cmp rbx,r10
jb NEAR $L$in_prologue ; Rip before $L$prologue: pushes not complete
mov rax,QWORD[152+r8] ; context->Rsp
lea r10,[$L$epilogue]
cmp rbx,r10
jae NEAR $L$in_prologue ; Rip at/after $L$epilogue: registers already popped
lea rax,[40+rax] ; step over the five pushed registers
mov rbp,QWORD[((-8))+rax]
mov rbx,QWORD[((-16))+rax]
mov r12,QWORD[((-24))+rax]
mov r14,QWORD[((-32))+rax]
mov r15,QWORD[((-40))+rax]
mov QWORD[144+r8],rbx ; context->Rbx
mov QWORD[160+r8],rbp ; context->Rbp
mov QWORD[216+r8],r12 ; context->R12
mov QWORD[232+r8],r14 ; context->R14
mov QWORD[240+r8],r15 ; context->R15
$L$in_prologue:
mov rdi,QWORD[8+rax] ; rdi/rsi were parked in the home slots by the prologue
mov rsi,QWORD[16+rax]
mov QWORD[152+r8],rax ; context->Rsp
mov QWORD[168+r8],rsi ; context->Rsi
mov QWORD[176+r8],rdi ; context->Rdi
mov rdi,QWORD[40+r9] ; disp->ContextRecord
mov rsi,r8
mov ecx,154 ; 154 qwords = 1232 bytes copied
DD 0xa548f3fc ; bytes FC F3 48 A5 = cld; rep movsq
; Call RtlVirtualUnwind: rcx = 0 (handler type), rdx/r8/r9 and the four
; stack slots are taken from the dispatcher context at rsi.
mov rsi,r9
xor rcx,rcx
mov rdx,QWORD[8+rsi]
mov r8,QWORD[rsi]
mov r9,QWORD[16+rsi]
mov r10,QWORD[40+rsi]
lea r11,[56+rsi]
lea r12,[24+rsi]
mov QWORD[32+rsp],r10
mov QWORD[40+rsp],r11
mov QWORD[48+rsp],r12
mov QWORD[56+rsp],rcx
call QWORD[__imp_RtlVirtualUnwind]
mov eax,1 ; handler result 1 (ExceptionContinueSearch in the Win64 disposition enum)
add rsp,64
popfq
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
pop rdi
pop rsi
DB 0F3h,0C3h ;repret
; .pdata: function-table entry for md5_block_asm_data_order, made of
; three image-relative addresses: begin, end, unwind info.
section .pdata rdata align=4
ALIGN 4
DD $L$SEH_begin_md5_block_asm_data_order wrt ..imagebase
DD $L$SEH_end_md5_block_asm_data_order wrt ..imagebase
DD $L$SEH_info_md5_block_asm_data_order wrt ..imagebase
; .xdata: unwind info record. First byte 9 = version 1 with the
; "has exception handler" flag; no unwind codes follow, only the handler
; address (se_handler takes its range from labels, not handler data).
section .xdata rdata align=8
ALIGN 8
$L$SEH_info_md5_block_asm_data_order:
DB 9,0,0,0
DD se_handler wrt ..imagebase

1372
tmp64/rc4-md5-x86_64.asm Normal file

File diff suppressed because it is too large Load Diff

768
tmp64/rc4-x86_64.asm Normal file
View File

@@ -0,0 +1,768 @@
default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64
EXTERN OPENSSL_ia32cap_P
global RC4
ALIGN 16
; RC4 -- bulk stream routine, Win64 entry point.
; The Win64 argument registers are remapped right after entry:
;   rdi <- rcx : state block; two index dwords at +0/+4, lookup table at +8
;   rsi <- rdx : byte count (zero returns immediately)
;   rdx <- r8  : source buffer
;   rcx <- r9  : destination buffer
; Working registers after $L$prologue:
;   rdi = table base (state + 8)
;   r10 = first index, rcx = second index (only cl is ever advanced)
;   r11 = bytes left, r12 = source cursor
;   r13 = destination cursor; on the dword-table paths it holds
;         destination - source and is used as [r13*1+r12]
; rbx, r12 and r13 are pushed here and reloaded at $L$exit; rdi/rsi are
; parked in the two slots just above the return address.
RC4:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_RC4:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
mov rcx,r9
or rsi,rsi ; nothing to do for a zero length
jne NEAR $L$entry
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$entry:
push rbx
push r12
push r13
$L$prologue:
mov r11,rsi
mov r12,rdx
mov r13,rcx
xor r10,r10
xor rcx,rcx
lea rdi,[8+rdi] ; rdi -> table; the two indices now sit at -8 and -4
mov r10b,BYTE[((-8))+rdi]
mov cl,BYTE[((-4))+rdi]
cmp DWORD[256+rdi],-1 ; marker written by private_RC4_set_key for the byte-table layout
je NEAR $L$RC4_CHAR
mov r8d,DWORD[OPENSSL_ia32cap_P]
xor rbx,rbx
inc r10b
sub rbx,r10 ; rbx = -(first index); masked below into a warm-up count
sub r13,r12 ; destination becomes an offset from the source cursor
mov eax,DWORD[r10*4+rdi] ; preload table[first index]
test r11,-16
jz NEAR $L$loop1 ; fewer than 16 bytes: byte-at-a-time loop
bt r8d,30 ; capability bit 30 selects the 16-byte SSE2 path
jc NEAR $L$intel
and rbx,7 ; bytes needed to bring the first index to a multiple of 8
lea rsi,[1+r10] ; lea leaves the flags of the AND intact for the jz
jz NEAR $L$oop8
sub r11,rbx ; the warm-up bytes are accounted for up front
; Dword-table path, 8 bytes per iteration.
; $L$oop8_warmup handles rbx single bytes so that the first index (r10)
; reaches a multiple of 8; the unrolled loop then addresses eight
; consecutive table slots with fixed displacements off r10 and rsi.
$L$oop8_warmup:
add cl,al ; second index += table[first]
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],eax ; swap the two table entries
mov DWORD[r10*4+rdi],edx
add al,dl ; index of the output byte
inc r10b
mov edx,DWORD[rax*4+rdi]
mov eax,DWORD[r10*4+rdi] ; preload the next table[first]
xor dl,BYTE[r12]
mov BYTE[r13*1+r12],dl
lea r12,[1+r12]
dec rbx
jnz NEAR $L$oop8_warmup
lea rsi,[1+r10]
jmp NEAR $L$oop8
ALIGN 16
$L$oop8:
; Each of the eight steps swaps one pair of entries and rotates one output
; byte into r8; al and bl alternate as the "current entry" register.
; step 0
add cl,al
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],eax
mov ebx,DWORD[rsi*4+rdi] ; preload the entry for the next step
ror r8,8
mov DWORD[r10*4+rdi],edx
add dl,al
mov r8b,BYTE[rdx*4+rdi]
; step 1
add cl,bl
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],ebx
mov eax,DWORD[4+rsi*4+rdi]
ror r8,8
mov DWORD[4+r10*4+rdi],edx
add dl,bl
mov r8b,BYTE[rdx*4+rdi]
; step 2
add cl,al
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],eax
mov ebx,DWORD[8+rsi*4+rdi]
ror r8,8
mov DWORD[8+r10*4+rdi],edx
add dl,al
mov r8b,BYTE[rdx*4+rdi]
; step 3
add cl,bl
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],ebx
mov eax,DWORD[12+rsi*4+rdi]
ror r8,8
mov DWORD[12+r10*4+rdi],edx
add dl,bl
mov r8b,BYTE[rdx*4+rdi]
; step 4
add cl,al
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],eax
mov ebx,DWORD[16+rsi*4+rdi]
ror r8,8
mov DWORD[16+r10*4+rdi],edx
add dl,al
mov r8b,BYTE[rdx*4+rdi]
; step 5
add cl,bl
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],ebx
mov eax,DWORD[20+rsi*4+rdi]
ror r8,8
mov DWORD[20+r10*4+rdi],edx
add dl,bl
mov r8b,BYTE[rdx*4+rdi]
; step 6
add cl,al
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],eax
mov ebx,DWORD[24+rsi*4+rdi]
ror r8,8
mov DWORD[24+r10*4+rdi],edx
add dl,al
mov r8b,BYTE[rdx*4+rdi]
; step 7 -- rsi is advanced (with byte wrap-around) before the preload, so
; the -4 displacement fetches the entry the next iteration starts with.
add sil,8
add cl,bl
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],ebx
mov eax,DWORD[((-4))+rsi*4+rdi]
ror r8,8
mov DWORD[28+r10*4+rdi],edx
add dl,bl
mov r8b,BYTE[rdx*4+rdi]
add r10b,8
ror r8,8 ; eighth rotation: the first output byte is back in the low byte
sub r11,8
xor r8,QWORD[r12] ; combine eight output bytes with eight source bytes
mov QWORD[r13*1+r12],r8
lea r12,[8+r12]
test r11,-8
jnz NEAR $L$oop8 ; at least 8 bytes left
cmp r11,0
jne NEAR $L$loop1 ; 1..7 bytes left: finish one byte at a time
jmp NEAR $L$exit
ALIGN 16
; Dword-table path, 16 bytes per iteration, taken when capability bit 30
; is set. Output bytes are gathered with PINSRW straight from the table:
; xmm0 collects the even-numbered bytes, xmm1 the odd-numbered ones; xmm1 is
; then shifted left by one byte and both are XORed into the 16 source bytes
; held in xmm2.
; NOTE(review): this relies on every dword table entry holding a value below
; 256, so that the word picked up by PINSRW has a zero high byte -- confirm
; against the key-setup code.
$L$intel:
test r11,-32
jz NEAR $L$loop1 ; fewer than 32 bytes: not worth the setup
and rbx,15 ; bytes needed to bring the first index to a multiple of 16
jz NEAR $L$oop16_is_hot
sub r11,rbx
$L$oop16_warmup:
add cl,al
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],eax
mov DWORD[r10*4+rdi],edx
add al,dl
inc r10b
mov edx,DWORD[rax*4+rdi]
mov eax,DWORD[r10*4+rdi]
xor dl,BYTE[r12]
mov BYTE[r13*1+r12],dl
lea r12,[1+r12]
dec rbx
jnz NEAR $L$oop16_warmup
mov rbx,rcx ; rewrite rcx as a clean zero-extended copy of cl
xor rcx,rcx
mov cl,bl
$L$oop16_is_hot:
; First step of the first iteration; it skips the store of a previous
; 16-byte result and joins the loop at $L$oop16_enter.
lea rsi,[r10*4+rdi] ; rsi -> table entry at the first index
add cl,al
mov edx,DWORD[rcx*4+rdi]
pxor xmm0,xmm0
mov DWORD[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD[4+rsi]
movzx eax,al
mov DWORD[rsi],edx
add cl,bl
pinsrw xmm0,WORD[rax*4+rdi],0
jmp NEAR $L$oop16_enter
ALIGN 16
$L$oop16:
; step 0, interleaved with finishing and storing the previous 16 bytes
add cl,al
mov edx,DWORD[rcx*4+rdi]
pxor xmm2,xmm0
psllq xmm1,8
pxor xmm0,xmm0
mov DWORD[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD[4+rsi]
movzx eax,al
mov DWORD[rsi],edx
pxor xmm2,xmm1
add cl,bl
pinsrw xmm0,WORD[rax*4+rdi],0
movdqu XMMWORD[r13*1+r12],xmm2
lea r12,[16+r12]
$L$oop16_enter:
; step 1
mov edx,DWORD[rcx*4+rdi]
pxor xmm1,xmm1
mov DWORD[rcx*4+rdi],ebx
add bl,dl
mov eax,DWORD[8+rsi]
movzx ebx,bl
mov DWORD[4+rsi],edx
add cl,al
pinsrw xmm1,WORD[rbx*4+rdi],0
; step 2
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD[12+rsi]
movzx eax,al
mov DWORD[8+rsi],edx
add cl,bl
pinsrw xmm0,WORD[rax*4+rdi],1
; step 3
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],ebx
add bl,dl
mov eax,DWORD[16+rsi]
movzx ebx,bl
mov DWORD[12+rsi],edx
add cl,al
pinsrw xmm1,WORD[rbx*4+rdi],1
; step 4
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD[20+rsi]
movzx eax,al
mov DWORD[16+rsi],edx
add cl,bl
pinsrw xmm0,WORD[rax*4+rdi],2
; step 5
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],ebx
add bl,dl
mov eax,DWORD[24+rsi]
movzx ebx,bl
mov DWORD[20+rsi],edx
add cl,al
pinsrw xmm1,WORD[rbx*4+rdi],2
; step 6
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD[28+rsi]
movzx eax,al
mov DWORD[24+rsi],edx
add cl,bl
pinsrw xmm0,WORD[rax*4+rdi],3
; step 7
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],ebx
add bl,dl
mov eax,DWORD[32+rsi]
movzx ebx,bl
mov DWORD[28+rsi],edx
add cl,al
pinsrw xmm1,WORD[rbx*4+rdi],3
; step 8
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD[36+rsi]
movzx eax,al
mov DWORD[32+rsi],edx
add cl,bl
pinsrw xmm0,WORD[rax*4+rdi],4
; step 9
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],ebx
add bl,dl
mov eax,DWORD[40+rsi]
movzx ebx,bl
mov DWORD[36+rsi],edx
add cl,al
pinsrw xmm1,WORD[rbx*4+rdi],4
; step 10
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD[44+rsi]
movzx eax,al
mov DWORD[40+rsi],edx
add cl,bl
pinsrw xmm0,WORD[rax*4+rdi],5
; step 11
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],ebx
add bl,dl
mov eax,DWORD[48+rsi]
movzx ebx,bl
mov DWORD[44+rsi],edx
add cl,al
pinsrw xmm1,WORD[rbx*4+rdi],5
; step 12
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD[52+rsi]
movzx eax,al
mov DWORD[48+rsi],edx
add cl,bl
pinsrw xmm0,WORD[rax*4+rdi],6
; step 13
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],ebx
add bl,dl
mov eax,DWORD[56+rsi]
movzx ebx,bl
mov DWORD[52+rsi],edx
add cl,al
pinsrw xmm1,WORD[rbx*4+rdi],6
; step 14
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],eax
add al,dl
mov ebx,DWORD[60+rsi]
movzx eax,al
mov DWORD[56+rsi],edx
add cl,bl
pinsrw xmm0,WORD[rax*4+rdi],7
; step 15, interleaved with advancing the first index and loading the
; 16 source bytes for this iteration
add r10b,16
movdqu xmm2,XMMWORD[r12]
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],ebx
add bl,dl
movzx ebx,bl
mov DWORD[60+rsi],edx
lea rsi,[r10*4+rdi] ; re-base rsi on the (byte-wrapped) first index
pinsrw xmm1,WORD[rbx*4+rdi],7
mov eax,DWORD[rsi] ; preload the entry for the next iteration
mov rbx,rcx ; rewrite rcx as a clean zero-extended copy of cl
xor rcx,rcx
sub r11,16
mov cl,bl
test r11,-16
jnz NEAR $L$oop16
; Loop exit: finish and store the last 16-byte result.
psllq xmm1,8
pxor xmm2,xmm0
pxor xmm2,xmm1
movdqu XMMWORD[r13*1+r12],xmm2
lea r12,[16+r12]
cmp r11,0
jne NEAR $L$loop1 ; 1..15 bytes left: finish one byte at a time
jmp NEAR $L$exit
ALIGN 16
; Dword-table path, one byte per iteration. Used for short inputs and for
; the tail left over by the 8x/16x loops.
; On entry: eax = table[r10], r11 = bytes left (non-zero), r13 holds
; destination - source.
$L$loop1:
add cl,al ; second index += table[first]
mov edx,DWORD[rcx*4+rdi]
mov DWORD[rcx*4+rdi],eax ; swap the two table entries
mov DWORD[r10*4+rdi],edx
add al,dl ; index of the output byte
inc r10b
mov edx,DWORD[rax*4+rdi]
mov eax,DWORD[r10*4+rdi] ; preload the entry for the next byte
xor dl,BYTE[r12]
mov BYTE[r13*1+r12],dl
lea r12,[1+r12]
dec r11
jnz NEAR $L$loop1
jmp NEAR $L$exit
ALIGN 16
; Byte-table path (table entries are single bytes), 8 bytes per iteration.
; Here r13 is an absolute destination cursor; it is advanced separately
; from r12. r10 and rsi alternate as the "first index", eax and ebx as the
; "current entry". r8d and r9d each hold four source bytes that are XORed
; in place, one byte per step, and rotated by 8 bits after every step.
; The cmp/jne pair in every step handles the case where the second index
; equals the next first index: the preloaded entry was fetched before the
; swap stored to that slot, so it is replaced by the value just stored.
$L$RC4_CHAR:
add r10b,1
movzx eax,BYTE[r10*1+rdi] ; preload table[first index]
test r11,-8
jz NEAR $L$cloop1 ; fewer than 8 bytes
jmp NEAR $L$cloop8
ALIGN 16
$L$cloop8:
mov r8d,DWORD[r12] ; source bytes 0..3
mov r9d,DWORD[4+r12] ; source bytes 4..7
; step 0 (first index r10, next rsi)
add cl,al
lea rsi,[1+r10]
movzx edx,BYTE[rcx*1+rdi]
movzx esi,sil
movzx ebx,BYTE[rsi*1+rdi]
mov BYTE[rcx*1+rdi],al
cmp rcx,rsi
mov BYTE[r10*1+rdi],dl
jne NEAR $L$cmov0
mov rbx,rax
$L$cmov0:
add dl,al
xor r8b,BYTE[rdx*1+rdi]
ror r8d,8
; step 1 (first index rsi, next r10)
add cl,bl
lea r10,[1+rsi]
movzx edx,BYTE[rcx*1+rdi]
movzx r10d,r10b
movzx eax,BYTE[r10*1+rdi]
mov BYTE[rcx*1+rdi],bl
cmp rcx,r10
mov BYTE[rsi*1+rdi],dl
jne NEAR $L$cmov1
mov rax,rbx
$L$cmov1:
add dl,bl
xor r8b,BYTE[rdx*1+rdi]
ror r8d,8
; step 2
add cl,al
lea rsi,[1+r10]
movzx edx,BYTE[rcx*1+rdi]
movzx esi,sil
movzx ebx,BYTE[rsi*1+rdi]
mov BYTE[rcx*1+rdi],al
cmp rcx,rsi
mov BYTE[r10*1+rdi],dl
jne NEAR $L$cmov2
mov rbx,rax
$L$cmov2:
add dl,al
xor r8b,BYTE[rdx*1+rdi]
ror r8d,8
; step 3
add cl,bl
lea r10,[1+rsi]
movzx edx,BYTE[rcx*1+rdi]
movzx r10d,r10b
movzx eax,BYTE[r10*1+rdi]
mov BYTE[rcx*1+rdi],bl
cmp rcx,r10
mov BYTE[rsi*1+rdi],dl
jne NEAR $L$cmov3
mov rax,rbx
$L$cmov3:
add dl,bl
xor r8b,BYTE[rdx*1+rdi]
ror r8d,8
; step 4 (switches to the second source dword, r9d)
add cl,al
lea rsi,[1+r10]
movzx edx,BYTE[rcx*1+rdi]
movzx esi,sil
movzx ebx,BYTE[rsi*1+rdi]
mov BYTE[rcx*1+rdi],al
cmp rcx,rsi
mov BYTE[r10*1+rdi],dl
jne NEAR $L$cmov4
mov rbx,rax
$L$cmov4:
add dl,al
xor r9b,BYTE[rdx*1+rdi]
ror r9d,8
; step 5
add cl,bl
lea r10,[1+rsi]
movzx edx,BYTE[rcx*1+rdi]
movzx r10d,r10b
movzx eax,BYTE[r10*1+rdi]
mov BYTE[rcx*1+rdi],bl
cmp rcx,r10
mov BYTE[rsi*1+rdi],dl
jne NEAR $L$cmov5
mov rax,rbx
$L$cmov5:
add dl,bl
xor r9b,BYTE[rdx*1+rdi]
ror r9d,8
; step 6
add cl,al
lea rsi,[1+r10]
movzx edx,BYTE[rcx*1+rdi]
movzx esi,sil
movzx ebx,BYTE[rsi*1+rdi]
mov BYTE[rcx*1+rdi],al
cmp rcx,rsi
mov BYTE[r10*1+rdi],dl
jne NEAR $L$cmov6
mov rbx,rax
$L$cmov6:
add dl,al
xor r9b,BYTE[rdx*1+rdi]
ror r9d,8
; step 7
add cl,bl
lea r10,[1+rsi]
movzx edx,BYTE[rcx*1+rdi]
movzx r10d,r10b
movzx eax,BYTE[r10*1+rdi]
mov BYTE[rcx*1+rdi],bl
cmp rcx,r10
mov BYTE[rsi*1+rdi],dl
jne NEAR $L$cmov7
mov rax,rbx
$L$cmov7:
add dl,bl
xor r9b,BYTE[rdx*1+rdi]
ror r9d,8
; After four rotations each, r8d/r9d are back in memory byte order.
lea r11,[((-8))+r11]
mov DWORD[r13],r8d
lea r12,[8+r12]
mov DWORD[4+r13],r9d
lea r13,[8+r13]
test r11,-8
jnz NEAR $L$cloop8 ; at least 8 bytes left
cmp r11,0
jne NEAR $L$cloop1 ; 1..7 bytes left
jmp NEAR $L$exit
ALIGN 16
; Byte-table path, one byte per iteration (short inputs and the tail of
; $L$cloop8). On entry: eax = table[r10], r11 = bytes left (non-zero),
; r12/r13 = absolute source/destination cursors.
$L$cloop1:
add cl,al ; second index += table[first]
movzx ecx,cl
movzx edx,BYTE[rcx*1+rdi]
mov BYTE[rcx*1+rdi],al ; swap the two table entries
mov BYTE[r10*1+rdi],dl
add dl,al ; index of the output byte
add r10b,1
movzx edx,dl
movzx r10d,r10b
movzx edx,BYTE[rdx*1+rdi]
movzx eax,BYTE[r10*1+rdi] ; preload the entry for the next byte
xor dl,BYTE[r12]
lea r12,[1+r12]
mov BYTE[r13],dl
lea r13,[1+r13]
sub r11,1
jnz NEAR $L$cloop1
jmp NEAR $L$exit
ALIGN 16
; Common exit of RC4: write both indices back into the state block and
; unwind the 24-byte register save area created at $L$entry.
; stream_se_handler assumes exactly this frame layout between $L$prologue
; and $L$epilogue.
$L$exit:
sub r10b,1 ; every path runs one ahead on the first index; undo that
mov DWORD[((-8))+rdi],r10d
mov DWORD[((-4))+rdi],ecx
mov r13,QWORD[rsp] ; reload in reverse push order
mov r12,QWORD[8+rsp]
mov rbx,QWORD[16+rsp]
add rsp,24
$L$epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_RC4:
global private_RC4_set_key
ALIGN 16
; private_RC4_set_key -- builds the 256-entry lookup table from a key.
; The Win64 argument registers are remapped right after entry:
;   rdi <- rcx : state block (two index dwords, then the table at +8)
;   rsi <- rdx : key length in bytes
;   rdx <- r8  : key bytes
; NOTE(review): a zero key length is not checked here; the first key fetch
; would then read the byte at rdx -- presumably callers never pass 0.
; The key is walked with a negative offset: rdx points one past the key and
; rsi runs from -length up to 0, at which point it is reset from rcx.
; Capability bit 20 selects the byte-wide table layout; that layout is
; tagged by storing -1 into the dword at table+256, which RC4 tests.
; Both indices at -8/-4 are cleared on exit. No stack frame is created.
private_RC4_set_key:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_private_RC4_set_key:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
lea rdi,[8+rdi] ; rdi -> table
lea rdx,[rsi*1+rdx] ; rdx -> one past the last key byte
neg rsi ; rsi = -length
mov rcx,rsi ; kept for rewinding the key offset
xor eax,eax
xor r9,r9
xor r10,r10
xor r11,r11
mov r8d,DWORD[OPENSSL_ia32cap_P]
bt r8d,20
jc NEAR $L$c1stloop
jmp NEAR $L$w1stloop
ALIGN 16
$L$w1stloop:
; Dword layout: table[i] = i for i = 0..255; the carry out of al ends the loop.
mov DWORD[rax*4+rdi],eax
add al,1
jnc NEAR $L$w1stloop
xor r9,r9
xor r8,r8
ALIGN 16
$L$w2ndloop:
; r9 = running position, r8b = accumulated index (+= key byte + table[r9]).
mov r10d,DWORD[r9*4+rdi]
add r8b,BYTE[rsi*1+rdx]
add r8b,r10b
add rsi,1 ; ZF set when the key offset reaches 0 (end of key)
mov r11d,DWORD[r8*4+rdi] ; mov does not disturb ZF
cmovz rsi,rcx ; rewind to the start of the key
mov DWORD[r8*4+rdi],r10d ; swap the two entries
mov DWORD[r9*4+rdi],r11d
add r9b,1
jnc NEAR $L$w2ndloop ; carry out of r9b after 256 positions
jmp NEAR $L$exit_key
ALIGN 16
$L$c1stloop:
; Byte layout: same initialisation with one byte per entry.
mov BYTE[rax*1+rdi],al
add al,1
jnc NEAR $L$c1stloop
xor r9,r9
xor r8,r8
ALIGN 16
$L$c2ndloop:
mov r10b,BYTE[r9*1+rdi]
add r8b,BYTE[rsi*1+rdx]
add r8b,r10b
add rsi,1 ; ZF set when the key offset reaches 0 (end of key)
mov r11b,BYTE[r8*1+rdi] ; mov does not disturb ZF
jnz NEAR $L$cnowrap
mov rsi,rcx ; rewind to the start of the key
$L$cnowrap:
mov BYTE[r8*1+rdi],r10b ; swap the two entries
mov BYTE[r9*1+rdi],r11b
add r9b,1
jnc NEAR $L$c2ndloop
mov DWORD[256+rdi],-1 ; tag the state as byte-table layout
ALIGN 16
$L$exit_key:
xor eax,eax
mov DWORD[((-8))+rdi],eax ; reset both stream indices
mov DWORD[((-4))+rdi],eax
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_private_RC4_set_key:
global RC4_options
ALIGN 16
; RC4_options -- returns in rax the address of one of the three
; NUL-terminated strings stored back to back at $L$opts:
;   capability bit 20 set            -> $L$opts + 12
;   else capability bit 30 set       -> $L$opts + 25
;   neither bit set                  -> $L$opts
; Leaf routine: takes no arguments, touches only rax, edx and the flags.
RC4_options:
        mov     edx,DWORD[OPENSSL_ia32cap_P]
        lea     rax,[$L$opts]           ; default answer: first string
        bt      edx,20
        jc      NEAR $L$8xchar
        bt      edx,30
        jc      NEAR $L$16xint
        DB      0F3h,0C3h               ;repret
$L$8xchar:
        add     rax,12                  ; skip the 12-byte first string
        DB      0F3h,0C3h               ;repret
$L$16xint:
        add     rax,25                  ; skip the first two strings (12 + 13 bytes)
        DB      0F3h,0C3h               ;repret
ALIGN 64
; String table indexed by RC4_options (offsets 0, 12 and 25), followed by
; an identification string. All four strings are NUL-terminated ASCII.
$L$opts:
DB 114,99,52,40,56,120,44,105,110,116,41,0 ; "rc4(8x,int)"
DB 114,99,52,40,56,120,44,99,104,97,114,41,0 ; "rc4(8x,char)"
DB 114,99,52,40,49,54,120,44,105,110,116,41,0 ; "rc4(16x,int)"
; "RC4 for x86_64, CRYPTOGAMS by <appro@openssl.org>"
DB 82,67,52,32,102,111,114,32,120,56,54,95,54,52,44,32
DB 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
DB 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
DB 62,0
ALIGN 64
EXTERN __imp_RtlVirtualUnwind
ALIGN 16
; stream_se_handler -- Win64 SEH language-specific handler for RC4 (see
; $L$SEH_info_RC4). Registers read on entry: r8 = context record of the
; interrupted frame, r9 = dispatcher context (used after the jump to
; $L$common_seh_exit, which lives inside key_se_handler).
; NOTE(review): the offsets are assumed to follow the Win64 CONTEXT layout
; (120=Rax 144=Rbx 152=Rsp 168=Rsi 176=Rdi 216=R12 224=R13 248=Rip) --
; confirm against winnt.h before changing them.
stream_se_handler:
push rsi ; same save sequence as key_se_handler, so the shared tail can pop it
push rdi
push rbx
push rbp
push r12
push r13
push r14
push r15
pushfq
sub rsp,64
mov rax,QWORD[120+r8] ; RC4 copies rsp into rax at entry
mov rbx,QWORD[248+r8] ; faulting instruction pointer
lea r10,[$L$prologue]
cmp rbx,r10
jb NEAR $L$in_prologue ; fault before $L$prologue: keep the Rax-based frame
mov rax,QWORD[152+r8] ; otherwise start from the saved Rsp
lea r10,[$L$epilogue]
cmp rbx,r10
jae NEAR $L$in_prologue ; fault at/after $L$epilogue: registers already reloaded
; Fault inside the body: undo the three pushes made at $L$entry
; (rbx, r12, r13) and publish the recovered values.
lea rax,[24+rax]
mov rbx,QWORD[((-8))+rax]
mov r12,QWORD[((-16))+rax]
mov r13,QWORD[((-24))+rax]
mov QWORD[144+r8],rbx
mov QWORD[216+r8],r12
mov QWORD[224+r8],r13
$L$in_prologue:
; rax = stack pointer at function entry; rdi/rsi sit in the slots at +8/+16.
mov rdi,QWORD[8+rax]
mov rsi,QWORD[16+rax]
mov QWORD[152+r8],rax
mov QWORD[168+r8],rsi
mov QWORD[176+r8],rdi
jmp NEAR $L$common_seh_exit
ALIGN 16
; key_se_handler -- Win64 SEH language-specific handler for
; private_RC4_set_key (see $L$SEH_info_private_RC4_set_key). That routine
; never moves rsp, so the saved Rsp is always the entry-time stack pointer
; and only rdi/rsi need to be recovered from their slots at +8/+16.
; Registers read on entry: r8 = context record, r9 = dispatcher context.
; NOTE(review): offsets assumed per the Win64 CONTEXT layout
; (152=Rsp 168=Rsi 176=Rdi) -- confirm against winnt.h.
; $L$common_seh_exit is also the tail of stream_se_handler.
key_se_handler:
push rsi
push rdi
push rbx
push rbp
push r12
push r13
push r14
push r15
pushfq
sub rsp,64 ; outgoing-argument area for the call below
mov rax,QWORD[152+r8]
mov rdi,QWORD[8+rax]
mov rsi,QWORD[16+rax]
mov QWORD[168+r8],rsi
mov QWORD[176+r8],rdi
$L$common_seh_exit:
; Copy the patched context (154 qwords) to the record referenced at [40+r9].
mov rdi,QWORD[40+r9]
mov rsi,r8
mov ecx,154
DD 0xa548f3fc ; raw bytes FC F3 48 A5 = cld ; rep movsq
; Set up the eight arguments for RtlVirtualUnwind from the dispatcher context.
mov rsi,r9
xor rcx,rcx ; arg1 = 0
mov rdx,QWORD[8+rsi] ; arg2 = [r9+8]
mov r8,QWORD[rsi] ; arg3 = [r9+0]
mov r9,QWORD[16+rsi] ; arg4 = [r9+16]
mov r10,QWORD[40+rsi]
lea r11,[56+rsi]
lea r12,[24+rsi]
mov QWORD[32+rsp],r10 ; arg5 = [r9+40], the context record filled above
mov QWORD[40+rsp],r11 ; arg6 = address of the field at r9+56
mov QWORD[48+rsp],r12 ; arg7 = address of the field at r9+24
mov QWORD[56+rsp],rcx ; arg8 = 0
call QWORD[__imp_RtlVirtualUnwind]
mov eax,1 ; handler result; presumably ExceptionContinueSearch -- confirm
add rsp,64
popfq
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
pop rdi
pop rsi
DB 0F3h,0C3h ;repret
; Win64 exception tables for the two framed routines of this file.
section .pdata rdata align=4
ALIGN 4
; Function-table entry for RC4: start, end, unwind info (image-relative).
DD $L$SEH_begin_RC4 wrt ..imagebase
DD $L$SEH_end_RC4 wrt ..imagebase
DD $L$SEH_info_RC4 wrt ..imagebase
; Function-table entry for private_RC4_set_key.
DD $L$SEH_begin_private_RC4_set_key wrt ..imagebase
DD $L$SEH_end_private_RC4_set_key wrt ..imagebase
DD $L$SEH_info_private_RC4_set_key wrt ..imagebase
section .xdata rdata align=8
ALIGN 8
; NOTE(review): in both records 9 presumably encodes UNWIND_INFO version 1
; with the "has exception handler" flag, followed by zero prologue size,
; zero unwind codes and no frame register -- confirm against the x64
; unwind-data spec.
$L$SEH_info_RC4:
DB 9,0,0,0
DD stream_se_handler wrt ..imagebase
$L$SEH_info_private_RC4_set_key:
DB 9,0,0,0
DD key_se_handler wrt ..imagebase

1927
tmp64/rsaz-avx2.asm Normal file

File diff suppressed because it is too large Load Diff

2149
tmp64/rsaz-x86_64.asm Normal file

File diff suppressed because it is too large Load Diff

7528
tmp64/sha1-mb-x86_64.asm Normal file

File diff suppressed because it is too large Load Diff

5714
tmp64/sha1-x86_64.asm Normal file

File diff suppressed because it is too large Load Diff

8209
tmp64/sha256-mb-x86_64.asm Normal file

File diff suppressed because it is too large Load Diff

5634
tmp64/sha256-x86_64.asm Normal file

File diff suppressed because it is too large Load Diff

5590
tmp64/sha512-x86_64.asm Normal file

File diff suppressed because it is too large Load Diff

1137
tmp64/vpaes-x86_64.asm Normal file

File diff suppressed because it is too large Load Diff

972
tmp64/wp-x86_64.asm Normal file
View File

@@ -0,0 +1,972 @@
default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64
global whirlpool_block
ALIGN 16
; whirlpool_block -- Win64 entry point of the block-processing loop.
; The Win64 argument registers are remapped right after entry:
;   rdi <- rcx : 64-byte chaining value (eight qwords), updated per block
;   rsi <- rdx : input cursor, advanced by 64 bytes per block
;   rdx <- r8  : number of blocks; decremented after each block and not
;                checked for zero up front (presumably callers pass >= 1)
; Stack frame (rsp is aligned down to 64 bytes):
;   [rsp+0   .. rsp+63 ]  eight round-key qwords
;   [rsp+64  .. rsp+127]  eight state qwords
;   [rsp+128] chaining-value pointer   [rsp+136] input cursor
;   [rsp+144] blocks left              [rsp+152] round counter
;   [rsp+160] rsp as it was right after the six pushes
; rbp holds the address of $L$table for the whole routine.
whirlpool_block:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp
$L$SEH_begin_whirlpool_block:
mov rdi,rcx
mov rsi,rdx
mov rdx,r8
push rbx
push rbp
push r12
push r13
push r14
push r15
mov r11,rsp ; remembered so $L$alldone can undo the alignment
sub rsp,128+40
and rsp,-64
lea r10,[128+rsp] ; r10 -> bookkeeping slots above the two 64-byte areas
mov QWORD[r10],rdi
mov QWORD[8+r10],rsi
mov QWORD[16+r10],rdx
mov QWORD[32+r10],r11
$L$prologue:
mov rbx,r10
lea rbp,[$L$table]
xor rcx,rcx
xor rdx,rdx
mov r8,QWORD[rdi] ; load the chaining value into r8..r15
mov r9,QWORD[8+rdi]
mov r10,QWORD[16+rdi]
mov r11,QWORD[24+rdi]
mov r12,QWORD[32+rdi]
mov r13,QWORD[40+rdi]
mov r14,QWORD[48+rdi]
mov r15,QWORD[56+rdi]
$L$outerloop:
; Per block: the chaining value becomes the initial round key ...
mov QWORD[rsp],r8
mov QWORD[8+rsp],r9
mov QWORD[16+rsp],r10
mov QWORD[24+rsp],r11
mov QWORD[32+rsp],r12
mov QWORD[40+rsp],r13
mov QWORD[48+rsp],r14
mov QWORD[56+rsp],r15
; ... and chaining value XOR input block becomes the initial state.
xor r8,QWORD[rsi]
xor r9,QWORD[8+rsi]
xor r10,QWORD[16+rsi]
xor r11,QWORD[24+rsi]
xor r12,QWORD[32+rsi]
xor r13,QWORD[40+rsi]
xor r14,QWORD[48+rsi]
xor r15,QWORD[56+rsi]
mov QWORD[((64+0))+rsp],r8
mov QWORD[((64+8))+rsp],r9
mov QWORD[((64+16))+rsp],r10
mov QWORD[((64+24))+rsp],r11
mov QWORD[((64+32))+rsp],r12
mov QWORD[((64+40))+rsp],r13
mov QWORD[((64+48))+rsp],r14
mov QWORD[((64+56))+rsp],r15
xor rsi,rsi ; round number 0, also the index used at $L$round
mov QWORD[24+rbx],rsi
jmp NEAR $L$round
ALIGN 16
; One round, fully unrolled. On entry rsi = round number, rbp = $L$table.
; Table addressing: every table row is 16 bytes (an 8-byte value stored
; twice), so a byte value b is doubled (lea r,[b+b]) and scaled by 8; reading
; the row at displacement k (0..7) picks the value starting k bytes in.
; Pass 1 walks the eight round-key qwords at [rsp+0..63] and produces the
; next round key in r8..r15, with r8 seeded from the qword at
; table+4096+8*round. The new key is stored back to [rsp+0..63].
; Pass 2 walks the eight state qwords at [rsp+64..127] and XORs their
; lookups on top of r8..r15, which therefore end up holding the new state.
; Within a row, byte b goes to the register b places after the row's first
; register (wrapping r15 -> r8) using displacements 0,7,6,5,4,3,2,1.
; eax/ebx carry the low/high dword of the current row; rcx/rdx hold the two
; bytes being extracted and rsi/rdi their doubled forms. rbx is therefore
; clobbered and re-derived from rsp at the end of the round.
$L$round:
mov r8,QWORD[4096+rsi*8+rbp] ; per-round constant
mov eax,DWORD[rsp]
mov ebx,DWORD[4+rsp]
; ---- pass 1, key row 0: starts r8, r9 ... r15 (r9..r15 are initialised by mov)
movzx ecx,al
movzx edx,ah
shr eax,16
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r8,QWORD[rsi*8+rbp]
mov r9,QWORD[7+rdi*8+rbp]
mov eax,DWORD[((0+8))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
mov r10,QWORD[6+rsi*8+rbp]
mov r11,QWORD[5+rdi*8+rbp]
shr ebx,16
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
mov r12,QWORD[4+rsi*8+rbp]
mov r13,QWORD[3+rdi*8+rbp]
mov ebx,DWORD[((0+8+4))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
mov r14,QWORD[2+rsi*8+rbp]
mov r15,QWORD[1+rdi*8+rbp]
; ---- pass 1, key row 1: starts at r9
shr eax,16
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r9,QWORD[rsi*8+rbp]
xor r10,QWORD[7+rdi*8+rbp]
mov eax,DWORD[((8+8))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r11,QWORD[6+rsi*8+rbp]
xor r12,QWORD[5+rdi*8+rbp]
shr ebx,16
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r13,QWORD[4+rsi*8+rbp]
xor r14,QWORD[3+rdi*8+rbp]
mov ebx,DWORD[((8+8+4))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r15,QWORD[2+rsi*8+rbp]
xor r8,QWORD[1+rdi*8+rbp]
; ---- pass 1, key row 2: starts at r10
shr eax,16
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r10,QWORD[rsi*8+rbp]
xor r11,QWORD[7+rdi*8+rbp]
mov eax,DWORD[((16+8))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r12,QWORD[6+rsi*8+rbp]
xor r13,QWORD[5+rdi*8+rbp]
shr ebx,16
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r14,QWORD[4+rsi*8+rbp]
xor r15,QWORD[3+rdi*8+rbp]
mov ebx,DWORD[((16+8+4))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r8,QWORD[2+rsi*8+rbp]
xor r9,QWORD[1+rdi*8+rbp]
; ---- pass 1, key row 3: starts at r11
shr eax,16
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r11,QWORD[rsi*8+rbp]
xor r12,QWORD[7+rdi*8+rbp]
mov eax,DWORD[((24+8))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r13,QWORD[6+rsi*8+rbp]
xor r14,QWORD[5+rdi*8+rbp]
shr ebx,16
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r15,QWORD[4+rsi*8+rbp]
xor r8,QWORD[3+rdi*8+rbp]
mov ebx,DWORD[((24+8+4))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r9,QWORD[2+rsi*8+rbp]
xor r10,QWORD[1+rdi*8+rbp]
; ---- pass 1, key row 4: starts at r12
shr eax,16
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r12,QWORD[rsi*8+rbp]
xor r13,QWORD[7+rdi*8+rbp]
mov eax,DWORD[((32+8))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r14,QWORD[6+rsi*8+rbp]
xor r15,QWORD[5+rdi*8+rbp]
shr ebx,16
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r8,QWORD[4+rsi*8+rbp]
xor r9,QWORD[3+rdi*8+rbp]
mov ebx,DWORD[((32+8+4))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r10,QWORD[2+rsi*8+rbp]
xor r11,QWORD[1+rdi*8+rbp]
; ---- pass 1, key row 5: starts at r13
shr eax,16
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r13,QWORD[rsi*8+rbp]
xor r14,QWORD[7+rdi*8+rbp]
mov eax,DWORD[((40+8))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r15,QWORD[6+rsi*8+rbp]
xor r8,QWORD[5+rdi*8+rbp]
shr ebx,16
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r9,QWORD[4+rsi*8+rbp]
xor r10,QWORD[3+rdi*8+rbp]
mov ebx,DWORD[((40+8+4))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r11,QWORD[2+rsi*8+rbp]
xor r12,QWORD[1+rdi*8+rbp]
; ---- pass 1, key row 6: starts at r14
shr eax,16
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r14,QWORD[rsi*8+rbp]
xor r15,QWORD[7+rdi*8+rbp]
mov eax,DWORD[((48+8))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r8,QWORD[6+rsi*8+rbp]
xor r9,QWORD[5+rdi*8+rbp]
shr ebx,16
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r10,QWORD[4+rsi*8+rbp]
xor r11,QWORD[3+rdi*8+rbp]
mov ebx,DWORD[((48+8+4))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r12,QWORD[2+rsi*8+rbp]
xor r13,QWORD[1+rdi*8+rbp]
; ---- pass 1, key row 7: starts at r15; the dword loads below already
; fetch state row 0 ([rsp+64], [rsp+68]) for pass 2
shr eax,16
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r15,QWORD[rsi*8+rbp]
xor r8,QWORD[7+rdi*8+rbp]
mov eax,DWORD[((56+8))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r9,QWORD[6+rsi*8+rbp]
xor r10,QWORD[5+rdi*8+rbp]
shr ebx,16
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r11,QWORD[4+rsi*8+rbp]
xor r12,QWORD[3+rdi*8+rbp]
mov ebx,DWORD[((56+8+4))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r13,QWORD[2+rsi*8+rbp]
xor r14,QWORD[1+rdi*8+rbp]
; New round key complete: store it; r8..r15 keep it as the seed of pass 2.
mov QWORD[rsp],r8
mov QWORD[8+rsp],r9
mov QWORD[16+rsp],r10
mov QWORD[24+rsp],r11
mov QWORD[32+rsp],r12
mov QWORD[40+rsp],r13
mov QWORD[48+rsp],r14
mov QWORD[56+rsp],r15
; ---- pass 2, state row 0: starts at r8
shr eax,16
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r8,QWORD[rsi*8+rbp]
xor r9,QWORD[7+rdi*8+rbp]
mov eax,DWORD[((64+0+8))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r10,QWORD[6+rsi*8+rbp]
xor r11,QWORD[5+rdi*8+rbp]
shr ebx,16
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r12,QWORD[4+rsi*8+rbp]
xor r13,QWORD[3+rdi*8+rbp]
mov ebx,DWORD[((64+0+8+4))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r14,QWORD[2+rsi*8+rbp]
xor r15,QWORD[1+rdi*8+rbp]
; ---- pass 2, state row 1: starts at r9
shr eax,16
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r9,QWORD[rsi*8+rbp]
xor r10,QWORD[7+rdi*8+rbp]
mov eax,DWORD[((64+8+8))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r11,QWORD[6+rsi*8+rbp]
xor r12,QWORD[5+rdi*8+rbp]
shr ebx,16
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r13,QWORD[4+rsi*8+rbp]
xor r14,QWORD[3+rdi*8+rbp]
mov ebx,DWORD[((64+8+8+4))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r15,QWORD[2+rsi*8+rbp]
xor r8,QWORD[1+rdi*8+rbp]
; ---- pass 2, state row 2: starts at r10
shr eax,16
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r10,QWORD[rsi*8+rbp]
xor r11,QWORD[7+rdi*8+rbp]
mov eax,DWORD[((64+16+8))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r12,QWORD[6+rsi*8+rbp]
xor r13,QWORD[5+rdi*8+rbp]
shr ebx,16
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r14,QWORD[4+rsi*8+rbp]
xor r15,QWORD[3+rdi*8+rbp]
mov ebx,DWORD[((64+16+8+4))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r8,QWORD[2+rsi*8+rbp]
xor r9,QWORD[1+rdi*8+rbp]
; ---- pass 2, state row 3: starts at r11
shr eax,16
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r11,QWORD[rsi*8+rbp]
xor r12,QWORD[7+rdi*8+rbp]
mov eax,DWORD[((64+24+8))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r13,QWORD[6+rsi*8+rbp]
xor r14,QWORD[5+rdi*8+rbp]
shr ebx,16
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r15,QWORD[4+rsi*8+rbp]
xor r8,QWORD[3+rdi*8+rbp]
mov ebx,DWORD[((64+24+8+4))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r9,QWORD[2+rsi*8+rbp]
xor r10,QWORD[1+rdi*8+rbp]
; ---- pass 2, state row 4: starts at r12
shr eax,16
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r12,QWORD[rsi*8+rbp]
xor r13,QWORD[7+rdi*8+rbp]
mov eax,DWORD[((64+32+8))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r14,QWORD[6+rsi*8+rbp]
xor r15,QWORD[5+rdi*8+rbp]
shr ebx,16
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r8,QWORD[4+rsi*8+rbp]
xor r9,QWORD[3+rdi*8+rbp]
mov ebx,DWORD[((64+32+8+4))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r10,QWORD[2+rsi*8+rbp]
xor r11,QWORD[1+rdi*8+rbp]
; ---- pass 2, state row 5: starts at r13
shr eax,16
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r13,QWORD[rsi*8+rbp]
xor r14,QWORD[7+rdi*8+rbp]
mov eax,DWORD[((64+40+8))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r15,QWORD[6+rsi*8+rbp]
xor r8,QWORD[5+rdi*8+rbp]
shr ebx,16
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r9,QWORD[4+rsi*8+rbp]
xor r10,QWORD[3+rdi*8+rbp]
mov ebx,DWORD[((64+40+8+4))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r11,QWORD[2+rsi*8+rbp]
xor r12,QWORD[1+rdi*8+rbp]
; ---- pass 2, state row 6: starts at r14
shr eax,16
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r14,QWORD[rsi*8+rbp]
xor r15,QWORD[7+rdi*8+rbp]
mov eax,DWORD[((64+48+8))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r8,QWORD[6+rsi*8+rbp]
xor r9,QWORD[5+rdi*8+rbp]
shr ebx,16
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r10,QWORD[4+rsi*8+rbp]
xor r11,QWORD[3+rdi*8+rbp]
mov ebx,DWORD[((64+48+8+4))+rsp]
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r12,QWORD[2+rsi*8+rbp]
xor r13,QWORD[1+rdi*8+rbp]
; ---- pass 2, state row 7: starts at r15 (last row, nothing left to prefetch)
shr eax,16
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r15,QWORD[rsi*8+rbp]
xor r8,QWORD[7+rdi*8+rbp]
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r9,QWORD[6+rsi*8+rbp]
xor r10,QWORD[5+rdi*8+rbp]
shr ebx,16
lea rsi,[rcx*1+rcx]
movzx ecx,bl
lea rdi,[rdx*1+rdx]
movzx edx,bh
xor r11,QWORD[4+rsi*8+rbp]
xor r12,QWORD[3+rdi*8+rbp]
lea rsi,[rcx*1+rcx]
movzx ecx,al
lea rdi,[rdx*1+rdx]
movzx edx,ah
xor r13,QWORD[2+rsi*8+rbp]
xor r14,QWORD[1+rdi*8+rbp]
; Round bookkeeping: rbx was used as scratch, so rebuild the slot pointer.
lea rbx,[128+rsp]
mov rsi,QWORD[24+rbx]
add rsi,1
cmp rsi,10 ; ten rounds per block
je NEAR $L$roundsdone ; after the last round the state stays in r8..r15
mov QWORD[24+rbx],rsi
mov QWORD[((64+0))+rsp],r8 ; store the new state for the next round
mov QWORD[((64+8))+rsp],r9
mov QWORD[((64+16))+rsp],r10
mov QWORD[((64+24))+rsp],r11
mov QWORD[((64+32))+rsp],r12
mov QWORD[((64+40))+rsp],r13
mov QWORD[((64+48))+rsp],r14
mov QWORD[((64+56))+rsp],r15
jmp NEAR $L$round
ALIGN 16
; End of one block. On entry r8..r15 hold the state after the tenth round
; and rbx points at the bookkeeping slots (rsp+128).
; The new chaining value is state XOR input block XOR old chaining value;
; it is written back and also stays in r8..r15 for the next block.
$L$roundsdone:
mov rdi,QWORD[rbx] ; chaining-value pointer
mov rsi,QWORD[8+rbx] ; input cursor
mov rax,QWORD[16+rbx] ; blocks left
xor r8,QWORD[rsi]
xor r9,QWORD[8+rsi]
xor r10,QWORD[16+rsi]
xor r11,QWORD[24+rsi]
xor r12,QWORD[32+rsi]
xor r13,QWORD[40+rsi]
xor r14,QWORD[48+rsi]
xor r15,QWORD[56+rsi]
xor r8,QWORD[rdi]
xor r9,QWORD[8+rdi]
xor r10,QWORD[16+rdi]
xor r11,QWORD[24+rdi]
xor r12,QWORD[32+rdi]
xor r13,QWORD[40+rdi]
xor r14,QWORD[48+rdi]
xor r15,QWORD[56+rdi]
mov QWORD[rdi],r8
mov QWORD[8+rdi],r9
mov QWORD[16+rdi],r10
mov QWORD[24+rdi],r11
mov QWORD[32+rdi],r12
mov QWORD[40+rdi],r13
mov QWORD[48+rdi],r14
mov QWORD[56+rdi],r15
lea rsi,[64+rsi] ; next input block
sub rax,1
jz NEAR $L$alldone
mov QWORD[8+rbx],rsi
mov QWORD[16+rbx],rax
jmp NEAR $L$outerloop
$L$alldone:
; Tear down: fetch the stack pointer saved right after the six pushes and
; reload the registers in reverse push order.
mov rsi,QWORD[32+rbx]
mov r15,QWORD[rsi]
mov r14,QWORD[8+rsi]
mov r13,QWORD[16+rsi]
mov r12,QWORD[24+rsi]
mov rbp,QWORD[32+rsi]
mov rbx,QWORD[40+rsi]
lea rsp,[48+rsi]
$L$epilogue:
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_whirlpool_block:
ALIGN 64
$L$table:
DB 24,24,96,24,192,120,48,216,24,24,96,24,192,120,48,216
DB 35,35,140,35,5,175,70,38,35,35,140,35,5,175,70,38
DB 198,198,63,198,126,249,145,184,198,198,63,198,126,249,145,184
DB 232,232,135,232,19,111,205,251,232,232,135,232,19,111,205,251
DB 135,135,38,135,76,161,19,203,135,135,38,135,76,161,19,203
DB 184,184,218,184,169,98,109,17,184,184,218,184,169,98,109,17
DB 1,1,4,1,8,5,2,9,1,1,4,1,8,5,2,9
DB 79,79,33,79,66,110,158,13,79,79,33,79,66,110,158,13
DB 54,54,216,54,173,238,108,155,54,54,216,54,173,238,108,155
DB 166,166,162,166,89,4,81,255,166,166,162,166,89,4,81,255
DB 210,210,111,210,222,189,185,12,210,210,111,210,222,189,185,12
DB 245,245,243,245,251,6,247,14,245,245,243,245,251,6,247,14
DB 121,121,249,121,239,128,242,150,121,121,249,121,239,128,242,150
DB 111,111,161,111,95,206,222,48,111,111,161,111,95,206,222,48
DB 145,145,126,145,252,239,63,109,145,145,126,145,252,239,63,109
DB 82,82,85,82,170,7,164,248,82,82,85,82,170,7,164,248
DB 96,96,157,96,39,253,192,71,96,96,157,96,39,253,192,71
DB 188,188,202,188,137,118,101,53,188,188,202,188,137,118,101,53
DB 155,155,86,155,172,205,43,55,155,155,86,155,172,205,43,55
DB 142,142,2,142,4,140,1,138,142,142,2,142,4,140,1,138
DB 163,163,182,163,113,21,91,210,163,163,182,163,113,21,91,210
DB 12,12,48,12,96,60,24,108,12,12,48,12,96,60,24,108
DB 123,123,241,123,255,138,246,132,123,123,241,123,255,138,246,132
DB 53,53,212,53,181,225,106,128,53,53,212,53,181,225,106,128
DB 29,29,116,29,232,105,58,245,29,29,116,29,232,105,58,245
DB 224,224,167,224,83,71,221,179,224,224,167,224,83,71,221,179
DB 215,215,123,215,246,172,179,33,215,215,123,215,246,172,179,33
DB 194,194,47,194,94,237,153,156,194,194,47,194,94,237,153,156
DB 46,46,184,46,109,150,92,67,46,46,184,46,109,150,92,67
DB 75,75,49,75,98,122,150,41,75,75,49,75,98,122,150,41
DB 254,254,223,254,163,33,225,93,254,254,223,254,163,33,225,93
DB 87,87,65,87,130,22,174,213,87,87,65,87,130,22,174,213
DB 21,21,84,21,168,65,42,189,21,21,84,21,168,65,42,189
DB 119,119,193,119,159,182,238,232,119,119,193,119,159,182,238,232
DB 55,55,220,55,165,235,110,146,55,55,220,55,165,235,110,146
DB 229,229,179,229,123,86,215,158,229,229,179,229,123,86,215,158
DB 159,159,70,159,140,217,35,19,159,159,70,159,140,217,35,19
DB 240,240,231,240,211,23,253,35,240,240,231,240,211,23,253,35
DB 74,74,53,74,106,127,148,32,74,74,53,74,106,127,148,32
DB 218,218,79,218,158,149,169,68,218,218,79,218,158,149,169,68
DB 88,88,125,88,250,37,176,162,88,88,125,88,250,37,176,162
DB 201,201,3,201,6,202,143,207,201,201,3,201,6,202,143,207
DB 41,41,164,41,85,141,82,124,41,41,164,41,85,141,82,124
DB 10,10,40,10,80,34,20,90,10,10,40,10,80,34,20,90
DB 177,177,254,177,225,79,127,80,177,177,254,177,225,79,127,80
DB 160,160,186,160,105,26,93,201,160,160,186,160,105,26,93,201
DB 107,107,177,107,127,218,214,20,107,107,177,107,127,218,214,20
DB 133,133,46,133,92,171,23,217,133,133,46,133,92,171,23,217
DB 189,189,206,189,129,115,103,60,189,189,206,189,129,115,103,60
DB 93,93,105,93,210,52,186,143,93,93,105,93,210,52,186,143
DB 16,16,64,16,128,80,32,144,16,16,64,16,128,80,32,144
DB 244,244,247,244,243,3,245,7,244,244,247,244,243,3,245,7
DB 203,203,11,203,22,192,139,221,203,203,11,203,22,192,139,221
DB 62,62,248,62,237,198,124,211,62,62,248,62,237,198,124,211
DB 5,5,20,5,40,17,10,45,5,5,20,5,40,17,10,45
DB 103,103,129,103,31,230,206,120,103,103,129,103,31,230,206,120
DB 228,228,183,228,115,83,213,151,228,228,183,228,115,83,213,151
DB 39,39,156,39,37,187,78,2,39,39,156,39,37,187,78,2
DB 65,65,25,65,50,88,130,115,65,65,25,65,50,88,130,115
DB 139,139,22,139,44,157,11,167,139,139,22,139,44,157,11,167
DB 167,167,166,167,81,1,83,246,167,167,166,167,81,1,83,246
DB 125,125,233,125,207,148,250,178,125,125,233,125,207,148,250,178
DB 149,149,110,149,220,251,55,73,149,149,110,149,220,251,55,73
DB 216,216,71,216,142,159,173,86,216,216,71,216,142,159,173,86
DB 251,251,203,251,139,48,235,112,251,251,203,251,139,48,235,112
DB 238,238,159,238,35,113,193,205,238,238,159,238,35,113,193,205
DB 124,124,237,124,199,145,248,187,124,124,237,124,199,145,248,187
DB 102,102,133,102,23,227,204,113,102,102,133,102,23,227,204,113
DB 221,221,83,221,166,142,167,123,221,221,83,221,166,142,167,123
DB 23,23,92,23,184,75,46,175,23,23,92,23,184,75,46,175
DB 71,71,1,71,2,70,142,69,71,71,1,71,2,70,142,69
DB 158,158,66,158,132,220,33,26,158,158,66,158,132,220,33,26
DB 202,202,15,202,30,197,137,212,202,202,15,202,30,197,137,212
DB 45,45,180,45,117,153,90,88,45,45,180,45,117,153,90,88
DB 191,191,198,191,145,121,99,46,191,191,198,191,145,121,99,46
DB 7,7,28,7,56,27,14,63,7,7,28,7,56,27,14,63
DB 173,173,142,173,1,35,71,172,173,173,142,173,1,35,71,172
DB 90,90,117,90,234,47,180,176,90,90,117,90,234,47,180,176
DB 131,131,54,131,108,181,27,239,131,131,54,131,108,181,27,239
DB 51,51,204,51,133,255,102,182,51,51,204,51,133,255,102,182
DB 99,99,145,99,63,242,198,92,99,99,145,99,63,242,198,92
DB 2,2,8,2,16,10,4,18,2,2,8,2,16,10,4,18
DB 170,170,146,170,57,56,73,147,170,170,146,170,57,56,73,147
DB 113,113,217,113,175,168,226,222,113,113,217,113,175,168,226,222
DB 200,200,7,200,14,207,141,198,200,200,7,200,14,207,141,198
DB 25,25,100,25,200,125,50,209,25,25,100,25,200,125,50,209
DB 73,73,57,73,114,112,146,59,73,73,57,73,114,112,146,59
DB 217,217,67,217,134,154,175,95,217,217,67,217,134,154,175,95
DB 242,242,239,242,195,29,249,49,242,242,239,242,195,29,249,49
DB 227,227,171,227,75,72,219,168,227,227,171,227,75,72,219,168
DB 91,91,113,91,226,42,182,185,91,91,113,91,226,42,182,185
DB 136,136,26,136,52,146,13,188,136,136,26,136,52,146,13,188
DB 154,154,82,154,164,200,41,62,154,154,82,154,164,200,41,62
DB 38,38,152,38,45,190,76,11,38,38,152,38,45,190,76,11
DB 50,50,200,50,141,250,100,191,50,50,200,50,141,250,100,191
DB 176,176,250,176,233,74,125,89,176,176,250,176,233,74,125,89
DB 233,233,131,233,27,106,207,242,233,233,131,233,27,106,207,242
DB 15,15,60,15,120,51,30,119,15,15,60,15,120,51,30,119
DB 213,213,115,213,230,166,183,51,213,213,115,213,230,166,183,51
DB 128,128,58,128,116,186,29,244,128,128,58,128,116,186,29,244
DB 190,190,194,190,153,124,97,39,190,190,194,190,153,124,97,39
DB 205,205,19,205,38,222,135,235,205,205,19,205,38,222,135,235
DB 52,52,208,52,189,228,104,137,52,52,208,52,189,228,104,137
DB 72,72,61,72,122,117,144,50,72,72,61,72,122,117,144,50
DB 255,255,219,255,171,36,227,84,255,255,219,255,171,36,227,84
DB 122,122,245,122,247,143,244,141,122,122,245,122,247,143,244,141
DB 144,144,122,144,244,234,61,100,144,144,122,144,244,234,61,100
DB 95,95,97,95,194,62,190,157,95,95,97,95,194,62,190,157
DB 32,32,128,32,29,160,64,61,32,32,128,32,29,160,64,61
DB 104,104,189,104,103,213,208,15,104,104,189,104,103,213,208,15
DB 26,26,104,26,208,114,52,202,26,26,104,26,208,114,52,202
DB 174,174,130,174,25,44,65,183,174,174,130,174,25,44,65,183
DB 180,180,234,180,201,94,117,125,180,180,234,180,201,94,117,125
DB 84,84,77,84,154,25,168,206,84,84,77,84,154,25,168,206
DB 147,147,118,147,236,229,59,127,147,147,118,147,236,229,59,127
DB 34,34,136,34,13,170,68,47,34,34,136,34,13,170,68,47
DB 100,100,141,100,7,233,200,99,100,100,141,100,7,233,200,99
DB 241,241,227,241,219,18,255,42,241,241,227,241,219,18,255,42
DB 115,115,209,115,191,162,230,204,115,115,209,115,191,162,230,204
DB 18,18,72,18,144,90,36,130,18,18,72,18,144,90,36,130
DB 64,64,29,64,58,93,128,122,64,64,29,64,58,93,128,122
DB 8,8,32,8,64,40,16,72,8,8,32,8,64,40,16,72
DB 195,195,43,195,86,232,155,149,195,195,43,195,86,232,155,149
DB 236,236,151,236,51,123,197,223,236,236,151,236,51,123,197,223
DB 219,219,75,219,150,144,171,77,219,219,75,219,150,144,171,77
DB 161,161,190,161,97,31,95,192,161,161,190,161,97,31,95,192
DB 141,141,14,141,28,131,7,145,141,141,14,141,28,131,7,145
DB 61,61,244,61,245,201,122,200,61,61,244,61,245,201,122,200
DB 151,151,102,151,204,241,51,91,151,151,102,151,204,241,51,91
DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
DB 207,207,27,207,54,212,131,249,207,207,27,207,54,212,131,249
DB 43,43,172,43,69,135,86,110,43,43,172,43,69,135,86,110
DB 118,118,197,118,151,179,236,225,118,118,197,118,151,179,236,225
DB 130,130,50,130,100,176,25,230,130,130,50,130,100,176,25,230
DB 214,214,127,214,254,169,177,40,214,214,127,214,254,169,177,40
DB 27,27,108,27,216,119,54,195,27,27,108,27,216,119,54,195
DB 181,181,238,181,193,91,119,116,181,181,238,181,193,91,119,116
DB 175,175,134,175,17,41,67,190,175,175,134,175,17,41,67,190
DB 106,106,181,106,119,223,212,29,106,106,181,106,119,223,212,29
DB 80,80,93,80,186,13,160,234,80,80,93,80,186,13,160,234
DB 69,69,9,69,18,76,138,87,69,69,9,69,18,76,138,87
DB 243,243,235,243,203,24,251,56,243,243,235,243,203,24,251,56
DB 48,48,192,48,157,240,96,173,48,48,192,48,157,240,96,173
DB 239,239,155,239,43,116,195,196,239,239,155,239,43,116,195,196
DB 63,63,252,63,229,195,126,218,63,63,252,63,229,195,126,218
DB 85,85,73,85,146,28,170,199,85,85,73,85,146,28,170,199
DB 162,162,178,162,121,16,89,219,162,162,178,162,121,16,89,219
DB 234,234,143,234,3,101,201,233,234,234,143,234,3,101,201,233
DB 101,101,137,101,15,236,202,106,101,101,137,101,15,236,202,106
DB 186,186,210,186,185,104,105,3,186,186,210,186,185,104,105,3
DB 47,47,188,47,101,147,94,74,47,47,188,47,101,147,94,74
DB 192,192,39,192,78,231,157,142,192,192,39,192,78,231,157,142
DB 222,222,95,222,190,129,161,96,222,222,95,222,190,129,161,96
DB 28,28,112,28,224,108,56,252,28,28,112,28,224,108,56,252
DB 253,253,211,253,187,46,231,70,253,253,211,253,187,46,231,70
DB 77,77,41,77,82,100,154,31,77,77,41,77,82,100,154,31
DB 146,146,114,146,228,224,57,118,146,146,114,146,228,224,57,118
DB 117,117,201,117,143,188,234,250,117,117,201,117,143,188,234,250
DB 6,6,24,6,48,30,12,54,6,6,24,6,48,30,12,54
DB 138,138,18,138,36,152,9,174,138,138,18,138,36,152,9,174
DB 178,178,242,178,249,64,121,75,178,178,242,178,249,64,121,75
DB 230,230,191,230,99,89,209,133,230,230,191,230,99,89,209,133
DB 14,14,56,14,112,54,28,126,14,14,56,14,112,54,28,126
DB 31,31,124,31,248,99,62,231,31,31,124,31,248,99,62,231
DB 98,98,149,98,55,247,196,85,98,98,149,98,55,247,196,85
DB 212,212,119,212,238,163,181,58,212,212,119,212,238,163,181,58
DB 168,168,154,168,41,50,77,129,168,168,154,168,41,50,77,129
DB 150,150,98,150,196,244,49,82,150,150,98,150,196,244,49,82
DB 249,249,195,249,155,58,239,98,249,249,195,249,155,58,239,98
DB 197,197,51,197,102,246,151,163,197,197,51,197,102,246,151,163
DB 37,37,148,37,53,177,74,16,37,37,148,37,53,177,74,16
DB 89,89,121,89,242,32,178,171,89,89,121,89,242,32,178,171
DB 132,132,42,132,84,174,21,208,132,132,42,132,84,174,21,208
DB 114,114,213,114,183,167,228,197,114,114,213,114,183,167,228,197
DB 57,57,228,57,213,221,114,236,57,57,228,57,213,221,114,236
DB 76,76,45,76,90,97,152,22,76,76,45,76,90,97,152,22
DB 94,94,101,94,202,59,188,148,94,94,101,94,202,59,188,148
DB 120,120,253,120,231,133,240,159,120,120,253,120,231,133,240,159
DB 56,56,224,56,221,216,112,229,56,56,224,56,221,216,112,229
DB 140,140,10,140,20,134,5,152,140,140,10,140,20,134,5,152
DB 209,209,99,209,198,178,191,23,209,209,99,209,198,178,191,23
DB 165,165,174,165,65,11,87,228,165,165,174,165,65,11,87,228
DB 226,226,175,226,67,77,217,161,226,226,175,226,67,77,217,161
DB 97,97,153,97,47,248,194,78,97,97,153,97,47,248,194,78
DB 179,179,246,179,241,69,123,66,179,179,246,179,241,69,123,66
DB 33,33,132,33,21,165,66,52,33,33,132,33,21,165,66,52
DB 156,156,74,156,148,214,37,8,156,156,74,156,148,214,37,8
DB 30,30,120,30,240,102,60,238,30,30,120,30,240,102,60,238
DB 67,67,17,67,34,82,134,97,67,67,17,67,34,82,134,97
DB 199,199,59,199,118,252,147,177,199,199,59,199,118,252,147,177
DB 252,252,215,252,179,43,229,79,252,252,215,252,179,43,229,79
DB 4,4,16,4,32,20,8,36,4,4,16,4,32,20,8,36
DB 81,81,89,81,178,8,162,227,81,81,89,81,178,8,162,227
DB 153,153,94,153,188,199,47,37,153,153,94,153,188,199,47,37
DB 109,109,169,109,79,196,218,34,109,109,169,109,79,196,218,34
DB 13,13,52,13,104,57,26,101,13,13,52,13,104,57,26,101
DB 250,250,207,250,131,53,233,121,250,250,207,250,131,53,233,121
DB 223,223,91,223,182,132,163,105,223,223,91,223,182,132,163,105
DB 126,126,229,126,215,155,252,169,126,126,229,126,215,155,252,169
DB 36,36,144,36,61,180,72,25,36,36,144,36,61,180,72,25
DB 59,59,236,59,197,215,118,254,59,59,236,59,197,215,118,254
DB 171,171,150,171,49,61,75,154,171,171,150,171,49,61,75,154
DB 206,206,31,206,62,209,129,240,206,206,31,206,62,209,129,240
DB 17,17,68,17,136,85,34,153,17,17,68,17,136,85,34,153
DB 143,143,6,143,12,137,3,131,143,143,6,143,12,137,3,131
DB 78,78,37,78,74,107,156,4,78,78,37,78,74,107,156,4
DB 183,183,230,183,209,81,115,102,183,183,230,183,209,81,115,102
DB 235,235,139,235,11,96,203,224,235,235,139,235,11,96,203,224
DB 60,60,240,60,253,204,120,193,60,60,240,60,253,204,120,193
DB 129,129,62,129,124,191,31,253,129,129,62,129,124,191,31,253
DB 148,148,106,148,212,254,53,64,148,148,106,148,212,254,53,64
DB 247,247,251,247,235,12,243,28,247,247,251,247,235,12,243,28
DB 185,185,222,185,161,103,111,24,185,185,222,185,161,103,111,24
DB 19,19,76,19,152,95,38,139,19,19,76,19,152,95,38,139
DB 44,44,176,44,125,156,88,81,44,44,176,44,125,156,88,81
DB 211,211,107,211,214,184,187,5,211,211,107,211,214,184,187,5
DB 231,231,187,231,107,92,211,140,231,231,187,231,107,92,211,140
DB 110,110,165,110,87,203,220,57,110,110,165,110,87,203,220,57
DB 196,196,55,196,110,243,149,170,196,196,55,196,110,243,149,170
DB 3,3,12,3,24,15,6,27,3,3,12,3,24,15,6,27
DB 86,86,69,86,138,19,172,220,86,86,69,86,138,19,172,220
DB 68,68,13,68,26,73,136,94,68,68,13,68,26,73,136,94
DB 127,127,225,127,223,158,254,160,127,127,225,127,223,158,254,160
DB 169,169,158,169,33,55,79,136,169,169,158,169,33,55,79,136
DB 42,42,168,42,77,130,84,103,42,42,168,42,77,130,84,103
DB 187,187,214,187,177,109,107,10,187,187,214,187,177,109,107,10
DB 193,193,35,193,70,226,159,135,193,193,35,193,70,226,159,135
DB 83,83,81,83,162,2,166,241,83,83,81,83,162,2,166,241
DB 220,220,87,220,174,139,165,114,220,220,87,220,174,139,165,114
DB 11,11,44,11,88,39,22,83,11,11,44,11,88,39,22,83
DB 157,157,78,157,156,211,39,1,157,157,78,157,156,211,39,1
DB 108,108,173,108,71,193,216,43,108,108,173,108,71,193,216,43
DB 49,49,196,49,149,245,98,164,49,49,196,49,149,245,98,164
DB 116,116,205,116,135,185,232,243,116,116,205,116,135,185,232,243
DB 246,246,255,246,227,9,241,21,246,246,255,246,227,9,241,21
DB 70,70,5,70,10,67,140,76,70,70,5,70,10,67,140,76
DB 172,172,138,172,9,38,69,165,172,172,138,172,9,38,69,165
DB 137,137,30,137,60,151,15,181,137,137,30,137,60,151,15,181
DB 20,20,80,20,160,68,40,180,20,20,80,20,160,68,40,180
DB 225,225,163,225,91,66,223,186,225,225,163,225,91,66,223,186
DB 22,22,88,22,176,78,44,166,22,22,88,22,176,78,44,166
DB 58,58,232,58,205,210,116,247,58,58,232,58,205,210,116,247
DB 105,105,185,105,111,208,210,6,105,105,185,105,111,208,210,6
DB 9,9,36,9,72,45,18,65,9,9,36,9,72,45,18,65
DB 112,112,221,112,167,173,224,215,112,112,221,112,167,173,224,215
DB 182,182,226,182,217,84,113,111,182,182,226,182,217,84,113,111
DB 208,208,103,208,206,183,189,30,208,208,103,208,206,183,189,30
DB 237,237,147,237,59,126,199,214,237,237,147,237,59,126,199,214
DB 204,204,23,204,46,219,133,226,204,204,23,204,46,219,133,226
DB 66,66,21,66,42,87,132,104,66,66,21,66,42,87,132,104
DB 152,152,90,152,180,194,45,44,152,152,90,152,180,194,45,44
DB 164,164,170,164,73,14,85,237,164,164,170,164,73,14,85,237
DB 40,40,160,40,93,136,80,117,40,40,160,40,93,136,80,117
DB 92,92,109,92,218,49,184,134,92,92,109,92,218,49,184,134
DB 248,248,199,248,147,63,237,107,248,248,199,248,147,63,237,107
DB 134,134,34,134,68,164,17,194,134,134,34,134,68,164,17,194
DB 24,35,198,232,135,184,1,79
DB 54,166,210,245,121,111,145,82
DB 96,188,155,142,163,12,123,53
DB 29,224,215,194,46,75,254,87
DB 21,119,55,229,159,240,74,218
DB 88,201,41,10,177,160,107,133
DB 189,93,16,244,203,62,5,103
DB 228,39,65,139,167,125,149,216
DB 251,238,124,102,221,23,71,158
DB 202,45,191,7,173,90,131,51
EXTERN __imp_RtlVirtualUnwind
; ----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler referenced from
; this file's .xdata record.
;
; In (Win64 handler convention): r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*.
;     rcx/rdx (exception record, establisher frame) are not read here.
; Out: eax = 1 (ExceptionContinueSearch).
;
; Purpose: rewrite the CONTEXT so it looks as if the faulting function had
; already run its epilogue (stack pointer and non-volatile registers
; restored), then let RtlVirtualUnwind step to the caller's frame.
; The numeric offsets follow the AMD64 CONTEXT / DISPATCHER_CONTEXT
; layouts (120=Rax, 144=Rbx, 152=Rsp, 160=Rbp, 168=Rsi, 176=Rdi,
; 216..240=R12..R15, 248=Rip).
; ----------------------------------------------------------------------
ALIGN 16
se_handler:
; Save every register this handler touches; 8 pushes + pushfq + 64 bytes
; leaves rsp 16-byte aligned for the call below.
push rsi
push rdi
push rbx
push rbp
push r12
push r13
push r14
push r15
pushfq
sub rsp,64 ; 32 bytes of shadow space + four stack argument slots
mov rax,QWORD[120+r8] ; rax = context->Rax
mov rbx,QWORD[248+r8] ; rbx = context->Rip
lea r10,[$L$prologue]
cmp rbx,r10
jb NEAR $L$in_prologue ; fault before $L$prologue: use context->Rax as the frame base
mov rax,QWORD[152+r8] ; rax = context->Rsp
lea r10,[$L$epilogue]
cmp rbx,r10
jae NEAR $L$in_prologue ; fault at/after $L$epilogue: registers already restored
; Fault inside the function body.
; NOTE(review): offset 128+32 is presumably where the guarded function's
; prologue stashed its entry-time stack pointer -- that prologue is outside
; this excerpt; confirm against it.
mov rax,QWORD[((128+32))+rax]
lea rax,[48+rax] ; step over the six 8-byte register save slots read below
mov rbx,QWORD[((-8))+rax]
mov rbp,QWORD[((-16))+rax]
mov r12,QWORD[((-24))+rax]
mov r13,QWORD[((-32))+rax]
mov r14,QWORD[((-40))+rax]
mov r15,QWORD[((-48))+rax]
mov QWORD[144+r8],rbx ; context->Rbx
mov QWORD[160+r8],rbp ; context->Rbp
mov QWORD[216+r8],r12 ; context->R12
mov QWORD[224+r8],r13 ; context->R13
mov QWORD[232+r8],r14 ; context->R14
mov QWORD[240+r8],r15 ; context->R15
$L$in_prologue:
; rax = stack pointer value at function entry. The two slots just above the
; return address are read back as rdi/rsi (presumably stored there by the
; generated WIN64 prologue -- confirm against the guarded function).
mov rdi,QWORD[8+rax]
mov rsi,QWORD[16+rax]
mov QWORD[152+r8],rax ; context->Rsp
mov QWORD[168+r8],rsi ; context->Rsi
mov QWORD[176+r8],rdi ; context->Rdi
; Copy the patched CONTEXT into the dispatcher's context record.
mov rdi,QWORD[40+r9] ; destination = disp->ContextRecord
mov rsi,r8 ; source = context
mov ecx,154 ; 154 qwords = 1232 bytes
DD 0xa548f3fc ; bytes FC F3 48 A5 = cld ; rep movsq
; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
;                  ContextRecord, &HandlerData, &EstablisherFrame, NULL)
mov rsi,r9
xor rcx,rcx ; arg1 = 0 (no handler type)
mov rdx,QWORD[8+rsi] ; arg2 = disp->ImageBase
mov r8,QWORD[rsi] ; arg3 = disp->ControlPc
mov r9,QWORD[16+rsi] ; arg4 = disp->FunctionEntry
mov r10,QWORD[40+rsi] ; disp->ContextRecord
lea r11,[56+rsi] ; &disp->HandlerData
lea r12,[24+rsi] ; &disp->EstablisherFrame
mov QWORD[32+rsp],r10 ; arg5
mov QWORD[40+rsp],r11 ; arg6
mov QWORD[48+rsp],r12 ; arg7
mov QWORD[56+rsp],rcx ; arg8 = NULL
call QWORD[__imp_RtlVirtualUnwind]
mov eax,1 ; ExceptionContinueSearch
add rsp,64
popfq
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
pop rdi
pop rsi
DB 0F3h,0C3h ;repret
; Win64 unwind tables for the function delimited by the
; $L$SEH_begin_/$L$SEH_end_whirlpool_block labels (defined outside this excerpt).
section .pdata rdata align=4
ALIGN 4
; One function-table record: begin RVA, end RVA, RVA of its unwind info.
DD $L$SEH_begin_whirlpool_block wrt ..imagebase
DD $L$SEH_end_whirlpool_block wrt ..imagebase
DD $L$SEH_info_whirlpool_block wrt ..imagebase
section .xdata rdata align=8
ALIGN 8
$L$SEH_info_whirlpool_block:
; Unwind-info header: first byte 9 = version 1 with the exception-handler
; flag set; prologue size, unwind-code count and frame register are all 0,
; so unwinding is delegated entirely to the handler named next.
DB 9,0,0,0
DD se_handler wrt ..imagebase

398
tmp64/x86_64-gf2m.asm Normal file
View File

@@ -0,0 +1,398 @@
default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64
; ----------------------------------------------------------------------
; _mul_1x1 -- file-local helper (not declared global): 64x64 -> 128-bit
; carry-less (polynomial over GF(2)) multiplication, 4 bits of b at a time.
;
; In:   rax = a, rbp = b, r8 = nibble mask (bn_GF2m_mul_2x2 loads 0xf).
; Out:  rdx:rax = a * b (carry-less).
; Clobbers: rbx, rcx, rsi, rdi, rbp, r9-r14, xmm0, xmm1, flags.
;       r8 is only read.
; Stack: 136 bytes; [rsp]..[rsp+120] holds a 16-entry table
;       tab[i] = (a with its top 3 bits cleared) * i.
;
; Method: the top three bits of a are multiplied by b directly (they would
; overflow the 64-bit table entries); every nibble of b then selects one
; table entry. Even-numbered nibbles are accumulated in xmm0 using byte
; shifts, odd-numbered nibbles in rdx:rax using bit shifts 4,12,...,60.
; The two streams are interleaved instruction by instruction.
; ----------------------------------------------------------------------
ALIGN 16
_mul_1x1:
sub rsp,128+8 ; this 7-byte instruction is what $L$SEH_info_1x1 describes
mov r9,-1
lea rsi,[rax*1+rax] ; rsi = a<<1 (bit 62 of a now in bit 63)
shr r9,3 ; r9 = mask of the low 61 bits
lea rdi,[rax*4] ; rdi = a<<2 (bit 61 of a now in bit 63)
and r9,rax ; r9 = a1 = a without its top three bits
lea r12,[rax*8] ; r12 = a8 = a<<3 (top three bits shifted out)
sar rax,63 ; rax = all-ones if a bit 63 is set, else 0
lea r10,[r9*1+r9] ; r10 = a2 = a1<<1
sar rsi,63 ; all-ones if a bit 62 is set
lea r11,[r9*4] ; r11 = a4 = a1<<2
and rax,rbp ; b if a bit 63 set
sar rdi,63 ; all-ones if a bit 61 is set
; rdx:rax = (a63 ? b<<63 : 0) ^ (a62 ? b<<62 : 0) ^ (a61 ? b<<61 : 0)
mov rdx,rax
shl rax,63
and rsi,rbp
shr rdx,1
mov rcx,rsi
shl rsi,62
and rdi,rbp
shr rcx,2
xor rax,rsi
mov rbx,rdi
shl rdi,61
xor rdx,rcx
shr rbx,3
xor rax,rdi
xor rdx,rbx
; Build tab[0..15] from a1, a2, a4, a8 by successive XORs.
mov r13,r9
mov QWORD[rsp],0 ; tab[0] = 0
xor r13,r10
mov QWORD[8+rsp],r9 ; tab[1] = a1
mov r14,r11
mov QWORD[16+rsp],r10 ; tab[2] = a2
xor r14,r12 ; r14 = a4^a8
mov QWORD[24+rsp],r13 ; tab[3] = a1^a2
xor r9,r11
mov QWORD[32+rsp],r11 ; tab[4] = a4
xor r10,r11
mov QWORD[40+rsp],r9 ; tab[5] = a1^a4
xor r13,r11
mov QWORD[48+rsp],r10 ; tab[6] = a2^a4
xor r9,r14
mov QWORD[56+rsp],r13 ; tab[7] = a1^a2^a4
xor r10,r14
mov QWORD[64+rsp],r12 ; tab[8] = a8
xor r13,r14
mov QWORD[72+rsp],r9 ; tab[9] = a1^a8
xor r9,r11
mov QWORD[80+rsp],r10 ; tab[10] = a2^a8
xor r10,r11
mov QWORD[88+rsp],r13 ; tab[11] = a1^a2^a8
xor r13,r11
mov QWORD[96+rsp],r14 ; tab[12] = a4^a8
mov rsi,r8
mov QWORD[104+rsp],r9 ; tab[13] = a1^a4^a8
and rsi,rbp ; rsi = nibble 0 of b
mov QWORD[112+rsp],r10 ; tab[14] = a2^a4^a8
shr rbp,4
mov QWORD[120+rsp],r13 ; tab[15] = a1^a2^a4^a8
mov rdi,r8
and rdi,rbp ; rdi = nibble 1 of b
shr rbp,4
movq xmm0,QWORD[rsi*8+rsp] ; xmm0 = tab[nibble 0]
mov rsi,r8
and rsi,rbp ; nibble 2
shr rbp,4
; nibble 1: tab entry shifted left by 4 bits into rdx:rax
mov rcx,QWORD[rdi*8+rsp]
mov rdi,r8
mov rbx,rcx
shl rcx,4
and rdi,rbp ; nibble 3
movq xmm1,QWORD[rsi*8+rsp]
shr rbx,60
xor rax,rcx
pslldq xmm1,1 ; nibble 2: shift by 1 byte (8 bits)
mov rsi,r8
shr rbp,4
xor rdx,rbx
and rsi,rbp ; nibble 4
shr rbp,4
pxor xmm0,xmm1
; nibble 3: shift 12 bits
mov rcx,QWORD[rdi*8+rsp]
mov rdi,r8
mov rbx,rcx
shl rcx,12
and rdi,rbp ; nibble 5
movq xmm1,QWORD[rsi*8+rsp]
shr rbx,52
xor rax,rcx
pslldq xmm1,2 ; nibble 4: shift 2 bytes
mov rsi,r8
shr rbp,4
xor rdx,rbx
and rsi,rbp ; nibble 6
shr rbp,4
pxor xmm0,xmm1
; nibble 5: shift 20 bits
mov rcx,QWORD[rdi*8+rsp]
mov rdi,r8
mov rbx,rcx
shl rcx,20
and rdi,rbp ; nibble 7
movq xmm1,QWORD[rsi*8+rsp]
shr rbx,44
xor rax,rcx
pslldq xmm1,3 ; nibble 6: shift 3 bytes
mov rsi,r8
shr rbp,4
xor rdx,rbx
and rsi,rbp ; nibble 8
shr rbp,4
pxor xmm0,xmm1
; nibble 7: shift 28 bits
mov rcx,QWORD[rdi*8+rsp]
mov rdi,r8
mov rbx,rcx
shl rcx,28
and rdi,rbp ; nibble 9
movq xmm1,QWORD[rsi*8+rsp]
shr rbx,36
xor rax,rcx
pslldq xmm1,4 ; nibble 8: shift 4 bytes
mov rsi,r8
shr rbp,4
xor rdx,rbx
and rsi,rbp ; nibble 10
shr rbp,4
pxor xmm0,xmm1
; nibble 9: shift 36 bits
mov rcx,QWORD[rdi*8+rsp]
mov rdi,r8
mov rbx,rcx
shl rcx,36
and rdi,rbp ; nibble 11
movq xmm1,QWORD[rsi*8+rsp]
shr rbx,28
xor rax,rcx
pslldq xmm1,5 ; nibble 10: shift 5 bytes
mov rsi,r8
shr rbp,4
xor rdx,rbx
and rsi,rbp ; nibble 12
shr rbp,4
pxor xmm0,xmm1
; nibble 11: shift 44 bits
mov rcx,QWORD[rdi*8+rsp]
mov rdi,r8
mov rbx,rcx
shl rcx,44
and rdi,rbp ; nibble 13
movq xmm1,QWORD[rsi*8+rsp]
shr rbx,20
xor rax,rcx
pslldq xmm1,6 ; nibble 12: shift 6 bytes
mov rsi,r8
shr rbp,4
xor rdx,rbx
and rsi,rbp ; nibble 14
shr rbp,4
pxor xmm0,xmm1
; nibble 13: shift 52 bits
mov rcx,QWORD[rdi*8+rsp]
mov rdi,r8
mov rbx,rcx
shl rcx,52
and rdi,rbp ; nibble 15
movq xmm1,QWORD[rsi*8+rsp]
shr rbx,12
xor rax,rcx
pslldq xmm1,7 ; nibble 14: shift 7 bytes
mov rsi,r8
shr rbp,4
xor rdx,rbx
and rsi,rbp
shr rbp,4
pxor xmm0,xmm1
; nibble 15: shift 60 bits
mov rcx,QWORD[rdi*8+rsp]
mov rbx,rcx
shl rcx,60
DB 102,72,15,126,198 ; bytes 66 48 0F 7E C6 = movq rsi,xmm0 (low half of SSE sum)
shr rbx,4
xor rax,rcx
psrldq xmm0,8
xor rdx,rbx
DB 102,72,15,126,199 ; bytes 66 48 0F 7E C7 = movq rdi,xmm0 (high half of SSE sum)
; fold the even-nibble accumulator into the result
xor rax,rsi
xor rdx,rdi
add rsp,128+8
DB 0F3h,0C3h ;repret
$L$end_mul_1x1:
EXTERN OPENSSL_ia32cap_P
; ----------------------------------------------------------------------
; bn_GF2m_mul_2x2 -- 128x128 -> 256-bit carry-less multiplication built from
; three 64x64 products (Karatsuba).
;
; ABI: Win64.
; In:  rcx = r (pointer to four 64-bit result words)
;      rdx = a1, r8 = a0, r9 = b1, [rsp+40] = b0
;      (a1/b1 are the high words and a0/b0 the low words, as implied by
;       where each product lands in r)
; Out: r[0..3] = (a1:a0) * (b1:b0), least-significant word first.
;
; Two implementations: a PCLMULQDQ path taken when bit 33 of
; OPENSSL_ia32cap_P is set, and a table-driven path using _mul_1x1.
; ----------------------------------------------------------------------
global bn_GF2m_mul_2x2
ALIGN 16
bn_GF2m_mul_2x2:
mov rax,QWORD[OPENSSL_ia32cap_P]
bt rax,33 ; bit 1 of the second capability dword (presumably the PCLMULQDQ flag, given the path it guards)
jnc NEAR $L$vanilla_mul_2x2
; ---- PCLMULQDQ path (leaf; touches only xmm0-xmm5) ----
DB 102,72,15,110,194 ; movq xmm0,rdx   (a1)
DB 102,73,15,110,201 ; movq xmm1,r9    (b1)
DB 102,73,15,110,208 ; movq xmm2,r8    (a0)
movq xmm3,QWORD[40+rsp] ; b0 (fifth argument, on the stack)
movdqa xmm4,xmm0
movdqa xmm5,xmm1
DB 102,15,58,68,193,0 ; pclmulqdq xmm0,xmm1,0 : xmm0 = a1*b1
pxor xmm4,xmm2 ; a1^a0
pxor xmm5,xmm3 ; b1^b0
DB 102,15,58,68,211,0 ; pclmulqdq xmm2,xmm3,0 : xmm2 = a0*b0
DB 102,15,58,68,229,0 ; pclmulqdq xmm4,xmm5,0 : xmm4 = (a1^a0)*(b1^b0)
xorps xmm4,xmm0
xorps xmm4,xmm2 ; xmm4 = middle Karatsuba term
movdqa xmm5,xmm4
pslldq xmm4,8 ; low half of the middle term goes to result bits 64..127
psrldq xmm5,8 ; high half goes to result bits 128..191
pxor xmm2,xmm4
pxor xmm0,xmm5
movdqu XMMWORD[rcx],xmm2 ; r[0..1]
movdqu XMMWORD[16+rcx],xmm0 ; r[2..3]
DB 0F3h,0C3h ;repret
ALIGN 16
; ---- table-driven path ----
; Frame (136 bytes): [0..15] a0*b0, [16..31] a1*b1, [32] r, [40] a1,
; [48] a0, [56] b1, [64] b0, [80..128] saved r14,r13,r12,rbp,rbx,rdi,rsi.
; The gf2m se_handler in this file relies on exactly this layout.
$L$vanilla_mul_2x2:
lea rsp,[((-136))+rsp]
mov r10,QWORD[176+rsp] ; b0: entry [rsp+40] is now at 136+40
mov QWORD[120+rsp],rdi
mov QWORD[128+rsp],rsi
mov QWORD[80+rsp],r14
mov QWORD[88+rsp],r13
mov QWORD[96+rsp],r12
mov QWORD[104+rsp],rbp
mov QWORD[112+rsp],rbx
$L$body_mul_2x2:
mov QWORD[32+rsp],rcx
mov QWORD[40+rsp],rdx
mov QWORD[48+rsp],r8
mov QWORD[56+rsp],r9
mov QWORD[64+rsp],r10
mov r8,0xf ; nibble mask expected by _mul_1x1 (it never modifies r8)
mov rax,rdx
mov rbp,r9
call _mul_1x1 ; a1*b1
mov QWORD[16+rsp],rax
mov QWORD[24+rsp],rdx
mov rax,QWORD[48+rsp]
mov rbp,QWORD[64+rsp]
call _mul_1x1 ; a0*b0
mov QWORD[rsp],rax
mov QWORD[8+rsp],rdx
mov rax,QWORD[40+rsp]
mov rbp,QWORD[56+rsp]
xor rax,QWORD[48+rsp]
xor rbp,QWORD[64+rsp]
call _mul_1x1 ; (a1^a0)*(b1^b0) -> rdx:rax
; Karatsuba recombination. With l0:h0 = a0*b0 (rbx:rcx),
; l1:h1 = a1*b1 (rdi:rsi), ml:mh = rax:rdx:
;   r[0] = l0
;   r[1] = h0 ^ ml ^ l0 ^ l1
;   r[2] = l1 ^ mh ^ h0 ^ h1
;   r[3] = h1
mov rbx,QWORD[rsp]
mov rcx,QWORD[8+rsp]
mov rdi,QWORD[16+rsp]
mov rsi,QWORD[24+rsp]
mov rbp,QWORD[32+rsp] ; rbp = r
xor rax,rdx
xor rdx,rcx
xor rax,rbx
mov QWORD[rbp],rbx
xor rdx,rdi
mov QWORD[24+rbp],rsi
xor rax,rsi
xor rdx,rsi
xor rax,rdx
mov QWORD[16+rbp],rdx
mov QWORD[8+rbp],rax
; restore non-volatile registers and release the frame
mov r14,QWORD[80+rsp]
mov r13,QWORD[88+rsp]
mov r12,QWORD[96+rsp]
mov rbp,QWORD[104+rsp]
mov rbx,QWORD[112+rsp]
mov rdi,QWORD[120+rsp]
mov rsi,QWORD[128+rsp]
lea rsp,[136+rsp]
DB 0F3h,0C3h ;repret
$L$end_mul_2x2:
; NUL-terminated ASCII identification string embedded in the object:
; "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
DB 71,70,40,50,94,109,41,32,77,117,108,116,105,112,108,105
DB 99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54
DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
DB 111,114,103,62,0
ALIGN 16
EXTERN __imp_RtlVirtualUnwind
; ----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler for the
; table-driven path of bn_GF2m_mul_2x2 (see $L$SEH_info_2x2).
;
; In (Win64 handler convention): r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*.
; Out: eax = 1 (ExceptionContinueSearch).
;
; Restores into the CONTEXT the registers that $L$vanilla_mul_2x2 saved in
; its 136-byte frame, pops that frame from context->Rsp, then calls
; RtlVirtualUnwind to step to the caller. Offsets follow the AMD64 CONTEXT
; layout (144=Rbx, 152=Rsp, 160=Rbp, 168=Rsi, 176=Rdi, 216..232=R12..R14,
; 248=Rip).
; ----------------------------------------------------------------------
ALIGN 16
se_handler:
; 8 pushes + pushfq + 64 bytes keeps rsp 16-byte aligned for the call below.
push rsi
push rdi
push rbx
push rbp
push r12
push r13
push r14
push r15
pushfq
sub rsp,64 ; shadow space + four stack argument slots
mov rax,QWORD[152+r8] ; rax = context->Rsp
mov rbx,QWORD[248+r8] ; rbx = context->Rip
lea r10,[$L$body_mul_2x2]
cmp rbx,r10
jb NEAR $L$in_prologue ; still saving registers: CONTEXT values are intact
; Reload the saved registers from the same frame slots the prologue used.
mov r14,QWORD[80+rax]
mov r13,QWORD[88+rax]
mov r12,QWORD[96+rax]
mov rbp,QWORD[104+rax]
mov rbx,QWORD[112+rax]
mov rdi,QWORD[120+rax]
mov rsi,QWORD[128+rax]
mov QWORD[144+r8],rbx ; context->Rbx
mov QWORD[160+r8],rbp ; context->Rbp
mov QWORD[168+r8],rsi ; context->Rsi
mov QWORD[176+r8],rdi ; context->Rdi
mov QWORD[216+r8],r12 ; context->R12
mov QWORD[224+r8],r13 ; context->R13
mov QWORD[232+r8],r14 ; context->R14
$L$in_prologue:
; NOTE(review): both paths assume the 136-byte frame is currently allocated.
; That does not hold at the very first instruction (before lea rsp,[-136+rsp])
; or at the final return instruction; neither can raise a synchronous
; exception, so this looks unreachable in practice -- confirm.
lea rax,[136+rax] ; drop the frame
mov QWORD[152+r8],rax ; context->Rsp
; Copy the patched CONTEXT into the dispatcher's context record.
mov rdi,QWORD[40+r9] ; destination = disp->ContextRecord
mov rsi,r8 ; source = context
mov ecx,154 ; 154 qwords = 1232 bytes
DD 0xa548f3fc ; bytes FC F3 48 A5 = cld ; rep movsq
; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
;                  ContextRecord, &HandlerData, &EstablisherFrame, NULL)
mov rsi,r9
xor rcx,rcx ; arg1 = 0 (no handler type)
mov rdx,QWORD[8+rsi] ; arg2 = disp->ImageBase
mov r8,QWORD[rsi] ; arg3 = disp->ControlPc
mov r9,QWORD[16+rsi] ; arg4 = disp->FunctionEntry
mov r10,QWORD[40+rsi] ; disp->ContextRecord
lea r11,[56+rsi] ; &disp->HandlerData
lea r12,[24+rsi] ; &disp->EstablisherFrame
mov QWORD[32+rsp],r10 ; arg5
mov QWORD[40+rsp],r11 ; arg6
mov QWORD[48+rsp],r12 ; arg7
mov QWORD[56+rsp],rcx ; arg8 = NULL
call QWORD[__imp_RtlVirtualUnwind]
mov eax,1 ; ExceptionContinueSearch
add rsp,64
popfq
pop r15
pop r14
pop r13
pop r12
pop rbp
pop rbx
pop rdi
pop rsi
DB 0F3h,0C3h ;repret
; Win64 unwind tables. Each .pdata record is three RVAs:
; function begin, function end, unwind info.
section .pdata rdata align=4
ALIGN 4
; _mul_1x1: described by plain unwind codes.
DD _mul_1x1 wrt ..imagebase
DD $L$end_mul_1x1 wrt ..imagebase
DD $L$SEH_info_1x1 wrt ..imagebase
; Table-driven path of bn_GF2m_mul_2x2: unwound by se_handler.
; The range starts at $L$vanilla_mul_2x2, so the PCLMULQDQ leaf path
; (which never moves rsp) has no record.
DD $L$vanilla_mul_2x2 wrt ..imagebase
DD $L$end_mul_2x2 wrt ..imagebase
DD $L$SEH_info_2x2 wrt ..imagebase
section .xdata rdata align=8
ALIGN 8
$L$SEH_info_1x1:
; Header: version 1, no flags; prologue is 7 bytes; 2 unwind-code slots;
; no frame register.
DB 0x01,0x07,0x02,0x00
; Code at prologue offset 7: large stack allocation, size in the next
; slot = 0x0011 * 8 = 136 bytes, matching "sub rsp,128+8" in _mul_1x1.
DB 0x07,0x01,0x11,0x00
$L$SEH_info_2x2:
; Header byte 9 = version 1 with the exception-handler flag; no unwind
; codes, the handler below does the work.
DB 9,0,0,0
DD se_handler wrt ..imagebase

1399
tmp64/x86_64-mont.asm Normal file

File diff suppressed because it is too large Load Diff

3906
tmp64/x86_64-mont5.asm Normal file

File diff suppressed because it is too large Load Diff

261
tmp64/x86_64cpuid.asm Normal file
View File

@@ -0,0 +1,261 @@
default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
; OPENSSL_cpuid_setup is defined elsewhere; a pointer to it is emitted into
; the .CRT$XCU section.
; NOTE(review): with the Microsoft C runtime, pointers in .CRT$XCU are called
; as initializers during program startup, so the capability probe presumably
; runs before main -- confirm for the toolchain in use.
EXTERN OPENSSL_cpuid_setup
section .CRT$XCU rdata align=8
DQ OPENSSL_cpuid_setup
; 16-byte common (linker-merged, uninitialized) capability vector.
; bn_GF2m_mul_2x2 in x86_64-gf2m.asm tests a bit of it to select its
; PCLMULQDQ path.
common OPENSSL_ia32cap_P 16
section .text code align=64
;-----------------------------------------------------------------------
; OPENSSL_atomic_add -- atomically add a 32-bit delta to a 32-bit cell.
; ABI:   Win64
; In:    rcx = address of the 32-bit cell, edx = delta
; Out:   rax = new value of the cell, sign-extended from 32 bits
; Clobb: r8, flags
;-----------------------------------------------------------------------
        global  OPENSSL_atomic_add
        ALIGN   16
OPENSSL_atomic_add:
        mov     eax, DWORD [rcx]            ; eax = value currently in the cell
$L$spin:
        lea     r8, [rax*1 + rdx]           ; r8d = candidate = observed + delta
        lock cmpxchg DWORD [rcx], r8d       ; store candidate if the cell still equals eax;
                                            ; on mismatch eax is reloaded from the cell
        jne     NEAR $L$spin                ; another writer got in first: retry
        mov     eax, r8d                    ; return the value we stored
        cdqe                                ; sign-extend eax into rax
        DB      0xF3, 0xC3                  ; rep ret
;-----------------------------------------------------------------------
; OPENSSL_rdtsc -- read the processor time-stamp counter.
; In:    nothing
; Out:   rax = 64-bit counter value
; Clobb: rdx, flags
;-----------------------------------------------------------------------
        global  OPENSSL_rdtsc
        ALIGN   16
OPENSSL_rdtsc:
        rdtsc                               ; counter delivered as edx:eax
        shl     rdx, 32                     ; move the high half into place
        or      rax, rdx                    ; rax = (high << 32) | low
        DB      0xF3, 0xC3                  ; rep ret
; ----------------------------------------------------------------------
; OPENSSL_ia32_cpuid -- probe CPUID and return an adjusted feature mask.
;
; ABI: Win64.
; In:  rcx = pointer to a capability array; only the dword at offset 8 is
;      written here (zeroed, then set to CPUID leaf 7 EBX when available).
; Out: rax = (adjusted CPUID.1 ECX << 32) | adjusted CPUID.1 EDX.
; Preserves rbx (parked in r8), rdi and rsi (parked in the caller's home
; slots).
;
; Register roles while probing:
;   r11d = highest standard CPUID leaf
;   r9d  = 0 if the vendor string is "GenuineIntel", non-zero otherwise;
;          on AMD also carries bit 11 of extended leaf 0x80000001 ECX
;   r10d = core / cache-sharing count used to validate the HTT bit
; ----------------------------------------------------------------------
global OPENSSL_ia32_cpuid
ALIGN 16
OPENSSL_ia32_cpuid:
mov QWORD[8+rsp],rdi ;WIN64 prologue
mov QWORD[16+rsp],rsi
mov rax,rsp ; generated boilerplate: rax is overwritten by xor eax,eax below
$L$SEH_begin_OPENSSL_ia32_cpuid:
mov rdi,rcx ; rdi = capability array
mov r8,rbx ; cpuid clobbers rbx; keep the caller's value in r8
xor eax,eax
mov DWORD[8+rdi],eax ; clear the third capability dword
cpuid ; leaf 0: eax = max standard leaf, ebx:edx:ecx = vendor string
mov r11d,eax
xor eax,eax
cmp ebx,0x756e6547 ; "Genu"
setne al
mov r9d,eax
cmp edx,0x49656e69 ; "ineI"
setne al
or r9d,eax
cmp ecx,0x6c65746e ; "ntel"
setne al
or r9d,eax ; r9d = 0 only for "GenuineIntel"
jz NEAR $L$intel
cmp ebx,0x68747541 ; "Auth"
setne al
mov r10d,eax
cmp edx,0x69746E65 ; "enti"
setne al
or r10d,eax
cmp ecx,0x444D4163 ; "cAMD"
setne al
or r10d,eax ; r10d = 0 only for "AuthenticAMD"
jnz NEAR $L$intel ; other vendors share the generic path at $L$intel
; ---- AMD-specific probing ----
mov eax,0x80000000
cpuid ; eax = highest extended leaf
cmp eax,0x80000001
jb NEAR $L$intel
mov r10d,eax
mov eax,0x80000001
cpuid
or r9d,ecx
and r9d,0x00000801 ; keep the non-Intel marker (bit 0) and extended ECX bit 11
cmp r10d,0x80000008
jb NEAR $L$intel
mov eax,0x80000008
cpuid
movzx r10,cl ; ECX[7:0] of leaf 0x80000008 (core count minus one)
inc r10 ; r10 = core count
mov eax,1
cpuid
bt edx,28 ; HTT flag
jnc NEAR $L$generic
shr ebx,16 ; bl = logical processor count (leaf 1 EBX[23:16])
cmp bl,r10b
ja NEAR $L$generic ; more logical processors than cores: keep HTT
and edx,0xefffffff ; otherwise clear HTT (bit 28)
jmp NEAR $L$generic
; ---- Intel and unrecognised vendors ----
$L$intel:
cmp r11d,4
mov r10d,-1 ; default when leaf 4 is unavailable (mov leaves the flags from cmp intact)
jb NEAR $L$nocacheinfo
mov eax,4
mov ecx,0 ; sub-leaf 0
cpuid
mov r10d,eax
shr r10d,14
and r10d,0xfff ; r10d = leaf 4 EAX[25:14] (cache-sharing count field)
cmp r11d,7
jb NEAR $L$nocacheinfo
mov eax,7
xor ecx,ecx
cpuid
mov DWORD[8+rdi],ebx ; third capability dword = leaf 7 EBX
$L$nocacheinfo:
mov eax,1
cpuid
and edx,0xbfefffff ; force EDX bits 30 and 20 to zero
cmp r9d,0
jne NEAR $L$notintel
; NOTE(review): bits 30 and 20 are then reused as software flags; their
; consumers are outside this excerpt.
or edx,0x40000000 ; bit 30: set for every "GenuineIntel" part
and ah,15
cmp ah,15 ; family ID (EAX[11:8]) == 15 ?
jne NEAR $L$notintel
or edx,0x00100000 ; bit 20: set only for family 15
$L$notintel:
bt edx,28 ; HTT flag
jnc NEAR $L$generic
and edx,0xefffffff ; clear HTT, then decide whether to reinstate it
cmp r10d,0
je NEAR $L$generic ; sharing field is zero: leave HTT clear
or edx,0x10000000
shr ebx,16 ; bl = logical processor count
cmp bl,1
ja NEAR $L$generic ; more than one logical processor: keep HTT
and edx,0xefffffff
$L$generic:
and r9d,0x00000800 ; isolate the AMD extended-ECX bit 11
and ecx,0xfffff7ff ; make room for it in leaf 1 ECX
or r9d,ecx ; r9d = leaf 1 ECX with bit 11 replaced
mov r10d,edx ; r9d:r10d now hold the ECX:EDX words to return
bt r9d,27 ; OSXSAVE
jnc NEAR $L$clear_avx
xor ecx,ecx ; XCR0
DB 0x0f,0x01,0xd0 ; xgetbv
and eax,6
cmp eax,6 ; XCR0 bits 1 and 2 (XMM and YMM state) both enabled?
je NEAR $L$done
$L$clear_avx:
mov eax,0xefffe7ff
and r9d,eax ; clear ECX bits 28, 12 and 11
and DWORD[8+rdi],0xffffffdf ; clear bit 5 of the stored leaf 7 EBX
$L$done:
shl r9,32
mov eax,r10d
mov rbx,r8 ; restore the caller's rbx
or rax,r9
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
mov rsi,QWORD[16+rsp]
DB 0F3h,0C3h ;repret
$L$SEH_end_OPENSSL_ia32_cpuid:
;-----------------------------------------------------------------------
; OPENSSL_cleanse -- overwrite a memory region with zero bytes.
; ABI:   Win64
; In:    rcx = start address, rdx = length in bytes (0 is allowed)
; Out:   rax = 0
; Clobb: rcx, rdx, flags
; Regions shorter than 15 bytes are cleared bytewise. Longer regions are
; cleared bytewise up to an 8-byte boundary, then a qword at a time, with
; any remaining tail (fewer than 8 bytes) cleared bytewise.
;-----------------------------------------------------------------------
        global  OPENSSL_cleanse
        ALIGN   16
OPENSSL_cleanse:
        xor     eax, eax                    ; fill pattern: rax = 0
        cmp     rdx, 15
        jae     NEAR $L$ot                  ; long region -> aligned qword stores
        test    rdx, rdx
        jz      NEAR $L$ret                 ; empty region
$L$ittle:                                   ; invariant: rdx > 0 bytes remain
        mov     BYTE [rcx], al
        lea     rcx, [rcx + 1]              ; lea leaves the flags alone
        sub     rdx, 1
        jnz     NEAR $L$ittle
$L$ret:
        DB      0xF3, 0xC3                  ; rep ret
        ALIGN   16
$L$ot:                                      ; bytewise until rcx is 8-byte aligned
        test    rcx, 7
        jz      NEAR $L$aligned
        mov     BYTE [rcx], al
        lea     rcx, [rcx + 1]
        lea     rdx, [rdx - 1]
        jmp     NEAR $L$ot
$L$aligned:                                 ; invariant: at least 8 bytes remain
        mov     QWORD [rcx], rax
        lea     rcx, [rcx + 8]
        lea     rdx, [rdx - 8]
        test    rdx, -8                     ; any full qword left?
        jnz     NEAR $L$aligned
        test    rdx, rdx
        jnz     NEAR $L$ittle               ; finish the tail bytewise
        DB      0xF3, 0xC3                  ; rep ret
;-----------------------------------------------------------------------
; OPENSSL_wipe_cpu -- zero the scratch registers this routine is free to
; destroy, so no stale values remain in them.
; In:    nothing
; Out:   rax = rsp + 8 (the stack address just above the return address)
; Clobb: rcx, rdx, r8-r11, xmm0-xmm5, flags (all left as zero)
;-----------------------------------------------------------------------
        global  OPENSSL_wipe_cpu
        ALIGN   16
OPENSSL_wipe_cpu:
        ; integer scratch registers (a 32-bit xor clears the full 64 bits)
        xor     ecx, ecx
        xor     edx, edx
        xor     r8d, r8d
        xor     r9d, r9d
        xor     r10d, r10d
        xor     r11d, r11d
        ; vector scratch registers
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        pxor    xmm3, xmm3
        pxor    xmm4, xmm4
        pxor    xmm5, xmm5
        lea     rax, [rsp + 8]
        DB      0xF3, 0xC3                  ; rep ret
;-----------------------------------------------------------------------
; OPENSSL_ia32_rdrand -- fetch a 64-bit value with RDRAND, trying up to
; eight times.
; In:    nothing
; Out:   rax = the value delivered by RDRAND; when that value is zero the
;        remaining retry count in rcx is returned instead (non-zero after
;        a success, zero once all eight attempts have failed).
; Clobb: rcx, flags
;-----------------------------------------------------------------------
        global  OPENSSL_ia32_rdrand
        ALIGN   16
OPENSSL_ia32_rdrand:
        mov     ecx, 8                      ; retry budget
$L$oop_rdrand:
        DB      0x48, 0x0F, 0xC7, 0xF0      ; rdrand rax, emitted as raw bytes
        jc      NEAR $L$break_rdrand        ; CF=1: a value was delivered
        loop    $L$oop_rdrand               ; rcx -= 1, retry while non-zero
$L$break_rdrand:
        test    rax, rax
        cmovz   rax, rcx                    ; zero result -> substitute retry counter
        DB      0xF3, 0xC3                  ; rep ret
;-----------------------------------------------------------------------
; OPENSSL_ia32_rdseed -- fetch a 64-bit value with RDSEED, trying up to
; eight times.
; In:    nothing
; Out:   rax = the value delivered by RDSEED; when that value is zero the
;        remaining retry count in rcx is returned instead (non-zero after
;        a success, zero once all eight attempts have failed).
; Clobb: rcx, flags
;-----------------------------------------------------------------------
        global  OPENSSL_ia32_rdseed
        ALIGN   16
OPENSSL_ia32_rdseed:
        mov     ecx, 8                      ; retry budget
$L$oop_rdseed:
        DB      0x48, 0x0F, 0xC7, 0xF8      ; rdseed rax, emitted as raw bytes
        jc      NEAR $L$break_rdseed        ; CF=1: a value was delivered
        loop    $L$oop_rdseed               ; rcx -= 1, retry while non-zero
$L$break_rdseed:
        test    rax, rax
        cmovz   rax, rcx                    ; zero result -> substitute retry counter
        DB      0xF3, 0xC3                  ; rep ret