Import OpenSSL 1.1.0f
This commit is contained in:
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||||
@@ -191,6 +198,10 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
push(@INC,"${dir}","${dir}../../perlasm");
|
||||
require "x86asm.pl";
|
||||
|
||||
$output = pop;
|
||||
open OUT,">$output";
|
||||
*STDOUT=*OUT;
|
||||
|
||||
&asm_init($ARGV[0],"aes-586.pl",$x86only = $ARGV[$#ARGV] eq "386");
|
||||
&static_label("AES_Te");
|
||||
&static_label("AES_Td");
|
||||
@@ -2861,12 +2872,12 @@ sub enckey()
|
||||
&set_label("exit");
|
||||
&function_end("_x86_AES_set_encrypt_key");
|
||||
|
||||
# int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits,
|
||||
# int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
|
||||
# AES_KEY *key)
|
||||
&function_begin_B("private_AES_set_encrypt_key");
|
||||
&function_begin_B("AES_set_encrypt_key");
|
||||
&call ("_x86_AES_set_encrypt_key");
|
||||
&ret ();
|
||||
&function_end_B("private_AES_set_encrypt_key");
|
||||
&function_end_B("AES_set_encrypt_key");
|
||||
|
||||
sub deckey()
|
||||
{ my ($i,$key,$tp1,$tp2,$tp4,$tp8) = @_;
|
||||
@@ -2923,9 +2934,9 @@ sub deckey()
|
||||
&mov (&DWP(4*$i,$key),$tp1);
|
||||
}
|
||||
|
||||
# int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits,
|
||||
# int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
|
||||
# AES_KEY *key)
|
||||
&function_begin_B("private_AES_set_decrypt_key");
|
||||
&function_begin_B("AES_set_decrypt_key");
|
||||
&call ("_x86_AES_set_encrypt_key");
|
||||
&cmp ("eax",0);
|
||||
&je (&label("proceed"));
|
||||
@@ -2981,7 +2992,9 @@ sub deckey()
|
||||
&jb (&label("permute"));
|
||||
|
||||
&xor ("eax","eax"); # return success
|
||||
&function_end("private_AES_set_decrypt_key");
|
||||
&function_end("AES_set_decrypt_key");
|
||||
&asciz("AES for x86, CRYPTOGAMS by <appro\@openssl.org>");
|
||||
|
||||
&asm_finish();
|
||||
|
||||
close STDOUT;
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -32,8 +39,20 @@
|
||||
# Profiler-assisted and platform-specific optimization resulted in 16%
|
||||
# improvement on Cortex A8 core and ~21.5 cycles per byte.
|
||||
|
||||
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
|
||||
open STDOUT,">$output";
|
||||
$flavour = shift;
|
||||
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
|
||||
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
|
||||
|
||||
if ($flavour && $flavour ne "void") {
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
|
||||
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
|
||||
die "can't locate arm-xlate.pl";
|
||||
|
||||
open STDOUT,"| \"$^X\" $xlate $flavour $output";
|
||||
} else {
|
||||
open STDOUT,">$output";
|
||||
}
|
||||
|
||||
$s0="r0";
|
||||
$s1="r1";
|
||||
@@ -58,15 +77,12 @@ $code=<<___;
|
||||
#endif
|
||||
|
||||
.text
|
||||
#if __ARM_ARCH__<7
|
||||
.code 32
|
||||
#else
|
||||
#if defined(__thumb2__) && !defined(__APPLE__)
|
||||
.syntax unified
|
||||
# ifdef __thumb2__
|
||||
.thumb
|
||||
# else
|
||||
#else
|
||||
.code 32
|
||||
# endif
|
||||
#undef __thumb2__
|
||||
#endif
|
||||
|
||||
.type AES_Te,%object
|
||||
@@ -181,15 +197,19 @@ AES_Te:
|
||||
.type AES_encrypt,%function
|
||||
.align 5
|
||||
AES_encrypt:
|
||||
#if __ARM_ARCH__<7
|
||||
#ifndef __thumb2__
|
||||
sub r3,pc,#8 @ AES_encrypt
|
||||
#else
|
||||
adr r3,AES_encrypt
|
||||
#endif
|
||||
stmdb sp!,{r1,r4-r12,lr}
|
||||
#ifdef __APPLE__
|
||||
adr $tbl,AES_Te
|
||||
#else
|
||||
sub $tbl,r3,#AES_encrypt-AES_Te @ Te
|
||||
#endif
|
||||
mov $rounds,r0 @ inp
|
||||
mov $key,r2
|
||||
sub $tbl,r3,#AES_encrypt-AES_Te @ Te
|
||||
#if __ARM_ARCH__<7
|
||||
ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
|
||||
ldrb $t1,[$rounds,#2] @ manner...
|
||||
@@ -422,24 +442,24 @@ _armv4_AES_encrypt:
|
||||
ldr pc,[sp],#4 @ pop and return
|
||||
.size _armv4_AES_encrypt,.-_armv4_AES_encrypt
|
||||
|
||||
.global private_AES_set_encrypt_key
|
||||
.type private_AES_set_encrypt_key,%function
|
||||
.global AES_set_encrypt_key
|
||||
.type AES_set_encrypt_key,%function
|
||||
.align 5
|
||||
private_AES_set_encrypt_key:
|
||||
AES_set_encrypt_key:
|
||||
_armv4_AES_set_encrypt_key:
|
||||
#if __ARM_ARCH__<7
|
||||
#ifndef __thumb2__
|
||||
sub r3,pc,#8 @ AES_set_encrypt_key
|
||||
#else
|
||||
adr r3,private_AES_set_encrypt_key
|
||||
adr r3,AES_set_encrypt_key
|
||||
#endif
|
||||
teq r0,#0
|
||||
#if __ARM_ARCH__>=7
|
||||
#ifdef __thumb2__
|
||||
itt eq @ Thumb2 thing, sanity check in ARM
|
||||
#endif
|
||||
moveq r0,#-1
|
||||
beq .Labrt
|
||||
teq r2,#0
|
||||
#if __ARM_ARCH__>=7
|
||||
#ifdef __thumb2__
|
||||
itt eq @ Thumb2 thing, sanity check in ARM
|
||||
#endif
|
||||
moveq r0,#-1
|
||||
@@ -450,19 +470,23 @@ _armv4_AES_set_encrypt_key:
|
||||
teq r1,#192
|
||||
beq .Lok
|
||||
teq r1,#256
|
||||
#if __ARM_ARCH__>=7
|
||||
#ifdef __thumb2__
|
||||
itt ne @ Thumb2 thing, sanity check in ARM
|
||||
#endif
|
||||
movne r0,#-1
|
||||
bne .Labrt
|
||||
|
||||
.Lok: stmdb sp!,{r4-r12,lr}
|
||||
sub $tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4
|
||||
|
||||
mov $rounds,r0 @ inp
|
||||
mov lr,r1 @ bits
|
||||
mov $key,r2 @ key
|
||||
|
||||
#ifdef __APPLE__
|
||||
adr $tbl,AES_Te+1024 @ Te4
|
||||
#else
|
||||
sub $tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024 @ Te4
|
||||
#endif
|
||||
|
||||
#if __ARM_ARCH__<7
|
||||
ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
|
||||
ldrb $t1,[$rounds,#2] @ manner...
|
||||
@@ -607,7 +631,7 @@ _armv4_AES_set_encrypt_key:
|
||||
str $s2,[$key,#-16]
|
||||
subs $rounds,$rounds,#1
|
||||
str $s3,[$key,#-12]
|
||||
#if __ARM_ARCH__>=7
|
||||
#ifdef __thumb2__
|
||||
itt eq @ Thumb2 thing, sanity check in ARM
|
||||
#endif
|
||||
subeq r2,$key,#216
|
||||
@@ -679,7 +703,7 @@ _armv4_AES_set_encrypt_key:
|
||||
str $s2,[$key,#-24]
|
||||
subs $rounds,$rounds,#1
|
||||
str $s3,[$key,#-20]
|
||||
#if __ARM_ARCH__>=7
|
||||
#ifdef __thumb2__
|
||||
itt eq @ Thumb2 thing, sanity check in ARM
|
||||
#endif
|
||||
subeq r2,$key,#256
|
||||
@@ -722,12 +746,12 @@ _armv4_AES_set_encrypt_key:
|
||||
moveq pc,lr @ be binary compatible with V4, yet
|
||||
bx lr @ interoperable with Thumb ISA:-)
|
||||
#endif
|
||||
.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
|
||||
.size AES_set_encrypt_key,.-AES_set_encrypt_key
|
||||
|
||||
.global private_AES_set_decrypt_key
|
||||
.type private_AES_set_decrypt_key,%function
|
||||
.global AES_set_decrypt_key
|
||||
.type AES_set_decrypt_key,%function
|
||||
.align 5
|
||||
private_AES_set_decrypt_key:
|
||||
AES_set_decrypt_key:
|
||||
str lr,[sp,#-4]! @ push lr
|
||||
bl _armv4_AES_set_encrypt_key
|
||||
teq r0,#0
|
||||
@@ -737,7 +761,7 @@ private_AES_set_decrypt_key:
|
||||
mov r0,r2 @ AES_set_encrypt_key preserves r2,
|
||||
mov r1,r2 @ which is AES_KEY *key
|
||||
b _armv4_AES_set_enc2dec_key
|
||||
.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
|
||||
.size AES_set_decrypt_key,.-AES_set_decrypt_key
|
||||
|
||||
@ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
|
||||
.global AES_set_enc2dec_key
|
||||
@@ -750,7 +774,7 @@ _armv4_AES_set_enc2dec_key:
|
||||
ldr $rounds,[r0,#240]
|
||||
mov $i1,r0 @ input
|
||||
add $i2,r0,$rounds,lsl#4
|
||||
mov $key,r1 @ ouput
|
||||
mov $key,r1 @ output
|
||||
add $tbl,r1,$rounds,lsl#4
|
||||
str $rounds,[r1,#240]
|
||||
|
||||
@@ -949,15 +973,19 @@ AES_Td:
|
||||
.type AES_decrypt,%function
|
||||
.align 5
|
||||
AES_decrypt:
|
||||
#if __ARM_ARCH__<7
|
||||
#ifndef __thumb2__
|
||||
sub r3,pc,#8 @ AES_decrypt
|
||||
#else
|
||||
adr r3,AES_decrypt
|
||||
#endif
|
||||
stmdb sp!,{r1,r4-r12,lr}
|
||||
#ifdef __APPLE__
|
||||
adr $tbl,AES_Td
|
||||
#else
|
||||
sub $tbl,r3,#AES_decrypt-AES_Td @ Td
|
||||
#endif
|
||||
mov $rounds,r0 @ inp
|
||||
mov $key,r2
|
||||
sub $tbl,r3,#AES_decrypt-AES_Td @ Td
|
||||
#if __ARM_ARCH__<7
|
||||
ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
|
||||
ldrb $t1,[$rounds,#2] @ manner...
|
||||
|
||||
1382
crypto/aes/asm/aes-c64xplus.pl
Normal file
1382
crypto/aes/asm/aes-c64xplus.pl
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,3 +1,10 @@
|
||||
// Copyright 2004-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the OpenSSL license (the "License"). You may not use
|
||||
// this file except in compliance with the License. You can obtain a copy
|
||||
// in the file LICENSE in the source distribution or at
|
||||
// https://www.openssl.org/source/license.html
|
||||
//
|
||||
// ====================================================================
|
||||
// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||||
// project. Rights for redistribution and usage in source and binary
|
||||
@@ -10,7 +17,7 @@
|
||||
// 'and' which in turn can be assigned to M-port [there're twice as
|
||||
// many M-ports as there're I-ports on Itanium 2]. By sacrificing a few
|
||||
// registers for small constants (255, 24 and 16) to be used with
|
||||
// 'shr' and 'and' instructions I can achieve better ILP, Intruction
|
||||
// 'shr' and 'and' instructions I can achieve better ILP, Instruction
|
||||
// Level Parallelism, and performance. This code outperforms GCC 3.3
|
||||
// generated code by over factor of 2 (two), GCC 3.4 - by 70% and
|
||||
// HP C - by 40%. Measured best-case scenario, i.e. aligned
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -57,6 +64,7 @@
|
||||
$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
|
||||
|
||||
if ($flavour =~ /64|n32/i) {
|
||||
$PTR_LA="dla";
|
||||
$PTR_ADD="dadd"; # incidentally works even on n32
|
||||
$PTR_SUB="dsub"; # incidentally works even on n32
|
||||
$PTR_INS="dins";
|
||||
@@ -65,6 +73,7 @@ if ($flavour =~ /64|n32/i) {
|
||||
$PTR_SLL="dsll"; # incidentally works even on n32
|
||||
$SZREG=8;
|
||||
} else {
|
||||
$PTR_LA="la";
|
||||
$PTR_ADD="add";
|
||||
$PTR_SUB="sub";
|
||||
$PTR_INS="ins";
|
||||
@@ -81,13 +90,13 @@ $pf = ($flavour =~ /nubi/i) ? $t0 : $t2;
|
||||
|
||||
$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC});
|
||||
|
||||
for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); }
|
||||
for (@ARGV) { $output=$_ if (/\w[\w\-]*\.\w+$/); }
|
||||
open STDOUT,">$output";
|
||||
|
||||
if (!defined($big_endian))
|
||||
{ $big_endian=(unpack('L',pack('N',1))==1); }
|
||||
|
||||
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
|
||||
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
|
||||
open STDOUT,">$output";
|
||||
|
||||
my ($MSB,$LSB)=(0,3); # automatically converted to little-endian
|
||||
@@ -110,7 +119,7 @@ ___
|
||||
|
||||
{{{
|
||||
my $FRAMESIZE=16*$SZREG;
|
||||
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc0fff008 : 0xc0ff0000;
|
||||
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc0fff008" : "0xc0ff0000";
|
||||
|
||||
my ($inp,$out,$key,$Tbl,$s0,$s1,$s2,$s3)=($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7);
|
||||
my ($i0,$i1,$i2,$i3)=($at,$t0,$t1,$t2);
|
||||
@@ -646,7 +655,7 @@ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
|
||||
___
|
||||
$code.=<<___;
|
||||
.set reorder
|
||||
la $Tbl,AES_Te # PIC-ified 'load address'
|
||||
$PTR_LA $Tbl,AES_Te # PIC-ified 'load address'
|
||||
|
||||
lwl $s0,0+$MSB($inp)
|
||||
lwl $s1,4+$MSB($inp)
|
||||
@@ -1217,7 +1226,7 @@ $code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
|
||||
___
|
||||
$code.=<<___;
|
||||
.set reorder
|
||||
la $Tbl,AES_Td # PIC-ified 'load address'
|
||||
$PTR_LA $Tbl,AES_Td # PIC-ified 'load address'
|
||||
|
||||
lwl $s0,0+$MSB($inp)
|
||||
lwl $s1,4+$MSB($inp)
|
||||
@@ -1267,7 +1276,7 @@ ___
|
||||
|
||||
{{{
|
||||
my $FRAMESIZE=8*$SZREG;
|
||||
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0xc000f008 : 0xc0000000;
|
||||
my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0xc000f008" : "0xc0000000";
|
||||
|
||||
my ($inp,$bits,$key,$Tbl)=($a0,$a1,$a2,$a3);
|
||||
my ($rk0,$rk1,$rk2,$rk3,$rk4,$rk5,$rk6,$rk7)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
|
||||
@@ -1528,9 +1537,9 @@ _mips_AES_set_encrypt_key:
|
||||
nop
|
||||
.end _mips_AES_set_encrypt_key
|
||||
|
||||
.globl private_AES_set_encrypt_key
|
||||
.ent private_AES_set_encrypt_key
|
||||
private_AES_set_encrypt_key:
|
||||
.globl AES_set_encrypt_key
|
||||
.ent AES_set_encrypt_key
|
||||
AES_set_encrypt_key:
|
||||
.frame $sp,$FRAMESIZE,$ra
|
||||
.mask $SAVED_REGS_MASK,-$SZREG
|
||||
.set noreorder
|
||||
@@ -1552,11 +1561,11 @@ $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
|
||||
___
|
||||
$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
|
||||
.cplocal $Tbl
|
||||
.cpsetup $pf,$zero,private_AES_set_encrypt_key
|
||||
.cpsetup $pf,$zero,AES_set_encrypt_key
|
||||
___
|
||||
$code.=<<___;
|
||||
.set reorder
|
||||
la $Tbl,AES_Te4 # PIC-ified 'load address'
|
||||
$PTR_LA $Tbl,AES_Te4 # PIC-ified 'load address'
|
||||
|
||||
bal _mips_AES_set_encrypt_key
|
||||
|
||||
@@ -1575,7 +1584,7 @@ ___
|
||||
$code.=<<___;
|
||||
jr $ra
|
||||
$PTR_ADD $sp,$FRAMESIZE
|
||||
.end private_AES_set_encrypt_key
|
||||
.end AES_set_encrypt_key
|
||||
___
|
||||
|
||||
my ($head,$tail)=($inp,$bits);
|
||||
@@ -1583,9 +1592,9 @@ my ($tp1,$tp2,$tp4,$tp8,$tp9,$tpb,$tpd,$tpe)=($a4,$a5,$a6,$a7,$s0,$s1,$s2,$s3);
|
||||
my ($m,$x80808080,$x7f7f7f7f,$x1b1b1b1b)=($at,$t0,$t1,$t2);
|
||||
$code.=<<___;
|
||||
.align 5
|
||||
.globl private_AES_set_decrypt_key
|
||||
.ent private_AES_set_decrypt_key
|
||||
private_AES_set_decrypt_key:
|
||||
.globl AES_set_decrypt_key
|
||||
.ent AES_set_decrypt_key
|
||||
AES_set_decrypt_key:
|
||||
.frame $sp,$FRAMESIZE,$ra
|
||||
.mask $SAVED_REGS_MASK,-$SZREG
|
||||
.set noreorder
|
||||
@@ -1607,11 +1616,11 @@ $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
|
||||
___
|
||||
$code.=<<___ if ($flavour !~ /o32/i); # non-o32 PIC-ification
|
||||
.cplocal $Tbl
|
||||
.cpsetup $pf,$zero,private_AES_set_decrypt_key
|
||||
.cpsetup $pf,$zero,AES_set_decrypt_key
|
||||
___
|
||||
$code.=<<___;
|
||||
.set reorder
|
||||
la $Tbl,AES_Te4 # PIC-ified 'load address'
|
||||
$PTR_LA $Tbl,AES_Te4 # PIC-ified 'load address'
|
||||
|
||||
bal _mips_AES_set_encrypt_key
|
||||
|
||||
@@ -1729,7 +1738,7 @@ ___
|
||||
$code.=<<___;
|
||||
jr $ra
|
||||
$PTR_ADD $sp,$FRAMESIZE
|
||||
.end private_AES_set_decrypt_key
|
||||
.end AES_set_decrypt_key
|
||||
___
|
||||
}}}
|
||||
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||||
@@ -19,7 +26,7 @@
|
||||
# February 2010
|
||||
#
|
||||
# Rescheduling instructions to favour Power6 pipeline gave 10%
|
||||
# performance improvement on the platfrom in question (and marginal
|
||||
# performance improvement on the platform in question (and marginal
|
||||
# improvement even on others). It should be noted that Power6 fails
|
||||
# to process byte in 18 cycles, only in 23, because it fails to issue
|
||||
# 4 load instructions in two cycles, only in 3. As result non-compact
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||||
@@ -92,7 +99,7 @@ if ($flavour =~ /3[12]/) {
|
||||
$g="g";
|
||||
}
|
||||
|
||||
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
|
||||
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
|
||||
open STDOUT,">$output";
|
||||
|
||||
$softonly=0; # allow hardware support
|
||||
@@ -779,10 +786,10 @@ ___
|
||||
$code.=<<___;
|
||||
# void AES_set_encrypt_key(const unsigned char *in, int bits,
|
||||
# AES_KEY *key) {
|
||||
.globl private_AES_set_encrypt_key
|
||||
.type private_AES_set_encrypt_key,\@function
|
||||
.globl AES_set_encrypt_key
|
||||
.type AES_set_encrypt_key,\@function
|
||||
.align 16
|
||||
private_AES_set_encrypt_key:
|
||||
AES_set_encrypt_key:
|
||||
_s390x_AES_set_encrypt_key:
|
||||
lghi $t0,0
|
||||
cl${g}r $inp,$t0
|
||||
@@ -1059,14 +1066,14 @@ $code.=<<___;
|
||||
.Lminus1:
|
||||
lghi %r2,-1
|
||||
br $ra
|
||||
.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
|
||||
.size AES_set_encrypt_key,.-AES_set_encrypt_key
|
||||
|
||||
# void AES_set_decrypt_key(const unsigned char *in, int bits,
|
||||
# AES_KEY *key) {
|
||||
.globl private_AES_set_decrypt_key
|
||||
.type private_AES_set_decrypt_key,\@function
|
||||
.globl AES_set_decrypt_key
|
||||
.type AES_set_decrypt_key,\@function
|
||||
.align 16
|
||||
private_AES_set_decrypt_key:
|
||||
AES_set_decrypt_key:
|
||||
#st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to
|
||||
st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers and $key!
|
||||
bras $ra,_s390x_AES_set_encrypt_key
|
||||
@@ -1166,7 +1173,7 @@ $code.=<<___;
|
||||
lm${g} %r6,%r13,6*$SIZE_T($sp)# as was saved by AES_set_encrypt_key!
|
||||
lghi %r2,0
|
||||
br $ra
|
||||
.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
|
||||
.size AES_set_decrypt_key,.-AES_set_decrypt_key
|
||||
___
|
||||
|
||||
########################################################################
|
||||
@@ -1568,8 +1575,8 @@ ___
|
||||
}
|
||||
|
||||
########################################################################
|
||||
# void AES_xts_encrypt(const unsigned char *inp, unsigned char *out,
|
||||
# size_t len, const AES_KEY *key1, const AES_KEY *key2,
|
||||
# void AES_xts_encrypt(const char *inp,char *out,size_t len,
|
||||
# const AES_KEY *key1, const AES_KEY *key2,
|
||||
# const unsigned char iv[16]);
|
||||
#
|
||||
{
|
||||
@@ -1937,8 +1944,8 @@ $code.=<<___;
|
||||
br $ra
|
||||
.size AES_xts_encrypt,.-AES_xts_encrypt
|
||||
___
|
||||
# void AES_xts_decrypt(const unsigned char *inp, unsigned char *out,
|
||||
# size_t len, const AES_KEY *key1, const AES_KEY *key2,
|
||||
# void AES_xts_decrypt(const char *inp,char *out,size_t len,
|
||||
# const AES_KEY *key1, const AES_KEY *key2,
|
||||
# const unsigned char iv[16]);
|
||||
#
|
||||
$code.=<<___;
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||||
@@ -30,10 +37,11 @@
|
||||
# optimal decrypt procedure]. Compared to GNU C generated code both
|
||||
# procedures are more than 60% faster:-)
|
||||
|
||||
$bits=32;
|
||||
for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
|
||||
if ($bits==64) { $bias=2047; $frame=192; }
|
||||
else { $bias=0; $frame=112; }
|
||||
$output = pop;
|
||||
open STDOUT,">$output";
|
||||
|
||||
$frame="STACK_FRAME";
|
||||
$bias="STACK_BIAS";
|
||||
$locals=16;
|
||||
|
||||
$acc0="%l0";
|
||||
@@ -74,11 +82,13 @@ sub _data_word()
|
||||
while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
|
||||
}
|
||||
|
||||
$code.=<<___ if ($bits==64);
|
||||
$code.=<<___;
|
||||
#include "sparc_arch.h"
|
||||
|
||||
#ifdef __arch64__
|
||||
.register %g2,#scratch
|
||||
.register %g3,#scratch
|
||||
___
|
||||
$code.=<<___;
|
||||
#endif
|
||||
.section ".text",#alloc,#execinstr
|
||||
|
||||
.align 256
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
|
||||
@@ -37,7 +44,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
|
||||
die "can't locate x86_64-xlate.pl";
|
||||
|
||||
open OUT,"| \"$^X\" $xlate $flavour $output";
|
||||
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
|
||||
*STDOUT=*OUT;
|
||||
|
||||
$verticalspin=1; # unlike 32-bit version $verticalspin performs
|
||||
@@ -1282,13 +1289,13 @@ $code.=<<___;
|
||||
___
|
||||
}
|
||||
|
||||
# int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits,
|
||||
# int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
|
||||
# AES_KEY *key)
|
||||
$code.=<<___;
|
||||
.globl private_AES_set_encrypt_key
|
||||
.type private_AES_set_encrypt_key,\@function,3
|
||||
.globl AES_set_encrypt_key
|
||||
.type AES_set_encrypt_key,\@function,3
|
||||
.align 16
|
||||
private_AES_set_encrypt_key:
|
||||
AES_set_encrypt_key:
|
||||
push %rbx
|
||||
push %rbp
|
||||
push %r12 # redundant, but allows to share
|
||||
@@ -1305,7 +1312,7 @@ private_AES_set_encrypt_key:
|
||||
add \$56,%rsp
|
||||
.Lenc_key_epilogue:
|
||||
ret
|
||||
.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key
|
||||
.size AES_set_encrypt_key,.-AES_set_encrypt_key
|
||||
|
||||
.type _x86_64_AES_set_encrypt_key,\@abi-omnipotent
|
||||
.align 16
|
||||
@@ -1548,13 +1555,13 @@ $code.=<<___;
|
||||
___
|
||||
}
|
||||
|
||||
# int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits,
|
||||
# int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
|
||||
# AES_KEY *key)
|
||||
$code.=<<___;
|
||||
.globl private_AES_set_decrypt_key
|
||||
.type private_AES_set_decrypt_key,\@function,3
|
||||
.globl AES_set_decrypt_key
|
||||
.type AES_set_decrypt_key,\@function,3
|
||||
.align 16
|
||||
private_AES_set_decrypt_key:
|
||||
AES_set_decrypt_key:
|
||||
push %rbx
|
||||
push %rbp
|
||||
push %r12
|
||||
@@ -1623,7 +1630,7 @@ $code.=<<___;
|
||||
add \$56,%rsp
|
||||
.Ldec_key_epilogue:
|
||||
ret
|
||||
.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key
|
||||
.size AES_set_decrypt_key,.-AES_set_decrypt_key
|
||||
___
|
||||
|
||||
# void AES_cbc_encrypt (const unsigned char *inp, unsigned char *out,
|
||||
@@ -2770,13 +2777,13 @@ cbc_se_handler:
|
||||
.rva .LSEH_end_AES_decrypt
|
||||
.rva .LSEH_info_AES_decrypt
|
||||
|
||||
.rva .LSEH_begin_private_AES_set_encrypt_key
|
||||
.rva .LSEH_end_private_AES_set_encrypt_key
|
||||
.rva .LSEH_info_private_AES_set_encrypt_key
|
||||
.rva .LSEH_begin_AES_set_encrypt_key
|
||||
.rva .LSEH_end_AES_set_encrypt_key
|
||||
.rva .LSEH_info_AES_set_encrypt_key
|
||||
|
||||
.rva .LSEH_begin_private_AES_set_decrypt_key
|
||||
.rva .LSEH_end_private_AES_set_decrypt_key
|
||||
.rva .LSEH_info_private_AES_set_decrypt_key
|
||||
.rva .LSEH_begin_AES_set_decrypt_key
|
||||
.rva .LSEH_end_AES_set_decrypt_key
|
||||
.rva .LSEH_info_AES_set_decrypt_key
|
||||
|
||||
.rva .LSEH_begin_AES_cbc_encrypt
|
||||
.rva .LSEH_end_AES_cbc_encrypt
|
||||
@@ -2792,11 +2799,11 @@ cbc_se_handler:
|
||||
.byte 9,0,0,0
|
||||
.rva block_se_handler
|
||||
.rva .Ldec_prologue,.Ldec_epilogue # HandlerData[]
|
||||
.LSEH_info_private_AES_set_encrypt_key:
|
||||
.LSEH_info_AES_set_encrypt_key:
|
||||
.byte 9,0,0,0
|
||||
.rva key_se_handler
|
||||
.rva .Lenc_key_prologue,.Lenc_key_epilogue # HandlerData[]
|
||||
.LSEH_info_private_AES_set_decrypt_key:
|
||||
.LSEH_info_AES_set_decrypt_key:
|
||||
.byte 9,0,0,0
|
||||
.rva key_se_handler
|
||||
.rva .Ldec_key_prologue,.Ldec_key_epilogue # HandlerData[]
|
||||
|
||||
1270
crypto/aes/asm/aesfx-sparcv9.pl
Normal file
1270
crypto/aes/asm/aesfx-sparcv9.pl
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -67,7 +74,7 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([
|
||||
$avx = ($2>=3.0) + ($2>3.0);
|
||||
}
|
||||
|
||||
open OUT,"| \"$^X\" $xlate $flavour $output";
|
||||
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
|
||||
*STDOUT=*OUT;
|
||||
|
||||
# void aesni_multi_cbc_encrypt (
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -25,6 +32,7 @@
|
||||
# Sandy Bridge 5.05[+5.0(6.1)] 10.06(11.15) 5.98(7.05) +68%(+58%)
|
||||
# Ivy Bridge 5.05[+4.6] 9.65 5.54 +74%
|
||||
# Haswell 4.43[+3.6(4.2)] 8.00(8.58) 4.55(5.21) +75%(+65%)
|
||||
# Skylake 2.63[+3.5(4.1)] 6.17(6.69) 4.23(4.44) +46%(+51%)
|
||||
# Bulldozer 5.77[+6.0] 11.72 6.37 +84%
|
||||
#
|
||||
# AES-192-CBC
|
||||
@@ -39,6 +47,7 @@
|
||||
# Sandy Bridge 7.05 12.06(13.15) 7.12(7.72) +69%(+70%)
|
||||
# Ivy Bridge 7.05 11.65 7.12 +64%
|
||||
# Haswell 6.19 9.76(10.34) 6.21(6.25) +57%(+65%)
|
||||
# Skylake 3.62 7.16(7.68) 4.56(4.76) +57%(+61%)
|
||||
# Bulldozer 8.00 13.95 8.25 +69%
|
||||
#
|
||||
# (*) There are two code paths: SSSE3 and AVX. See sha1-586.pl for
|
||||
@@ -100,7 +109,7 @@ $shaext=1; ### set to zero if compiling for 1.0.1
|
||||
|
||||
$stitched_decrypt=0;
|
||||
|
||||
open OUT,"| \"$^X\" $xlate $flavour $output";
|
||||
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
|
||||
*STDOUT=*OUT;
|
||||
|
||||
# void aesni_cbc_sha1_enc(const void *inp,
|
||||
@@ -298,7 +307,7 @@ ___
|
||||
$r++; unshift(@rndkey,pop(@rndkey));
|
||||
};
|
||||
|
||||
sub Xupdate_ssse3_16_31() # recall that $Xi starts wtih 4
|
||||
sub Xupdate_ssse3_16_31() # recall that $Xi starts with 4
|
||||
{ use integer;
|
||||
my $body = shift;
|
||||
my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
|
||||
@@ -1137,7 +1146,7 @@ ___
|
||||
$r++; unshift(@rndkey,pop(@rndkey));
|
||||
};
|
||||
|
||||
sub Xupdate_avx_16_31() # recall that $Xi starts wtih 4
|
||||
sub Xupdate_avx_16_31() # recall that $Xi starts with 4
|
||||
{ use integer;
|
||||
my $body = shift;
|
||||
my @insns = (&$body,&$body,&$body,&$body); # 40 instructions
|
||||
@@ -1702,6 +1711,7 @@ $code.=<<___;
|
||||
mov 240($key),$rounds
|
||||
sub $in0,$out
|
||||
movups ($key),$rndkey0 # $key[0]
|
||||
movups ($ivp),$iv # load IV
|
||||
movups 16($key),$rndkey[0] # forward reference
|
||||
lea 112($key),$key # size optimization
|
||||
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -25,9 +32,10 @@
|
||||
# Sandy Bridge 5.05/6.05/7.05+11.6 13.0 +28%/36%/43%
|
||||
# Ivy Bridge 5.05/6.05/7.05+10.3 11.6 +32%/41%/50%
|
||||
# Haswell 4.43/5.29/6.19+7.80 8.79 +39%/49%/59%
|
||||
# Skylake 2.62/3.14/3.62+7.70 8.10 +27%/34%/40%
|
||||
# Bulldozer 5.77/6.89/8.00+13.7 13.7 +42%/50%/58%
|
||||
#
|
||||
# (*) there are XOP, AVX1 and AVX2 code pathes, meaning that
|
||||
# (*) there are XOP, AVX1 and AVX2 code paths, meaning that
|
||||
# Westmere is omitted from loop, this is because gain was not
|
||||
# estimated high enough to justify the effort;
|
||||
# (**) these are EVP-free results, results obtained with 'speed
|
||||
@@ -66,7 +74,7 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([
|
||||
$shaext=$avx; ### set to zero if compiling for 1.0.1
|
||||
$avx=1 if (!$shaext && $avx);
|
||||
|
||||
open OUT,"| \"$^X\" $xlate $flavour $output";
|
||||
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
|
||||
*STDOUT=*OUT;
|
||||
|
||||
$func="aesni_cbc_sha256_enc";
|
||||
@@ -1299,6 +1307,7 @@ $code.=<<___;
|
||||
mov 240($key),$rounds
|
||||
sub $in0,$out
|
||||
movups ($key),$rndkey0 # $key[0]
|
||||
movups ($ivp),$iv # load IV
|
||||
movups 16($key),$rndkey[0] # forward reference
|
||||
lea 112($key),$key # size optimization
|
||||
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -43,16 +50,20 @@
|
||||
# Add aesni_xts_[en|de]crypt. Westmere spends 1.50 cycles processing
|
||||
# one byte out of 8KB with 128-bit key, Sandy Bridge - 1.09.
|
||||
|
||||
# November 2015
|
||||
#
|
||||
# Add aesni_ocb_[en|de]crypt.
|
||||
|
||||
######################################################################
|
||||
# Current large-block performance in cycles per byte processed with
|
||||
# 128-bit key (less is better).
|
||||
#
|
||||
# CBC en-/decrypt CTR XTS ECB
|
||||
# CBC en-/decrypt CTR XTS ECB OCB
|
||||
# Westmere 3.77/1.37 1.37 1.52 1.27
|
||||
# * Bridge 5.07/0.98 0.99 1.09 0.91
|
||||
# Haswell 4.44/0.80 0.97 1.03 0.72
|
||||
# Silvermont 5.77/3.56 3.67 4.03 3.46
|
||||
# Bulldozer 5.80/0.98 1.05 1.24 0.93
|
||||
# * Bridge 5.07/0.98 0.99 1.09 0.91 1.10
|
||||
# Haswell 4.44/0.80 0.97 1.03 0.72 0.76
|
||||
# Silvermont 5.77/3.56 3.67 4.03 3.46 4.03
|
||||
# Bulldozer 5.80/0.98 1.05 1.24 0.93 1.23
|
||||
|
||||
$PREFIX="aesni"; # if $PREFIX is set to "AES", the script
|
||||
# generates drop-in replacement for
|
||||
@@ -63,6 +74,10 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
push(@INC,"${dir}","${dir}../../perlasm");
|
||||
require "x86asm.pl";
|
||||
|
||||
# The last command-line argument names the file that receives the
# generated assembly.  STDOUT is re-pointed at it so that everything
# printed by the perlasm emitters ends up there.  Abort if the file
# cannot be opened instead of silently discarding all output.
$output = pop;
open OUT,">$output" or die "can't open $output: $!";
*STDOUT=*OUT;
|
||||
|
||||
&asm_init($ARGV[0],$0);
|
||||
|
||||
&external_label("OPENSSL_ia32cap_P");
|
||||
@@ -1831,6 +1846,877 @@ if ($PREFIX eq "aesni") {
|
||||
&mov ("esp",&DWP(16*7+4,"esp")); # restore %esp
|
||||
&function_end("aesni_xts_decrypt");
|
||||
}
|
||||
|
||||
######################################################################
|
||||
# void aesni_ocb_[en|de]crypt(const char *inp, char *out, size_t blocks,
|
||||
# const AES_KEY *key, unsigned int start_block_num,
|
||||
# unsigned char offset_i[16], const unsigned char L_[][16],
|
||||
# unsigned char checksum[16]);
|
||||
#
|
||||
{
|
||||
# offsets within stack frame
|
||||
my $checksum = 16*6;
|
||||
my ($key_off,$rounds_off,$out_off,$end_off,$esp_off)=map(16*7+4*$_,(0..4));
|
||||
|
||||
# reassigned registers
|
||||
my ($l_,$block,$i1,$i3,$i5) = ($rounds_,$key_,$rounds,$len,$out);
|
||||
# $l_, $block, $inp, $key are permanently allocated in registers;
|
||||
# remaining non-volatile ones are offloaded to stack, which even
|
||||
# stay invariant once they have been written to the stack.
|
||||
|
||||
&function_begin("aesni_ocb_encrypt");
|
||||
&mov ($rounds,&wparam(5)); # &offset_i
|
||||
&mov ($rounds_,&wparam(7)); # &checksum
|
||||
|
||||
&mov ($inp,&wparam(0));
|
||||
&mov ($out,&wparam(1));
|
||||
&mov ($len,&wparam(2));
|
||||
&mov ($key,&wparam(3));
|
||||
&movdqu ($rndkey0,&QWP(0,$rounds)); # load offset_i
|
||||
&mov ($block,&wparam(4)); # start_block_num
|
||||
&movdqu ($rndkey1,&QWP(0,$rounds_)); # load checksum
|
||||
&mov ($l_,&wparam(6)); # L_
|
||||
|
||||
&mov ($rounds,"esp");
|
||||
&sub ("esp",$esp_off+4); # alloca
|
||||
&and ("esp",-16); # align stack
|
||||
|
||||
&sub ($out,$inp);
|
||||
&shl ($len,4);
|
||||
&lea ($len,&DWP(-16*6,$inp,$len)); # end of input - 16*6
|
||||
&mov (&DWP($out_off,"esp"),$out);
|
||||
&mov (&DWP($end_off,"esp"),$len);
|
||||
&mov (&DWP($esp_off,"esp"),$rounds);
|
||||
|
||||
&mov ($rounds,&DWP(240,$key));
|
||||
|
||||
&test ($block,1);
|
||||
&jnz (&label("odd"));
|
||||
|
||||
&bsf ($i3,$block);
|
||||
&add ($block,1);
|
||||
&shl ($i3,4);
|
||||
&movdqu ($inout5,&QWP(0,$l_,$i3));
|
||||
&mov ($i3,$key); # put aside key
|
||||
|
||||
&movdqu ($inout0,&QWP(16*0,$inp)); # load input
|
||||
&lea ($inp,&DWP(16,$inp));
|
||||
|
||||
&pxor ($inout5,$rndkey0); # ^ last offset_i
|
||||
&pxor ($rndkey1,$inout0); # checksum
|
||||
&pxor ($inout0,$inout5); # ^ offset_i
|
||||
|
||||
&movdqa ($inout4,$rndkey1);
|
||||
if ($inline)
|
||||
{ &aesni_inline_generate1("enc"); }
|
||||
else
|
||||
{ &call ("_aesni_encrypt1"); }
|
||||
|
||||
&xorps ($inout0,$inout5); # ^ offset_i
|
||||
&movdqa ($rndkey0,$inout5); # pass last offset_i
|
||||
&movdqa ($rndkey1,$inout4); # pass the checksum
|
||||
|
||||
&movups (&QWP(-16,$out,$inp),$inout0); # store output
|
||||
|
||||
&mov ($rounds,&DWP(240,$i3));
|
||||
&mov ($key,$i3); # restore key
|
||||
&mov ($len,&DWP($end_off,"esp"));
|
||||
|
||||
&set_label("odd");
|
||||
&shl ($rounds,4);
|
||||
&mov ($out,16);
|
||||
&sub ($out,$rounds); # twisted rounds
|
||||
&mov (&DWP($key_off,"esp"),$key);
|
||||
&lea ($key,&DWP(32,$key,$rounds)); # end of key schedule
|
||||
&mov (&DWP($rounds_off,"esp"),$out);
|
||||
|
||||
&cmp ($inp,$len);
|
||||
&ja (&label("short"));
|
||||
&jmp (&label("grandloop"));
|
||||
|
||||
&set_label("grandloop",32);
|
||||
&lea ($i1,&DWP(1,$block));
|
||||
&lea ($i3,&DWP(3,$block));
|
||||
&lea ($i5,&DWP(5,$block));
|
||||
&add ($block,6);
|
||||
&bsf ($i1,$i1);
|
||||
&bsf ($i3,$i3);
|
||||
&bsf ($i5,$i5);
|
||||
&shl ($i1,4);
|
||||
&shl ($i3,4);
|
||||
&shl ($i5,4);
|
||||
&movdqu ($inout0,&QWP(0,$l_));
|
||||
&movdqu ($inout1,&QWP(0,$l_,$i1));
|
||||
&mov ($rounds,&DWP($rounds_off,"esp"));
|
||||
&movdqa ($inout2,$inout0);
|
||||
&movdqu ($inout3,&QWP(0,$l_,$i3));
|
||||
&movdqa ($inout4,$inout0);
|
||||
&movdqu ($inout5,&QWP(0,$l_,$i5));
|
||||
|
||||
&pxor ($inout0,$rndkey0); # ^ last offset_i
|
||||
&pxor ($inout1,$inout0);
|
||||
&movdqa (&QWP(16*0,"esp"),$inout0);
|
||||
&pxor ($inout2,$inout1);
|
||||
&movdqa (&QWP(16*1,"esp"),$inout1);
|
||||
&pxor ($inout3,$inout2);
|
||||
&movdqa (&QWP(16*2,"esp"),$inout2);
|
||||
&pxor ($inout4,$inout3);
|
||||
&movdqa (&QWP(16*3,"esp"),$inout3);
|
||||
&pxor ($inout5,$inout4);
|
||||
&movdqa (&QWP(16*4,"esp"),$inout4);
|
||||
&movdqa (&QWP(16*5,"esp"),$inout5);
|
||||
|
||||
&$movekey ($rndkey0,&QWP(-48,$key,$rounds));
|
||||
&movdqu ($inout0,&QWP(16*0,$inp)); # load input
|
||||
&movdqu ($inout1,&QWP(16*1,$inp));
|
||||
&movdqu ($inout2,&QWP(16*2,$inp));
|
||||
&movdqu ($inout3,&QWP(16*3,$inp));
|
||||
&movdqu ($inout4,&QWP(16*4,$inp));
|
||||
&movdqu ($inout5,&QWP(16*5,$inp));
|
||||
&lea ($inp,&DWP(16*6,$inp));
|
||||
|
||||
&pxor ($rndkey1,$inout0); # checksum
|
||||
&pxor ($inout0,$rndkey0); # ^ roundkey[0]
|
||||
&pxor ($rndkey1,$inout1);
|
||||
&pxor ($inout1,$rndkey0);
|
||||
&pxor ($rndkey1,$inout2);
|
||||
&pxor ($inout2,$rndkey0);
|
||||
&pxor ($rndkey1,$inout3);
|
||||
&pxor ($inout3,$rndkey0);
|
||||
&pxor ($rndkey1,$inout4);
|
||||
&pxor ($inout4,$rndkey0);
|
||||
&pxor ($rndkey1,$inout5);
|
||||
&pxor ($inout5,$rndkey0);
|
||||
&movdqa (&QWP($checksum,"esp"),$rndkey1);
|
||||
|
||||
&$movekey ($rndkey1,&QWP(-32,$key,$rounds));
|
||||
&pxor ($inout0,&QWP(16*0,"esp")); # ^ offset_i
|
||||
&pxor ($inout1,&QWP(16*1,"esp"));
|
||||
&pxor ($inout2,&QWP(16*2,"esp"));
|
||||
&pxor ($inout3,&QWP(16*3,"esp"));
|
||||
&pxor ($inout4,&QWP(16*4,"esp"));
|
||||
&pxor ($inout5,&QWP(16*5,"esp"));
|
||||
|
||||
&$movekey ($rndkey0,&QWP(-16,$key,$rounds));
|
||||
&aesenc ($inout0,$rndkey1);
|
||||
&aesenc ($inout1,$rndkey1);
|
||||
&aesenc ($inout2,$rndkey1);
|
||||
&aesenc ($inout3,$rndkey1);
|
||||
&aesenc ($inout4,$rndkey1);
|
||||
&aesenc ($inout5,$rndkey1);
|
||||
|
||||
&mov ($out,&DWP($out_off,"esp"));
|
||||
&mov ($len,&DWP($end_off,"esp"));
|
||||
&call ("_aesni_encrypt6_enter");
|
||||
|
||||
&movdqa ($rndkey0,&QWP(16*5,"esp")); # pass last offset_i
|
||||
&pxor ($inout0,&QWP(16*0,"esp")); # ^ offset_i
|
||||
&pxor ($inout1,&QWP(16*1,"esp"));
|
||||
&pxor ($inout2,&QWP(16*2,"esp"));
|
||||
&pxor ($inout3,&QWP(16*3,"esp"));
|
||||
&pxor ($inout4,&QWP(16*4,"esp"));
|
||||
&pxor ($inout5,$rndkey0);
|
||||
&movdqa ($rndkey1,&QWP($checksum,"esp"));# pass the checksum
|
||||
|
||||
&movdqu (&QWP(-16*6,$out,$inp),$inout0);# store output
|
||||
&movdqu (&QWP(-16*5,$out,$inp),$inout1);
|
||||
&movdqu (&QWP(-16*4,$out,$inp),$inout2);
|
||||
&movdqu (&QWP(-16*3,$out,$inp),$inout3);
|
||||
&movdqu (&QWP(-16*2,$out,$inp),$inout4);
|
||||
&movdqu (&QWP(-16*1,$out,$inp),$inout5);
|
||||
&cmp ($inp,$len); # done yet?
|
||||
&jb (&label("grandloop"));
|
||||
|
||||
&set_label("short");
|
||||
&add ($len,16*6);
|
||||
&sub ($len,$inp);
|
||||
&jz (&label("done"));
|
||||
|
||||
&cmp ($len,16*2);
|
||||
&jb (&label("one"));
|
||||
&je (&label("two"));
|
||||
|
||||
&cmp ($len,16*4);
|
||||
&jb (&label("three"));
|
||||
&je (&label("four"));
|
||||
|
||||
&lea ($i1,&DWP(1,$block));
|
||||
&lea ($i3,&DWP(3,$block));
|
||||
&bsf ($i1,$i1);
|
||||
&bsf ($i3,$i3);
|
||||
&shl ($i1,4);
|
||||
&shl ($i3,4);
|
||||
&movdqu ($inout0,&QWP(0,$l_));
|
||||
&movdqu ($inout1,&QWP(0,$l_,$i1));
|
||||
&mov ($rounds,&DWP($rounds_off,"esp"));
|
||||
&movdqa ($inout2,$inout0);
|
||||
&movdqu ($inout3,&QWP(0,$l_,$i3));
|
||||
&movdqa ($inout4,$inout0);
|
||||
|
||||
&pxor ($inout0,$rndkey0); # ^ last offset_i
|
||||
&pxor ($inout1,$inout0);
|
||||
&movdqa (&QWP(16*0,"esp"),$inout0);
|
||||
&pxor ($inout2,$inout1);
|
||||
&movdqa (&QWP(16*1,"esp"),$inout1);
|
||||
&pxor ($inout3,$inout2);
|
||||
&movdqa (&QWP(16*2,"esp"),$inout2);
|
||||
&pxor ($inout4,$inout3);
|
||||
&movdqa (&QWP(16*3,"esp"),$inout3);
|
||||
&pxor ($inout5,$inout4);
|
||||
&movdqa (&QWP(16*4,"esp"),$inout4);
|
||||
|
||||
&$movekey ($rndkey0,&QWP(-48,$key,$rounds));
|
||||
&movdqu ($inout0,&QWP(16*0,$inp)); # load input
|
||||
&movdqu ($inout1,&QWP(16*1,$inp));
|
||||
&movdqu ($inout2,&QWP(16*2,$inp));
|
||||
&movdqu ($inout3,&QWP(16*3,$inp));
|
||||
&movdqu ($inout4,&QWP(16*4,$inp));
|
||||
&pxor ($inout5,$inout5);
|
||||
|
||||
&pxor ($rndkey1,$inout0); # checksum
|
||||
&pxor ($inout0,$rndkey0); # ^ roundkey[0]
|
||||
&pxor ($rndkey1,$inout1);
|
||||
&pxor ($inout1,$rndkey0);
|
||||
&pxor ($rndkey1,$inout2);
|
||||
&pxor ($inout2,$rndkey0);
|
||||
&pxor ($rndkey1,$inout3);
|
||||
&pxor ($inout3,$rndkey0);
|
||||
&pxor ($rndkey1,$inout4);
|
||||
&pxor ($inout4,$rndkey0);
|
||||
&movdqa (&QWP($checksum,"esp"),$rndkey1);
|
||||
|
||||
&$movekey ($rndkey1,&QWP(-32,$key,$rounds));
|
||||
&pxor ($inout0,&QWP(16*0,"esp")); # ^ offset_i
|
||||
&pxor ($inout1,&QWP(16*1,"esp"));
|
||||
&pxor ($inout2,&QWP(16*2,"esp"));
|
||||
&pxor ($inout3,&QWP(16*3,"esp"));
|
||||
&pxor ($inout4,&QWP(16*4,"esp"));
|
||||
|
||||
&$movekey ($rndkey0,&QWP(-16,$key,$rounds));
|
||||
&aesenc ($inout0,$rndkey1);
|
||||
&aesenc ($inout1,$rndkey1);
|
||||
&aesenc ($inout2,$rndkey1);
|
||||
&aesenc ($inout3,$rndkey1);
|
||||
&aesenc ($inout4,$rndkey1);
|
||||
&aesenc ($inout5,$rndkey1);
|
||||
|
||||
&mov ($out,&DWP($out_off,"esp"));
|
||||
&call ("_aesni_encrypt6_enter");
|
||||
|
||||
&movdqa ($rndkey0,&QWP(16*4,"esp")); # pass last offset_i
|
||||
&pxor ($inout0,&QWP(16*0,"esp")); # ^ offset_i
|
||||
&pxor ($inout1,&QWP(16*1,"esp"));
|
||||
&pxor ($inout2,&QWP(16*2,"esp"));
|
||||
&pxor ($inout3,&QWP(16*3,"esp"));
|
||||
&pxor ($inout4,$rndkey0);
|
||||
&movdqa ($rndkey1,&QWP($checksum,"esp"));# pass the checksum
|
||||
|
||||
&movdqu (&QWP(16*0,$out,$inp),$inout0); # store output
|
||||
&movdqu (&QWP(16*1,$out,$inp),$inout1);
|
||||
&movdqu (&QWP(16*2,$out,$inp),$inout2);
|
||||
&movdqu (&QWP(16*3,$out,$inp),$inout3);
|
||||
&movdqu (&QWP(16*4,$out,$inp),$inout4);
|
||||
|
||||
&jmp (&label("done"));
|
||||
|
||||
&set_label("one",16);
|
||||
&movdqu ($inout5,&QWP(0,$l_));
|
||||
&mov ($key,&DWP($key_off,"esp")); # restore key
|
||||
|
||||
&movdqu ($inout0,&QWP(16*0,$inp)); # load input
|
||||
&mov ($rounds,&DWP(240,$key));
|
||||
|
||||
&pxor ($inout5,$rndkey0); # ^ last offset_i
|
||||
&pxor ($rndkey1,$inout0); # checksum
|
||||
&pxor ($inout0,$inout5); # ^ offset_i
|
||||
|
||||
&movdqa ($inout4,$rndkey1);
|
||||
&mov ($out,&DWP($out_off,"esp"));
|
||||
if ($inline)
|
||||
{ &aesni_inline_generate1("enc"); }
|
||||
else
|
||||
{ &call ("_aesni_encrypt1"); }
|
||||
|
||||
&xorps ($inout0,$inout5); # ^ offset_i
|
||||
&movdqa ($rndkey0,$inout5); # pass last offset_i
|
||||
&movdqa ($rndkey1,$inout4); # pass the checksum
|
||||
&movups (&QWP(0,$out,$inp),$inout0);
|
||||
|
||||
&jmp (&label("done"));
|
||||
|
||||
# Tail case of aesni_ocb_encrypt: exactly two blocks remain.
# Derives the two per-block offsets from L_, folds both input blocks
# into the running checksum, encrypts them with _aesni_encrypt2 and
# leaves the last offset in $rndkey0 and the checksum in $rndkey1,
# which is where the common "done" epilogue stores them from.
&set_label("two",16);
&lea ($i1,&DWP(1,$block));
&mov ($key,&DWP($key_off,"esp")); # restore key
&bsf ($i1,$i1); # index of lowest set bit of $block+1
&shl ($i1,4); # *16 = byte offset into L_
&movdqu ($inout4,&QWP(0,$l_));
&movdqu ($inout5,&QWP(0,$l_,$i1));

&movdqu ($inout0,&QWP(16*0,$inp)); # load input
&movdqu ($inout1,&QWP(16*1,$inp));
&mov ($rounds,&DWP(240,$key));

&pxor ($inout4,$rndkey0); # ^ last offset_i
&pxor ($inout5,$inout4);

&pxor ($rndkey1,$inout0); # checksum
&pxor ($inout0,$inout4); # ^ offset_i
&pxor ($rndkey1,$inout1);
&pxor ($inout1,$inout5);

# Put the checksum aside in $inout3 for the duration of the call;
# presumably _aesni_encrypt2 uses $rndkey1 itself -- confirm.
&movdqa ($inout3,$rndkey1);
&mov ($out,&DWP($out_off,"esp"));
&call ("_aesni_encrypt2");

&xorps ($inout0,$inout4); # ^ offset_i
&xorps ($inout1,$inout5);
&movdqa ($rndkey0,$inout5); # pass last offset_i
&movdqa ($rndkey1,$inout3); # pass the checksum
&movups (&QWP(16*0,$out,$inp),$inout0); # store output
&movups (&QWP(16*1,$out,$inp),$inout1);

&jmp (&label("done"));
|
||||
|
||||
&set_label("three",16);
|
||||
&lea ($i1,&DWP(1,$block));
|
||||
&mov ($key,&DWP($key_off,"esp")); # restore key
|
||||
&bsf ($i1,$i1);
|
||||
&shl ($i1,4);
|
||||
&movdqu ($inout3,&QWP(0,$l_));
|
||||
&movdqu ($inout4,&QWP(0,$l_,$i1));
|
||||
&movdqa ($inout5,$inout3);
|
||||
|
||||
&movdqu ($inout0,&QWP(16*0,$inp)); # load input
|
||||
&movdqu ($inout1,&QWP(16*1,$inp));
|
||||
&movdqu ($inout2,&QWP(16*2,$inp));
|
||||
&mov ($rounds,&DWP(240,$key));
|
||||
|
||||
&pxor ($inout3,$rndkey0); # ^ last offset_i
|
||||
&pxor ($inout4,$inout3);
|
||||
&pxor ($inout5,$inout4);
|
||||
|
||||
&pxor ($rndkey1,$inout0); # checksum
|
||||
&pxor ($inout0,$inout3); # ^ offset_i
|
||||
&pxor ($rndkey1,$inout1);
|
||||
&pxor ($inout1,$inout4);
|
||||
&pxor ($rndkey1,$inout2);
|
||||
&pxor ($inout2,$inout5);
|
||||
|
||||
&movdqa (&QWP($checksum,"esp"),$rndkey1);
|
||||
&mov ($out,&DWP($out_off,"esp"));
|
||||
&call ("_aesni_encrypt3");
|
||||
|
||||
&xorps ($inout0,$inout3); # ^ offset_i
|
||||
&xorps ($inout1,$inout4);
|
||||
&xorps ($inout2,$inout5);
|
||||
&movdqa ($rndkey0,$inout5); # pass last offset_i
|
||||
&movdqa ($rndkey1,&QWP($checksum,"esp"));# pass the checksum
|
||||
&movups (&QWP(16*0,$out,$inp),$inout0); # store output
|
||||
&movups (&QWP(16*1,$out,$inp),$inout1);
|
||||
&movups (&QWP(16*2,$out,$inp),$inout2);
|
||||
|
||||
&jmp (&label("done"));
|
||||
|
||||
# Tail case of aesni_ocb_encrypt: exactly four blocks remain.
# Builds four chained offsets from L_ (the first two are spilled to the
# stack frame because their registers are reused for input blocks),
# folds the four input blocks into the checksum, encrypts them with
# _aesni_encrypt4 and falls through to "done" with the last offset in
# $rndkey0 and the checksum in $rndkey1.
&set_label("four",16);
&lea ($i1,&DWP(1,$block));
&lea ($i3,&DWP(3,$block));
&bsf ($i1,$i1); # index of lowest set bit of $block+1
&bsf ($i3,$i3); # index of lowest set bit of $block+3
&mov ($key,&DWP($key_off,"esp")); # restore key
&shl ($i1,4); # *16 = byte offsets into L_
&shl ($i3,4);
&movdqu ($inout2,&QWP(0,$l_));
&movdqu ($inout3,&QWP(0,$l_,$i1));
&movdqa ($inout4,$inout2);
&movdqu ($inout5,&QWP(0,$l_,$i3));

&pxor ($inout2,$rndkey0); # ^ last offset_i
&movdqu ($inout0,&QWP(16*0,$inp)); # load input
&pxor ($inout3,$inout2);
&movdqu ($inout1,&QWP(16*1,$inp));
&pxor ($inout4,$inout3);
&movdqa (&QWP(16*0,"esp"),$inout2); # spill 1st offset
&pxor ($inout5,$inout4);
&movdqa (&QWP(16*1,"esp"),$inout3); # spill 2nd offset
&movdqu ($inout2,&QWP(16*2,$inp));
&movdqu ($inout3,&QWP(16*3,$inp));
&mov ($rounds,&DWP(240,$key));

&pxor ($rndkey1,$inout0); # checksum
&pxor ($inout0,&QWP(16*0,"esp")); # ^ offset_i
&pxor ($rndkey1,$inout1);
&pxor ($inout1,&QWP(16*1,"esp"));
&pxor ($rndkey1,$inout2);
&pxor ($inout2,$inout4);
&pxor ($rndkey1,$inout3);
&pxor ($inout3,$inout5);

# Save the checksum in the stack frame across the call; it is
# reloaded into $rndkey1 afterwards.
&movdqa (&QWP($checksum,"esp"),$rndkey1);
&mov ($out,&DWP($out_off,"esp"));
&call ("_aesni_encrypt4");

&xorps ($inout0,&QWP(16*0,"esp")); # ^ offset_i
&xorps ($inout1,&QWP(16*1,"esp"));
&xorps ($inout2,$inout4);
&movups (&QWP(16*0,$out,$inp),$inout0); # store output
&xorps ($inout3,$inout5);
&movups (&QWP(16*1,$out,$inp),$inout1);
&movdqa ($rndkey0,$inout5); # pass last offset_i
&movups (&QWP(16*2,$out,$inp),$inout2);
&movdqa ($rndkey1,&QWP($checksum,"esp"));# pass the checksum
&movups (&QWP(16*3,$out,$inp),$inout3);
|
||||
|
||||
&set_label("done");
|
||||
&mov ($key,&DWP($esp_off,"esp"));
|
||||
&pxor ($inout0,$inout0); # clear register bank
|
||||
&pxor ($inout1,$inout1);
|
||||
&movdqa (&QWP(16*0,"esp"),$inout0); # clear stack
|
||||
&pxor ($inout2,$inout2);
|
||||
&movdqa (&QWP(16*1,"esp"),$inout0);
|
||||
&pxor ($inout3,$inout3);
|
||||
&movdqa (&QWP(16*2,"esp"),$inout0);
|
||||
&pxor ($inout4,$inout4);
|
||||
&movdqa (&QWP(16*3,"esp"),$inout0);
|
||||
&pxor ($inout5,$inout5);
|
||||
&movdqa (&QWP(16*4,"esp"),$inout0);
|
||||
&movdqa (&QWP(16*5,"esp"),$inout0);
|
||||
&movdqa (&QWP(16*6,"esp"),$inout0);
|
||||
|
||||
&lea ("esp",&DWP(0,$key));
|
||||
&mov ($rounds,&wparam(5)); # &offset_i
|
||||
&mov ($rounds_,&wparam(7)); # &checksum
|
||||
&movdqu (&QWP(0,$rounds),$rndkey0);
|
||||
&pxor ($rndkey0,$rndkey0);
|
||||
&movdqu (&QWP(0,$rounds_),$rndkey1);
|
||||
&pxor ($rndkey1,$rndkey1);
|
||||
&function_end("aesni_ocb_encrypt");
|
||||
|
||||
&function_begin("aesni_ocb_decrypt");
|
||||
&mov ($rounds,&wparam(5)); # &offset_i
|
||||
&mov ($rounds_,&wparam(7)); # &checksum
|
||||
|
||||
&mov ($inp,&wparam(0));
|
||||
&mov ($out,&wparam(1));
|
||||
&mov ($len,&wparam(2));
|
||||
&mov ($key,&wparam(3));
|
||||
&movdqu ($rndkey0,&QWP(0,$rounds)); # load offset_i
|
||||
&mov ($block,&wparam(4)); # start_block_num
|
||||
&movdqu ($rndkey1,&QWP(0,$rounds_)); # load checksum
|
||||
&mov ($l_,&wparam(6)); # L_
|
||||
|
||||
&mov ($rounds,"esp");
|
||||
&sub ("esp",$esp_off+4); # alloca
|
||||
&and ("esp",-16); # align stack
|
||||
|
||||
&sub ($out,$inp);
|
||||
&shl ($len,4);
|
||||
&lea ($len,&DWP(-16*6,$inp,$len)); # end of input - 16*6
|
||||
&mov (&DWP($out_off,"esp"),$out);
|
||||
&mov (&DWP($end_off,"esp"),$len);
|
||||
&mov (&DWP($esp_off,"esp"),$rounds);
|
||||
|
||||
&mov ($rounds,&DWP(240,$key));
|
||||
|
||||
&test ($block,1);
|
||||
&jnz (&label("odd"));
|
||||
|
||||
&bsf ($i3,$block);
|
||||
&add ($block,1);
|
||||
&shl ($i3,4);
|
||||
&movdqu ($inout5,&QWP(0,$l_,$i3));
|
||||
&mov ($i3,$key); # put aside key
|
||||
|
||||
&movdqu ($inout0,&QWP(16*0,$inp)); # load input
|
||||
&lea ($inp,&DWP(16,$inp));
|
||||
|
||||
&pxor ($inout5,$rndkey0); # ^ last offset_i
|
||||
&pxor ($inout0,$inout5); # ^ offset_i
|
||||
|
||||
&movdqa ($inout4,$rndkey1);
|
||||
if ($inline)
|
||||
{ &aesni_inline_generate1("dec"); }
|
||||
else
|
||||
{ &call ("_aesni_decrypt1"); }
|
||||
|
||||
&xorps ($inout0,$inout5); # ^ offset_i
|
||||
&movaps ($rndkey1,$inout4); # pass the checksum
|
||||
&movdqa ($rndkey0,$inout5); # pass last offset_i
|
||||
&xorps ($rndkey1,$inout0); # checksum
|
||||
&movups (&QWP(-16,$out,$inp),$inout0); # store output
|
||||
|
||||
&mov ($rounds,&DWP(240,$i3));
|
||||
&mov ($key,$i3); # restore key
|
||||
&mov ($len,&DWP($end_off,"esp"));
|
||||
|
||||
&set_label("odd");
|
||||
&shl ($rounds,4);
|
||||
&mov ($out,16);
|
||||
&sub ($out,$rounds); # twisted rounds
|
||||
&mov (&DWP($key_off,"esp"),$key);
|
||||
&lea ($key,&DWP(32,$key,$rounds)); # end of key schedule
|
||||
&mov (&DWP($rounds_off,"esp"),$out);
|
||||
|
||||
&cmp ($inp,$len);
|
||||
&ja (&label("short"));
|
||||
&jmp (&label("grandloop"));
|
||||
|
||||
&set_label("grandloop",32);
|
||||
&lea ($i1,&DWP(1,$block));
|
||||
&lea ($i3,&DWP(3,$block));
|
||||
&lea ($i5,&DWP(5,$block));
|
||||
&add ($block,6);
|
||||
&bsf ($i1,$i1);
|
||||
&bsf ($i3,$i3);
|
||||
&bsf ($i5,$i5);
|
||||
&shl ($i1,4);
|
||||
&shl ($i3,4);
|
||||
&shl ($i5,4);
|
||||
&movdqu ($inout0,&QWP(0,$l_));
|
||||
&movdqu ($inout1,&QWP(0,$l_,$i1));
|
||||
&mov ($rounds,&DWP($rounds_off,"esp"));
|
||||
&movdqa ($inout2,$inout0);
|
||||
&movdqu ($inout3,&QWP(0,$l_,$i3));
|
||||
&movdqa ($inout4,$inout0);
|
||||
&movdqu ($inout5,&QWP(0,$l_,$i5));
|
||||
|
||||
&pxor ($inout0,$rndkey0); # ^ last offset_i
|
||||
&pxor ($inout1,$inout0);
|
||||
&movdqa (&QWP(16*0,"esp"),$inout0);
|
||||
&pxor ($inout2,$inout1);
|
||||
&movdqa (&QWP(16*1,"esp"),$inout1);
|
||||
&pxor ($inout3,$inout2);
|
||||
&movdqa (&QWP(16*2,"esp"),$inout2);
|
||||
&pxor ($inout4,$inout3);
|
||||
&movdqa (&QWP(16*3,"esp"),$inout3);
|
||||
&pxor ($inout5,$inout4);
|
||||
&movdqa (&QWP(16*4,"esp"),$inout4);
|
||||
&movdqa (&QWP(16*5,"esp"),$inout5);
|
||||
|
||||
&$movekey ($rndkey0,&QWP(-48,$key,$rounds));
|
||||
&movdqu ($inout0,&QWP(16*0,$inp)); # load input
|
||||
&movdqu ($inout1,&QWP(16*1,$inp));
|
||||
&movdqu ($inout2,&QWP(16*2,$inp));
|
||||
&movdqu ($inout3,&QWP(16*3,$inp));
|
||||
&movdqu ($inout4,&QWP(16*4,$inp));
|
||||
&movdqu ($inout5,&QWP(16*5,$inp));
|
||||
&lea ($inp,&DWP(16*6,$inp));
|
||||
|
||||
&movdqa (&QWP($checksum,"esp"),$rndkey1);
|
||||
&pxor ($inout0,$rndkey0); # ^ roundkey[0]
|
||||
&pxor ($inout1,$rndkey0);
|
||||
&pxor ($inout2,$rndkey0);
|
||||
&pxor ($inout3,$rndkey0);
|
||||
&pxor ($inout4,$rndkey0);
|
||||
&pxor ($inout5,$rndkey0);
|
||||
|
||||
&$movekey ($rndkey1,&QWP(-32,$key,$rounds));
|
||||
&pxor ($inout0,&QWP(16*0,"esp")); # ^ offset_i
|
||||
&pxor ($inout1,&QWP(16*1,"esp"));
|
||||
&pxor ($inout2,&QWP(16*2,"esp"));
|
||||
&pxor ($inout3,&QWP(16*3,"esp"));
|
||||
&pxor ($inout4,&QWP(16*4,"esp"));
|
||||
&pxor ($inout5,&QWP(16*5,"esp"));
|
||||
|
||||
&$movekey ($rndkey0,&QWP(-16,$key,$rounds));
|
||||
&aesdec ($inout0,$rndkey1);
|
||||
&aesdec ($inout1,$rndkey1);
|
||||
&aesdec ($inout2,$rndkey1);
|
||||
&aesdec ($inout3,$rndkey1);
|
||||
&aesdec ($inout4,$rndkey1);
|
||||
&aesdec ($inout5,$rndkey1);
|
||||
|
||||
&mov ($out,&DWP($out_off,"esp"));
|
||||
&mov ($len,&DWP($end_off,"esp"));
|
||||
&call ("_aesni_decrypt6_enter");
|
||||
|
||||
&movdqa ($rndkey0,&QWP(16*5,"esp")); # pass last offset_i
|
||||
&pxor ($inout0,&QWP(16*0,"esp")); # ^ offset_i
|
||||
&movdqa ($rndkey1,&QWP($checksum,"esp"));
|
||||
&pxor ($inout1,&QWP(16*1,"esp"));
|
||||
&pxor ($inout2,&QWP(16*2,"esp"));
|
||||
&pxor ($inout3,&QWP(16*3,"esp"));
|
||||
&pxor ($inout4,&QWP(16*4,"esp"));
|
||||
&pxor ($inout5,$rndkey0);
|
||||
|
||||
&pxor ($rndkey1,$inout0); # checksum
|
||||
&movdqu (&QWP(-16*6,$out,$inp),$inout0);# store output
|
||||
&pxor ($rndkey1,$inout1);
|
||||
&movdqu (&QWP(-16*5,$out,$inp),$inout1);
|
||||
&pxor ($rndkey1,$inout2);
|
||||
&movdqu (&QWP(-16*4,$out,$inp),$inout2);
|
||||
&pxor ($rndkey1,$inout3);
|
||||
&movdqu (&QWP(-16*3,$out,$inp),$inout3);
|
||||
&pxor ($rndkey1,$inout4);
|
||||
&movdqu (&QWP(-16*2,$out,$inp),$inout4);
|
||||
&pxor ($rndkey1,$inout5);
|
||||
&movdqu (&QWP(-16*1,$out,$inp),$inout5);
|
||||
&cmp ($inp,$len); # done yet?
|
||||
&jb (&label("grandloop"));
|
||||
|
||||
&set_label("short");
|
||||
&add ($len,16*6);
|
||||
&sub ($len,$inp);
|
||||
&jz (&label("done"));
|
||||
|
||||
&cmp ($len,16*2);
|
||||
&jb (&label("one"));
|
||||
&je (&label("two"));
|
||||
|
||||
&cmp ($len,16*4);
|
||||
&jb (&label("three"));
|
||||
&je (&label("four"));
|
||||
|
||||
&lea ($i1,&DWP(1,$block));
|
||||
&lea ($i3,&DWP(3,$block));
|
||||
&bsf ($i1,$i1);
|
||||
&bsf ($i3,$i3);
|
||||
&shl ($i1,4);
|
||||
&shl ($i3,4);
|
||||
&movdqu ($inout0,&QWP(0,$l_));
|
||||
&movdqu ($inout1,&QWP(0,$l_,$i1));
|
||||
&mov ($rounds,&DWP($rounds_off,"esp"));
|
||||
&movdqa ($inout2,$inout0);
|
||||
&movdqu ($inout3,&QWP(0,$l_,$i3));
|
||||
&movdqa ($inout4,$inout0);
|
||||
|
||||
&pxor ($inout0,$rndkey0); # ^ last offset_i
|
||||
&pxor ($inout1,$inout0);
|
||||
&movdqa (&QWP(16*0,"esp"),$inout0);
|
||||
&pxor ($inout2,$inout1);
|
||||
&movdqa (&QWP(16*1,"esp"),$inout1);
|
||||
&pxor ($inout3,$inout2);
|
||||
&movdqa (&QWP(16*2,"esp"),$inout2);
|
||||
&pxor ($inout4,$inout3);
|
||||
&movdqa (&QWP(16*3,"esp"),$inout3);
|
||||
&pxor ($inout5,$inout4);
|
||||
&movdqa (&QWP(16*4,"esp"),$inout4);
|
||||
|
||||
&$movekey ($rndkey0,&QWP(-48,$key,$rounds));
|
||||
&movdqu ($inout0,&QWP(16*0,$inp)); # load input
|
||||
&movdqu ($inout1,&QWP(16*1,$inp));
|
||||
&movdqu ($inout2,&QWP(16*2,$inp));
|
||||
&movdqu ($inout3,&QWP(16*3,$inp));
|
||||
&movdqu ($inout4,&QWP(16*4,$inp));
|
||||
&pxor ($inout5,$inout5);
|
||||
|
||||
&movdqa (&QWP($checksum,"esp"),$rndkey1);
|
||||
&pxor ($inout0,$rndkey0); # ^ roundkey[0]
|
||||
&pxor ($inout1,$rndkey0);
|
||||
&pxor ($inout2,$rndkey0);
|
||||
&pxor ($inout3,$rndkey0);
|
||||
&pxor ($inout4,$rndkey0);
|
||||
|
||||
&$movekey ($rndkey1,&QWP(-32,$key,$rounds));
|
||||
&pxor ($inout0,&QWP(16*0,"esp")); # ^ offset_i
|
||||
&pxor ($inout1,&QWP(16*1,"esp"));
|
||||
&pxor ($inout2,&QWP(16*2,"esp"));
|
||||
&pxor ($inout3,&QWP(16*3,"esp"));
|
||||
&pxor ($inout4,&QWP(16*4,"esp"));
|
||||
|
||||
&$movekey ($rndkey0,&QWP(-16,$key,$rounds));
|
||||
&aesdec ($inout0,$rndkey1);
|
||||
&aesdec ($inout1,$rndkey1);
|
||||
&aesdec ($inout2,$rndkey1);
|
||||
&aesdec ($inout3,$rndkey1);
|
||||
&aesdec ($inout4,$rndkey1);
|
||||
&aesdec ($inout5,$rndkey1);
|
||||
|
||||
&mov ($out,&DWP($out_off,"esp"));
|
||||
&call ("_aesni_decrypt6_enter");
|
||||
|
||||
&movdqa ($rndkey0,&QWP(16*4,"esp")); # pass last offset_i
|
||||
&pxor ($inout0,&QWP(16*0,"esp")); # ^ offset_i
|
||||
&movdqa ($rndkey1,&QWP($checksum,"esp"));
|
||||
&pxor ($inout1,&QWP(16*1,"esp"));
|
||||
&pxor ($inout2,&QWP(16*2,"esp"));
|
||||
&pxor ($inout3,&QWP(16*3,"esp"));
|
||||
&pxor ($inout4,$rndkey0);
|
||||
|
||||
&pxor ($rndkey1,$inout0); # checksum
|
||||
&movdqu (&QWP(16*0,$out,$inp),$inout0); # store output
|
||||
&pxor ($rndkey1,$inout1);
|
||||
&movdqu (&QWP(16*1,$out,$inp),$inout1);
|
||||
&pxor ($rndkey1,$inout2);
|
||||
&movdqu (&QWP(16*2,$out,$inp),$inout2);
|
||||
&pxor ($rndkey1,$inout3);
|
||||
&movdqu (&QWP(16*3,$out,$inp),$inout3);
|
||||
&pxor ($rndkey1,$inout4);
|
||||
&movdqu (&QWP(16*4,$out,$inp),$inout4);
|
||||
|
||||
&jmp (&label("done"));
|
||||
|
||||
&set_label("one",16);
|
||||
&movdqu ($inout5,&QWP(0,$l_));
|
||||
&mov ($key,&DWP($key_off,"esp")); # restore key
|
||||
|
||||
&movdqu ($inout0,&QWP(16*0,$inp)); # load input
|
||||
&mov ($rounds,&DWP(240,$key));
|
||||
|
||||
&pxor ($inout5,$rndkey0); # ^ last offset_i
|
||||
&pxor ($inout0,$inout5); # ^ offset_i
|
||||
|
||||
&movdqa ($inout4,$rndkey1);
|
||||
&mov ($out,&DWP($out_off,"esp"));
|
||||
if ($inline)
|
||||
{ &aesni_inline_generate1("dec"); }
|
||||
else
|
||||
{ &call ("_aesni_decrypt1"); }
|
||||
|
||||
&xorps ($inout0,$inout5); # ^ offset_i
|
||||
&movaps ($rndkey1,$inout4); # pass the checksum
|
||||
&movdqa ($rndkey0,$inout5); # pass last offset_i
|
||||
&xorps ($rndkey1,$inout0); # checksum
|
||||
&movups (&QWP(0,$out,$inp),$inout0);
|
||||
|
||||
&jmp (&label("done"));
|
||||
|
||||
&set_label("two",16);
|
||||
&lea ($i1,&DWP(1,$block));
|
||||
&mov ($key,&DWP($key_off,"esp")); # restore key
|
||||
&bsf ($i1,$i1);
|
||||
&shl ($i1,4);
|
||||
&movdqu ($inout4,&QWP(0,$l_));
|
||||
&movdqu ($inout5,&QWP(0,$l_,$i1));
|
||||
|
||||
&movdqu ($inout0,&QWP(16*0,$inp)); # load input
|
||||
&movdqu ($inout1,&QWP(16*1,$inp));
|
||||
&mov ($rounds,&DWP(240,$key));
|
||||
|
||||
&movdqa ($inout3,$rndkey1);
|
||||
&pxor ($inout4,$rndkey0); # ^ last offset_i
|
||||
&pxor ($inout5,$inout4);
|
||||
|
||||
&pxor ($inout0,$inout4); # ^ offset_i
|
||||
&pxor ($inout1,$inout5);
|
||||
|
||||
&mov ($out,&DWP($out_off,"esp"));
|
||||
&call ("_aesni_decrypt2");
|
||||
|
||||
&xorps ($inout0,$inout4); # ^ offset_i
|
||||
&xorps ($inout1,$inout5);
|
||||
&movdqa ($rndkey0,$inout5); # pass last offset_i
|
||||
&xorps ($inout3,$inout0); # checksum
|
||||
&movups (&QWP(16*0,$out,$inp),$inout0); # store output
|
||||
&xorps ($inout3,$inout1);
|
||||
&movups (&QWP(16*1,$out,$inp),$inout1);
|
||||
&movaps ($rndkey1,$inout3); # pass the checksum
|
||||
|
||||
&jmp (&label("done"));
|
||||
|
||||
&set_label("three",16);
|
||||
&lea ($i1,&DWP(1,$block));
|
||||
&mov ($key,&DWP($key_off,"esp")); # restore key
|
||||
&bsf ($i1,$i1);
|
||||
&shl ($i1,4);
|
||||
&movdqu ($inout3,&QWP(0,$l_));
|
||||
&movdqu ($inout4,&QWP(0,$l_,$i1));
|
||||
&movdqa ($inout5,$inout3);
|
||||
|
||||
&movdqu ($inout0,&QWP(16*0,$inp)); # load input
|
||||
&movdqu ($inout1,&QWP(16*1,$inp));
|
||||
&movdqu ($inout2,&QWP(16*2,$inp));
|
||||
&mov ($rounds,&DWP(240,$key));
|
||||
|
||||
&movdqa (&QWP($checksum,"esp"),$rndkey1);
|
||||
&pxor ($inout3,$rndkey0); # ^ last offset_i
|
||||
&pxor ($inout4,$inout3);
|
||||
&pxor ($inout5,$inout4);
|
||||
|
||||
&pxor ($inout0,$inout3); # ^ offset_i
|
||||
&pxor ($inout1,$inout4);
|
||||
&pxor ($inout2,$inout5);
|
||||
|
||||
&mov ($out,&DWP($out_off,"esp"));
|
||||
&call ("_aesni_decrypt3");
|
||||
|
||||
&movdqa ($rndkey1,&QWP($checksum,"esp"));# pass the checksum
|
||||
&xorps ($inout0,$inout3); # ^ offset_i
|
||||
&xorps ($inout1,$inout4);
|
||||
&xorps ($inout2,$inout5);
|
||||
&movups (&QWP(16*0,$out,$inp),$inout0); # store output
|
||||
&pxor ($rndkey1,$inout0); # checksum
|
||||
&movdqa ($rndkey0,$inout5); # pass last offset_i
|
||||
&movups (&QWP(16*1,$out,$inp),$inout1);
|
||||
&pxor ($rndkey1,$inout1);
|
||||
&movups (&QWP(16*2,$out,$inp),$inout2);
|
||||
&pxor ($rndkey1,$inout2);
|
||||
|
||||
&jmp (&label("done"));
|
||||
|
||||
&set_label("four",16);
|
||||
&lea ($i1,&DWP(1,$block));
|
||||
&lea ($i3,&DWP(3,$block));
|
||||
&bsf ($i1,$i1);
|
||||
&bsf ($i3,$i3);
|
||||
&mov ($key,&DWP($key_off,"esp")); # restore key
|
||||
&shl ($i1,4);
|
||||
&shl ($i3,4);
|
||||
&movdqu ($inout2,&QWP(0,$l_));
|
||||
&movdqu ($inout3,&QWP(0,$l_,$i1));
|
||||
&movdqa ($inout4,$inout2);
|
||||
&movdqu ($inout5,&QWP(0,$l_,$i3));
|
||||
|
||||
&pxor ($inout2,$rndkey0); # ^ last offset_i
|
||||
&movdqu ($inout0,&QWP(16*0,$inp)); # load input
|
||||
&pxor ($inout3,$inout2);
|
||||
&movdqu ($inout1,&QWP(16*1,$inp));
|
||||
&pxor ($inout4,$inout3);
|
||||
&movdqa (&QWP(16*0,"esp"),$inout2);
|
||||
&pxor ($inout5,$inout4);
|
||||
&movdqa (&QWP(16*1,"esp"),$inout3);
|
||||
&movdqu ($inout2,&QWP(16*2,$inp));
|
||||
&movdqu ($inout3,&QWP(16*3,$inp));
|
||||
&mov ($rounds,&DWP(240,$key));
|
||||
|
||||
&movdqa (&QWP($checksum,"esp"),$rndkey1);
|
||||
&pxor ($inout0,&QWP(16*0,"esp")); # ^ offset_i
|
||||
&pxor ($inout1,&QWP(16*1,"esp"));
|
||||
&pxor ($inout2,$inout4);
|
||||
&pxor ($inout3,$inout5);
|
||||
|
||||
&mov ($out,&DWP($out_off,"esp"));
|
||||
&call ("_aesni_decrypt4");
|
||||
|
||||
&movdqa ($rndkey1,&QWP($checksum,"esp"));# pass the checksum
|
||||
&xorps ($inout0,&QWP(16*0,"esp")); # ^ offset_i
|
||||
&xorps ($inout1,&QWP(16*1,"esp"));
|
||||
&xorps ($inout2,$inout4);
|
||||
&movups (&QWP(16*0,$out,$inp),$inout0); # store output
|
||||
&pxor ($rndkey1,$inout0); # checksum
|
||||
&xorps ($inout3,$inout5);
|
||||
&movups (&QWP(16*1,$out,$inp),$inout1);
|
||||
&pxor ($rndkey1,$inout1);
|
||||
&movdqa ($rndkey0,$inout5); # pass last offset_i
|
||||
&movups (&QWP(16*2,$out,$inp),$inout2);
|
||||
&pxor ($rndkey1,$inout2);
|
||||
&movups (&QWP(16*3,$out,$inp),$inout3);
|
||||
&pxor ($rndkey1,$inout3);
|
||||
|
||||
&set_label("done");
|
||||
&mov ($key,&DWP($esp_off,"esp"));
|
||||
&pxor ($inout0,$inout0); # clear register bank
|
||||
&pxor ($inout1,$inout1);
|
||||
&movdqa (&QWP(16*0,"esp"),$inout0); # clear stack
|
||||
&pxor ($inout2,$inout2);
|
||||
&movdqa (&QWP(16*1,"esp"),$inout0);
|
||||
&pxor ($inout3,$inout3);
|
||||
&movdqa (&QWP(16*2,"esp"),$inout0);
|
||||
&pxor ($inout4,$inout4);
|
||||
&movdqa (&QWP(16*3,"esp"),$inout0);
|
||||
&pxor ($inout5,$inout5);
|
||||
&movdqa (&QWP(16*4,"esp"),$inout0);
|
||||
&movdqa (&QWP(16*5,"esp"),$inout0);
|
||||
&movdqa (&QWP(16*6,"esp"),$inout0);
|
||||
|
||||
&lea ("esp",&DWP(0,$key));
|
||||
&mov ($rounds,&wparam(5)); # &offset_i
|
||||
&mov ($rounds_,&wparam(7)); # &checksum
|
||||
&movdqu (&QWP(0,$rounds),$rndkey0);
|
||||
&pxor ($rndkey0,$rndkey0);
|
||||
&movdqu (&QWP(0,$rounds_),$rndkey1);
|
||||
&pxor ($rndkey1,$rndkey1);
|
||||
&function_end("aesni_ocb_decrypt");
|
||||
}
|
||||
}
|
||||
|
||||
######################################################################
|
||||
@@ -2419,7 +3305,7 @@ if ($PREFIX eq "aesni") {
|
||||
&pxor ("xmm3","xmm3");
|
||||
&aesenclast ("xmm2","xmm3");
|
||||
|
||||
&movdqa ("xmm3","xmm1")
|
||||
&movdqa ("xmm3","xmm1");
|
||||
&pslldq ("xmm1",4);
|
||||
&pxor ("xmm3","xmm1");
|
||||
&pslldq ("xmm1",4);
|
||||
@@ -2523,3 +3409,5 @@ if ($PREFIX eq "aesni") {
|
||||
&asciz("AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>");
|
||||
|
||||
&asm_finish();
|
||||
|
||||
close STDOUT;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by David S. Miller <davem@devemloft.net> and Andy Polyakov
|
||||
@@ -68,7 +75,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
push(@INC,"${dir}","${dir}../../perlasm");
|
||||
require "sparcv9_modes.pl";
|
||||
|
||||
&asm_init(@ARGV);
|
||||
$output = pop;
|
||||
open STDOUT,">$output";
|
||||
|
||||
$::evp=1; # if $evp is set to 0, script generates module with
|
||||
# AES_[en|de]crypt, AES_set_[en|de]crypt_key and AES_cbc_encrypt entry
|
||||
@@ -83,12 +91,14 @@ $::evp=1; # if $evp is set to 0, script generates module with
|
||||
{
|
||||
my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5));
|
||||
|
||||
$code.=<<___ if ($::abibits==64);
|
||||
$code.=<<___;
|
||||
#include "sparc_arch.h"
|
||||
|
||||
#ifdef __arch64__
|
||||
.register %g2,#scratch
|
||||
.register %g3,#scratch
|
||||
#endif
|
||||
|
||||
___
|
||||
$code.=<<___;
|
||||
.text
|
||||
|
||||
.globl aes_t4_encrypt
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
#
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -27,12 +34,21 @@
|
||||
# Cortex-A53 1.32 1.29 1.46
|
||||
# Cortex-A57(*) 1.95 0.85 0.93
|
||||
# Denver 1.96 0.86 0.80
|
||||
# Mongoose 1.33 1.20 1.20
|
||||
#
|
||||
# (*) original 3.64/1.34/1.32 results were for r0p0 revision
|
||||
# and are still same even for updated module;
|
||||
|
||||
$flavour = shift;
|
||||
open STDOUT,">".shift;
|
||||
$output = shift;
|
||||
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
|
||||
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
|
||||
die "can't locate arm-xlate.pl";
|
||||
|
||||
open OUT,"| \"$^X\" $xlate $flavour $output";
|
||||
*STDOUT=*OUT;
|
||||
|
||||
$prefix="aes_v8";
|
||||
|
||||
@@ -43,9 +59,12 @@ $code=<<___;
|
||||
.text
|
||||
___
|
||||
$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/);
|
||||
$code.=".arch armv7-a\n.fpu neon\n.code 32\n" if ($flavour !~ /64/);
|
||||
#^^^^^^ this is done to simplify adoption by not depending
|
||||
# on latest binutils.
|
||||
$code.=<<___ if ($flavour !~ /64/);
|
||||
.arch armv7-a // don't confuse not-so-latest binutils with argv8 :-)
|
||||
.fpu neon
|
||||
.code 32
|
||||
#undef __thumb2__
|
||||
___
|
||||
|
||||
# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax,
|
||||
# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to
|
||||
@@ -60,7 +79,7 @@ my ($zero,$rcon,$mask,$in0,$in1,$tmp,$key)=
|
||||
|
||||
$code.=<<___;
|
||||
.align 5
|
||||
rcon:
|
||||
.Lrcon:
|
||||
.long 0x01,0x01,0x01,0x01
|
||||
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
|
||||
.long 0x1b,0x1b,0x1b,0x1b
|
||||
@@ -89,7 +108,7 @@ $code.=<<___;
|
||||
tst $bits,#0x3f
|
||||
b.ne .Lenc_key_abort
|
||||
|
||||
adr $ptr,rcon
|
||||
adr $ptr,.Lrcon
|
||||
cmp $bits,#192
|
||||
|
||||
veor $zero,$zero,$zero
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
# ====================================================================
|
||||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
||||
@@ -47,8 +54,20 @@
|
||||
#
|
||||
# <ard.biesheuvel@linaro.org>
|
||||
|
||||
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
|
||||
open STDOUT,">$output";
|
||||
$flavour = shift;
|
||||
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
|
||||
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
|
||||
|
||||
if ($flavour && $flavour ne "void") {
|
||||
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
|
||||
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
|
||||
die "can't locate arm-xlate.pl";
|
||||
|
||||
open STDOUT,"| \"$^X\" $xlate $flavour $output";
|
||||
} else {
|
||||
open STDOUT,">$output";
|
||||
}
|
||||
|
||||
my ($inp,$out,$len,$key)=("r0","r1","r2","r3");
|
||||
my @XMM=map("q$_",(0..15));
|
||||
@@ -702,7 +721,7 @@ $code.=<<___;
|
||||
# define BSAES_ASM_EXTENDED_KEY
|
||||
# define XTS_CHAIN_TWEAK
|
||||
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
|
||||
# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
|
||||
# define __ARM_MAX_ARCH__ 7
|
||||
#endif
|
||||
|
||||
#ifdef __thumb__
|
||||
@@ -715,10 +734,11 @@ $code.=<<___;
|
||||
|
||||
.text
|
||||
.syntax unified @ ARMv7-capable assembler is expected to handle this
|
||||
#ifdef __thumb2__
|
||||
#if defined(__thumb2__) && !defined(__APPLE__)
|
||||
.thumb
|
||||
#else
|
||||
.code 32
|
||||
# undef __thumb2__
|
||||
#endif
|
||||
|
||||
.type _bsaes_decrypt8,%function
|
||||
@@ -726,7 +746,11 @@ $code.=<<___;
|
||||
_bsaes_decrypt8:
|
||||
adr $const,_bsaes_decrypt8
|
||||
vldmia $key!, {@XMM[9]} @ round 0 key
|
||||
#ifdef __APPLE__
|
||||
adr $const,.LM0ISR
|
||||
#else
|
||||
add $const,$const,#.LM0ISR-_bsaes_decrypt8
|
||||
#endif
|
||||
|
||||
vldmia $const!, {@XMM[8]} @ .LM0ISR
|
||||
veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key
|
||||
@@ -821,7 +845,11 @@ _bsaes_const:
|
||||
_bsaes_encrypt8:
|
||||
adr $const,_bsaes_encrypt8
|
||||
vldmia $key!, {@XMM[9]} @ round 0 key
|
||||
#ifdef __APPLE__
|
||||
adr $const,.LM0SR
|
||||
#else
|
||||
sub $const,$const,#_bsaes_encrypt8-.LM0SR
|
||||
#endif
|
||||
|
||||
vldmia $const!, {@XMM[8]} @ .LM0SR
|
||||
_bsaes_encrypt8_alt:
|
||||
@@ -925,7 +953,11 @@ $code.=<<___;
|
||||
_bsaes_key_convert:
|
||||
adr $const,_bsaes_key_convert
|
||||
vld1.8 {@XMM[7]}, [$inp]! @ load round 0 key
|
||||
#ifdef __APPLE__
|
||||
adr $const,.LM0
|
||||
#else
|
||||
sub $const,$const,#_bsaes_key_convert-.LM0
|
||||
#endif
|
||||
vld1.8 {@XMM[15]}, [$inp]! @ load round 1 key
|
||||
|
||||
vmov.i8 @XMM[8], #0x01 @ bit masks
|
||||
@@ -1333,7 +1365,7 @@ bsaes_cbc_encrypt:
|
||||
vmov @XMM[4],@XMM[15] @ just in case ensure that IV
|
||||
vmov @XMM[5],@XMM[0] @ and input are preserved
|
||||
bl AES_decrypt
|
||||
vld1.8 {@XMM[0]}, [$fp,:64] @ load result
|
||||
vld1.8 {@XMM[0]}, [$fp] @ load result
|
||||
veor @XMM[0], @XMM[0], @XMM[4] @ ^= IV
|
||||
vmov @XMM[15], @XMM[5] @ @XMM[5] holds input
|
||||
vst1.8 {@XMM[0]}, [$rounds] @ write output
|
||||
@@ -1392,7 +1424,12 @@ bsaes_ctr32_encrypt_blocks:
|
||||
vstmia r12, {@XMM[7]} @ save last round key
|
||||
|
||||
vld1.8 {@XMM[0]}, [$ctr] @ load counter
|
||||
#ifdef __APPLE__
|
||||
mov $ctr, #:lower16:(.LREVM0SR-.LM0)
|
||||
add $ctr, $const, $ctr
|
||||
#else
|
||||
add $ctr, $const, #.LREVM0SR-.LM0 @ borrow $ctr
|
||||
#endif
|
||||
vldmia $keysched, {@XMM[4]} @ load round0 key
|
||||
#else
|
||||
ldr r12, [$key, #244]
|
||||
@@ -1449,7 +1486,12 @@ bsaes_ctr32_encrypt_blocks:
|
||||
vldmia $ctr, {@XMM[8]} @ .LREVM0SR
|
||||
mov r5, $rounds @ pass rounds
|
||||
vstmia $fp, {@XMM[10]} @ save next counter
|
||||
#ifdef __APPLE__
|
||||
mov $const, #:lower16:(.LREVM0SR-.LSR)
|
||||
sub $const, $ctr, $const
|
||||
#else
|
||||
sub $const, $ctr, #.LREVM0SR-.LSR @ pass constants
|
||||
#endif
|
||||
|
||||
bl _bsaes_encrypt8_alt
|
||||
|
||||
@@ -1550,7 +1592,7 @@ bsaes_ctr32_encrypt_blocks:
|
||||
rev r8, r8
|
||||
#endif
|
||||
sub sp, sp, #0x10
|
||||
vst1.8 {@XMM[1]}, [sp,:64] @ copy counter value
|
||||
vst1.8 {@XMM[1]}, [sp] @ copy counter value
|
||||
sub sp, sp, #0x10
|
||||
|
||||
.Lctr_enc_short_loop:
|
||||
@@ -1561,7 +1603,7 @@ bsaes_ctr32_encrypt_blocks:
|
||||
bl AES_encrypt
|
||||
|
||||
vld1.8 {@XMM[0]}, [r4]! @ load input
|
||||
vld1.8 {@XMM[1]}, [sp,:64] @ load encrypted counter
|
||||
vld1.8 {@XMM[1]}, [sp] @ load encrypted counter
|
||||
add r8, r8, #1
|
||||
#ifdef __ARMEL__
|
||||
rev r0, r8
|
||||
@@ -2068,9 +2110,11 @@ bsaes_xts_decrypt:
|
||||
vld1.8 {@XMM[8]}, [r0] @ initial tweak
|
||||
adr $magic, .Lxts_magic
|
||||
|
||||
#ifndef XTS_CHAIN_TWEAK
|
||||
tst $len, #0xf @ if not multiple of 16
|
||||
it ne @ Thumb2 thing, sanity check in ARM
|
||||
subne $len, #0x10 @ subtract another 16 bytes
|
||||
#endif
|
||||
subs $len, #0x80
|
||||
|
||||
blo .Lxts_dec_short
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
###################################################################
|
||||
### AES-128 [originally in CTR mode] ###
|
||||
@@ -41,6 +48,7 @@
|
||||
# Nehalem(**) 7.63 6.88 +11%
|
||||
# Atom 17.1 16.4 +4%
|
||||
# Silvermont - 12.9
|
||||
# Goldmont - 8.85
|
||||
#
|
||||
# (*) Comparison is not completely fair, because "this" is ECB,
|
||||
# i.e. no extra processing such as counter values calculation
|
||||
@@ -80,6 +88,7 @@
|
||||
# Nehalem 7.80
|
||||
# Atom 17.9
|
||||
# Silvermont 14.0
|
||||
# Goldmont 10.2
|
||||
#
|
||||
# November 2011.
|
||||
#
|
||||
@@ -99,7 +108,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
|
||||
die "can't locate x86_64-xlate.pl";
|
||||
|
||||
open OUT,"| \"$^X\" $xlate $flavour $output";
|
||||
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
|
||||
*STDOUT=*OUT;
|
||||
|
||||
my ($inp,$out,$len,$key,$ivp)=("%rdi","%rsi","%rdx","%rcx");
|
||||
|
||||
1259
crypto/aes/asm/vpaes-armv8.pl
Normal file
1259
crypto/aes/asm/vpaes-armv8.pl
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
######################################################################
|
||||
## Constant-time SSSE3 AES core implementation.
|
||||
@@ -14,7 +21,8 @@
|
||||
# 128-bit key.
|
||||
#
|
||||
# aes-ppc.pl this
|
||||
# G4e 35.5/52.1/(23.8) 11.9(*)/15.4
|
||||
# PPC74x0/G4e 35.5/52.1/(23.8) 11.9(*)/15.4
|
||||
# PPC970/G5 37.9/55.0/(28.5) 22.2/28.5
|
||||
# POWER6 42.7/54.3/(28.2) 63.0/92.8(**)
|
||||
# POWER7 32.3/42.9/(18.4) 18.5/23.3
|
||||
#
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
######################################################################
|
||||
## Constant-time SSSE3 AES core implementation.
|
||||
@@ -51,6 +58,10 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
push(@INC,"${dir}","${dir}../../perlasm");
|
||||
require "x86asm.pl";
|
||||
|
||||
$output = pop;
|
||||
open OUT,">$output";
|
||||
*STDOUT=*OUT;
|
||||
|
||||
&asm_init($ARGV[0],"vpaes-x86.pl",$x86only = $ARGV[$#ARGV] eq "386");
|
||||
|
||||
$PREFIX="vpaes";
|
||||
@@ -901,3 +912,5 @@ $k_dsbo=0x2c0; # decryption sbox final output
|
||||
&function_end("${PREFIX}_cbc_encrypt");
|
||||
|
||||
&asm_finish();
|
||||
|
||||
close STDOUT;
|
||||
|
||||
@@ -1,4 +1,11 @@
|
||||
#!/usr/bin/env perl
|
||||
#! /usr/bin/env perl
|
||||
# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the OpenSSL license (the "License"). You may not use
|
||||
# this file except in compliance with the License. You can obtain a copy
|
||||
# in the file LICENSE in the source distribution or at
|
||||
# https://www.openssl.org/source/license.html
|
||||
|
||||
|
||||
######################################################################
|
||||
## Constant-time SSSE3 AES core implementation.
|
||||
@@ -31,6 +38,7 @@
|
||||
# Nehalem 29.6/40.3/14.6 10.0/11.8
|
||||
# Atom 57.3/74.2/32.1 60.9/77.2(***)
|
||||
# Silvermont 52.7/64.0/19.5 48.8/60.8(***)
|
||||
# Goldmont 38.9/49.0/17.8 10.6/12.6
|
||||
#
|
||||
# (*) "Hyper-threading" in the context refers rather to cache shared
|
||||
# among multiple cores, than to specifically Intel HTT. As vast
|
||||
@@ -57,7 +65,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
||||
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
|
||||
die "can't locate x86_64-xlate.pl";
|
||||
|
||||
open OUT,"| \"$^X\" $xlate $flavour $output";
|
||||
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
|
||||
*STDOUT=*OUT;
|
||||
|
||||
$PREFIX="vpaes";
|
||||
|
||||
Reference in New Issue
Block a user