/*
 * Copyright (c) 2011-2014, Intel Corporation
 * Authors: Fenghua Yu,
 *          H. Peter Anvin
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 */

/*
 * This is the Windows version of the file.  It is equivalent to the
 * original code from Intel, but replaces the ELF pseudo-op expressions
 * with COFF ones where necessary and uses the Windows ABI rather than
 * the System V ABI on x86_64.  It also relies on recent versions of gas
 * knowing the rdrand, rdseed, and aes opcodes, which avoids the opaque
 * .byte expressions of the original.
 */

#if defined(__i386__) || defined(__x86_64__)

#ifdef __x86_64__
#define LBL(x) x
#else
#define LBL(x) _##x
#endif

#define ENTRY(x)	  \
	.align	8	; \
	.globl	LBL(x)	; \
	.def	LBL(x)	; \
	.scl	2	; \
	.type	32	; \
	.endef		; \
	LBL(x):

#define ENDPROC(x)

#define RDRAND_RETRY_LIMIT	10

#ifdef __x86_64__

/*
 * Fill a buffer with RDRAND output, 8 bytes at a time, retrying each
 * word up to RDRAND_RETRY_LIMIT times; returns the number of bytes
 * actually stored.  Windows x64 ABI: %rcx = buffer, %edx = byte count.
 */
ENTRY(x86_rdrand_bytes)
	mov	%edx, %eax
1:
	mov	$RDRAND_RETRY_LIMIT, %r9d
2:
	rdrand	%r10
	jnc	3f
	mov	%r10, (%rcx)
	add	$8, %rcx
	sub	$8, %edx
	ja	1b
4:
	sub	%edx, %eax
	ret
3:
	dec	%r9d
	rep;nop
	jnz	2b
	jmp	4b
ENDPROC(x86_rdrand_bytes)

/*
 * Fill one buffer from RDSEED, falling back to a second buffer filled
 * from RDRAND whenever RDSEED is not ready; both counts are updated in
 * place to the number of bytes produced.  Windows x64 ABI:
 * %rcx = RDSEED buffer, %rdx = &RDSEED count,
 * %r8  = RDRAND buffer, %r9  = &RDRAND count.
 */
ENTRY(x86_rdseed_or_rdrand_bytes)
	push	%r12
	push	%r13
	mov	(%rdx), %r12d		/* RDSEED count */
	mov	(%r9), %r13d		/* RDRAND count */
1:
	mov	$RDRAND_RETRY_LIMIT, %r10d
2:
	rdseed	%rax
	jnc	3f
	mov	%rax, (%rcx)
	add	$8, %rcx
	sub	$8, %r12d
	ja	1b
4:
	sub	%r12d, (%rdx)
	sub	%r13d, (%r9)
	pop	%r13
	pop	%r12
	ret
3:
	rdrand	%rax
	jnc	5f
	mov	%rax, (%r8)
	add	$8, %r8
	sub	$8, %r13d
	ja	1b
	jmp	4b
5:
	dec	%r10d
	rep;nop
	jnz	2b
	jmp	4b
ENDPROC(x86_rdseed_or_rdrand_bytes)

#define SETPTR(var,ptr)	leaq var(%rip),ptr
#define PTR0	%rcx
#define PTR1	%rdx
#define PTR2	%r9
#define CTR3	%eax

#define NPTR2	1	/* unused: mnemonics replace the old .byte encodings */

#elif defined(__i386__)

ENTRY(x86_rdrand_bytes)
	push	%ebp
	mov	%esp, %ebp
	push	%edi
	push	%esi

	movl	8(%ebp), %edi
	movl	12(%ebp), %esi

	mov	%esi, %eax
1:
	mov	$RDRAND_RETRY_LIMIT, %ecx
2:
	rdrand	%edx
	jnc	3f
	mov	%edx, (%edi)
	add	$4, %edi
	sub	$4, %esi
	ja	1b
4:
	sub	%esi, %eax
	pop	%esi
	pop	%edi
	pop	%ebp
	ret
3:
	dec	%ecx
	rep;nop
	jnz	2b
	jmp	4b
ENDPROC(x86_rdrand_bytes)

ENTRY(x86_rdseed_or_rdrand_bytes)
	push	%ebp
	mov	%esp, %ebp
	push	%edi
	push	%esi
	push	%ebx

	mov	12(%ebp), %ebx
	mov	20(%ebp), %esi
	mov	8(%ebp), %edi		/* RDSEED pointer */
	mov	16(%ebp), %edx		/* RDRAND pointer */
	mov	(%ebx), %ebx		/* RDSEED count */
	mov	(%esi), %esi		/* RDRAND count */
1:
	mov	$RDRAND_RETRY_LIMIT, %ecx
2:
	rdseed	%eax
	jnc	3f
	mov	%eax, (%edi)
	add	$4, %edi
	sub	$4, %ebx
	ja	1b
4:
	mov	12(%ebp), %edx
	mov	20(%ebp), %eax
	sub	%ebx, (%edx)		/* RDSEED count */
	sub	%esi, (%eax)		/* RDRAND count */
	pop	%ebx
	pop	%esi
	pop	%edi
	pop	%ebp
	ret
3:
	rdrand	%eax
	jnc	5f
	mov	%eax, (%edx)
	add	$4, %edx
	sub	$4, %esi
	ja	1b
	jmp	4b
5:
	dec	%ecx
	rep;nop
	jnz	2b
	jmp	4b
ENDPROC(x86_rdseed_or_rdrand_bytes)

#define SETPTR(var,ptr)	movl $(var),ptr
#define PTR0	%eax
#define PTR1	%edx
#define PTR2	%ecx
#define CTR3	%esi

#define NPTR2	1	/* %ecx = %r1 */

#endif

ENTRY(x86_aes_mangle)
#ifdef __i386__
	push	%ebp
	mov	%esp, %ebp
	movl	8(%ebp), %eax
	movl	12(%ebp), %edx
	push	%esi
#endif
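	/*
	 * The mangle loop below, summarized here since the source carries
	 * no comment of its own (the layout is inferred from the code):
	 * PTR0 points to a 64 KiB input buffer treated as 8 columns of 512
	 * 16-byte blocks (stride 8192 bytes), and PTR1 points to 8 16-byte
	 * chaining blocks.  Each of the 512 iterations XORs one block from
	 * every column into its chaining block and runs a full 10-round
	 * AES-128 encryption with the round keys prepared by
	 * x86_aes_expand_key, i.e. 8 parallel AES-CBC lanes of which only
	 * the final ciphertext blocks are written back through PTR1.
	 */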
#ifdef __x86_64__
	/* %xmm6-%xmm8 are nonvolatile in the Windows x64 ABI: save them */
	sub	$0x30, %rsp
	movdqu	%xmm6, (0*16)(%rsp)
	movdqu	%xmm7, (1*16)(%rsp)
	movdqu	%xmm8, (2*16)(%rsp)
#endif
	movl	$512, CTR3	/* Number of rounds */

	movdqa	(0*16)(PTR1), %xmm0
	movdqa	(1*16)(PTR1), %xmm1
	movdqa	(2*16)(PTR1), %xmm2
	movdqa	(3*16)(PTR1), %xmm3
	movdqa	(4*16)(PTR1), %xmm4
	movdqa	(5*16)(PTR1), %xmm5
	movdqa	(6*16)(PTR1), %xmm6
	movdqa	(7*16)(PTR1), %xmm7

#ifdef __x86_64__
	SETPTR(aes_round_keys, PTR2)
1:
#else
1:
	SETPTR(aes_round_keys, PTR2)
#endif

	/* 8192 = 512 (rounds) * 16 (bytes) */
	pxor	(0*8192)(PTR0), %xmm0
	pxor	(1*8192)(PTR0), %xmm1
	pxor	(2*8192)(PTR0), %xmm2
	pxor	(3*8192)(PTR0), %xmm3
	pxor	(4*8192)(PTR0), %xmm4
	pxor	(5*8192)(PTR0), %xmm5
	pxor	(6*8192)(PTR0), %xmm6
	pxor	(7*8192)(PTR0), %xmm7
	add	$16, PTR0

offset = 0
.rept 10
#ifdef __x86_64__
	movdqa	offset(PTR2), %xmm8
offset = offset + 16
	aesenc	%xmm8, %xmm0
	aesenc	%xmm8, %xmm1
	aesenc	%xmm8, %xmm2
	aesenc	%xmm8, %xmm3
	aesenc	%xmm8, %xmm4
	aesenc	%xmm8, %xmm5
	aesenc	%xmm8, %xmm6
	aesenc	%xmm8, %xmm7
#else
	aesenc	(PTR2), %xmm0
	aesenc	(PTR2), %xmm1
	aesenc	(PTR2), %xmm2
	aesenc	(PTR2), %xmm3
	aesenc	(PTR2), %xmm4
	aesenc	(PTR2), %xmm5
	aesenc	(PTR2), %xmm6
	aesenc	(PTR2), %xmm7
	add	$16, PTR2
#endif
.endr

#ifdef __x86_64__
	movdqa	offset(PTR2), %xmm8
	aesenclast %xmm8, %xmm0
	aesenclast %xmm8, %xmm1
	aesenclast %xmm8, %xmm2
	aesenclast %xmm8, %xmm3
	aesenclast %xmm8, %xmm4
	aesenclast %xmm8, %xmm5
	aesenclast %xmm8, %xmm6
	aesenclast %xmm8, %xmm7
#else
	aesenclast (PTR2), %xmm0
	aesenclast (PTR2), %xmm1
	aesenclast (PTR2), %xmm2
	aesenclast (PTR2), %xmm3
	aesenclast (PTR2), %xmm4
	aesenclast (PTR2), %xmm5
	aesenclast (PTR2), %xmm6
	aesenclast (PTR2), %xmm7
#endif

	sub	$1, CTR3
	jnz	1b

	movdqa	%xmm0, (0*16)(PTR1)
	movdqa	%xmm1, (1*16)(PTR1)
	movdqa	%xmm2, (2*16)(PTR1)
	movdqa	%xmm3, (3*16)(PTR1)
	movdqa	%xmm4, (4*16)(PTR1)
	movdqa	%xmm5, (5*16)(PTR1)
	movdqa	%xmm6, (6*16)(PTR1)
	movdqa	%xmm7, (7*16)(PTR1)

#ifdef __x86_64__
	movdqu	(0*16)(%rsp), %xmm6
	movdqu	(1*16)(%rsp), %xmm7
	movdqu	(2*16)(%rsp), %xmm8
	add	$0x30, %rsp
#endif
#ifdef __i386__
	pop	%esi
	pop	%ebp
#endif
	ret
ENDPROC(x86_aes_mangle)

/*
 * Expand a 128-bit AES key into the 11 round keys used above.
 * PTR0 = pointer to the key.
 */
ENTRY(x86_aes_expand_key)
#ifdef __i386__
	push	%ebp
	mov	%esp, %ebp
	movl	8(%ebp), %eax
#endif
	SETPTR(aes_round_keys, PTR1)

	movdqu	(PTR0), %xmm0
	movdqa	%xmm0, (PTR1)	/* First slot = the plain key */
	add	$16, PTR1

	aeskeygenassist $0x01,%xmm0,%xmm1
	call	1f
	aeskeygenassist $0x02,%xmm0,%xmm1
	call	1f
	aeskeygenassist $0x04,%xmm0,%xmm1
	call	1f
	aeskeygenassist $0x08,%xmm0,%xmm1
	call	1f
	aeskeygenassist $0x10,%xmm0,%xmm1
	call	1f
	aeskeygenassist $0x20,%xmm0,%xmm1
	call	1f
	aeskeygenassist $0x40,%xmm0,%xmm1
	call	1f
	aeskeygenassist $0x80,%xmm0,%xmm1
	call	1f
	aeskeygenassist $0x1b,%xmm0,%xmm1
	call	1f
	aeskeygenassist $0x36,%xmm0,%xmm1
	call	1f

#ifdef __i386__
	pop	%ebp
#endif
	ret

1:
	/* Fold the aeskeygenassist result into the previous round key
	   and store the next round key at (PTR1) */
	pshufd	$0xff, %xmm1, %xmm1
	movdqa	%xmm0, %xmm2
	pslldq	$4, %xmm2
	pxor	%xmm2, %xmm0
	pslldq	$4, %xmm2
	pxor	%xmm2, %xmm0
	pslldq	$4, %xmm2
	pxor	%xmm2, %xmm0
	pxor	%xmm1, %xmm0
	movdqa	%xmm0, (PTR1)
	add	$16, PTR1
	ret
ENDPROC(x86_aes_expand_key)

	.bss
	.balign	64
aes_round_keys:
	.space	11*16

#endif /* i386 or x86_64 */
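
/*
 * For reference, plausible C-level declarations for the routines above,
 * inferred from their register and stack usage.  The file ships no header,
 * so the parameter names and exact integer types are assumptions of this
 * commentary rather than the project's own prototypes.
 *
 *   int  x86_rdrand_bytes(void *buf, unsigned int count);
 *       returns the number of bytes actually stored
 *
 *   void x86_rdseed_or_rdrand_bytes(void *seed_buf, unsigned int *seed_count,
 *                                   void *rand_buf, unsigned int *rand_count);
 *       both counts are updated in place to the bytes produced
 *
 *   void x86_aes_expand_key(const void *key);
 *       key points to the 16-byte AES-128 key
 *
 *   void x86_aes_mangle(void *data, void *state);
 *       data is the 64 KiB input buffer, state the 8 x 16-byte blocks
 */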