Skip to content

Commit

Permalink
Use ADX instructions where applicable in SIKE R2 assembly (aws#1712)
Browse files Browse the repository at this point in the history
* Use ADX instructions where applicable in SIKE R2 assembly

* Fix sike_r2 Makefile
  • Loading branch information
bbutch authored Mar 27, 2020
1 parent ce419d3 commit a9d4d87
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 58 deletions.
19 changes: 19 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ else()
endif()

# The PQ ASM try_compile has to come after we turn on pthread
#
# PQ_ASM_COMPILES_ADX starts out false so that it has a defined value on every
# path; it is only given another value by the try_compile(PQ_ASM_COMPILES_ADX ...)
# call further down, which sits on the branch taken when the PQ ASM is used.
set(PQ_ASM_COMPILES_ADX false)

if(S2N_NO_PQ_ASM)
message(STATUS "S2N_NO_PQ_ASM flag was detected - forcing usage of generic C code for PQ crypto")
else()
Expand All @@ -121,6 +123,19 @@ else()
message(STATUS "PQ ASM try_compile succeeded - using optimized x86_64 assembly for PQ crypto")
file(GLOB PQ_X86_64_ASM "pq-crypto/sike_r2/fp_x64_asm.S")
list(APPEND PQ_SRC ${PQ_X86_64_ASM})

# Second probe: check whether the same assembly file still builds when _ADX_ is
# defined, i.e. when its ADX instruction variants are selected. The outcome is
# stored in PQ_ASM_COMPILES_ADX and only reported here; the _ADX_ define itself
# is attached to the library target later in this file, keyed on that variable.
message(STATUS "Attempting to try_compile PQ ASM with ADX support")
try_compile(
    PQ_ASM_COMPILES_ADX
    ${CMAKE_BINARY_DIR}
    SOURCES
        "${CMAKE_CURRENT_LIST_DIR}/tests/unit/s2n_pq_asm_noop_test.c"
        "${CMAKE_CURRENT_LIST_DIR}/pq-crypto/sike_r2/fp_x64_asm.S"
    COMPILE_DEFINITIONS "-D_ADX_"
)

if(NOT PQ_ASM_COMPILES_ADX)
    # Not fatal: the assembly is still used, just without the ADX variants.
    message(STATUS "PQ ASM try_compile with ADX support failed - using ASM code without ADX instructions")
else()
    message(STATUS "PQ ASM try_compile with ADX support succeeded - using ASM code with ADX instructions")
endif()
else()
message(STATUS "PQ ASM try_compile failed - using generic C code for PQ crypto")
set(S2N_NO_PQ_ASM ON)
Expand Down Expand Up @@ -173,6 +188,10 @@ if(S2N_NO_PQ)
target_compile_options(${PROJECT_NAME} PUBLIC -DS2N_NO_PQ)
endif()

# When the ADX try_compile probe succeeded, build the target with _ADX_ defined
# so that fp_x64_asm.S is preprocessed with its ADX instruction variants.
# The macro is registered as a compile definition rather than as a raw "-D"
# compile option; PUBLIC visibility is kept unchanged.
if(PQ_ASM_COMPILES_ADX)
    target_compile_definitions(${PROJECT_NAME} PUBLIC _ADX_)
endif()

target_compile_options(${PROJECT_NAME} PUBLIC -fPIC)

target_compile_definitions(${PROJECT_NAME} PRIVATE -D_POSIX_C_SOURCE=200809L)
Expand Down
6 changes: 3 additions & 3 deletions pq-crypto/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ run-lcov: lcov

.PHONY : clean
clean: decruft
${MAKE} -C bike_r1 decruft
$(MAKE) -C bike_r1 decruft
$(MAKE) -C bike_r2 decruft
${MAKE} -C sike_r1 decruft
${MAKE} -C sike_r2 decruft
$(MAKE) -C sike_r1 decruft
$(MAKE) -C sike_r2 decruft

include ../s2n.mk
5 changes: 5 additions & 0 deletions pq-crypto/sike_r2/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ ifndef S2N_NO_PQ_ASM
ifeq ($(TRY_COMPILE_PQ_ASM), 0)
ASRC=fp_x64_asm.S
OBJS+=$(ASRC:.S=.o)

# Probe whether the compiler can build fp_x64_asm.S with -D_ADX_ defined. The
# shell command discards all compiler output and echoes the exit status, so a
# value of 0 means the probe succeeded.
# NOTE(review): the probe writes its output to ./fp_x64_asm.o, which is the same
# name as the object derived from ASRC and added to OBJS above - confirm that
# creating that file as a side effect of parsing this Makefile is intended.
TRY_COMPILE_PQ_ASM_ADX := $(shell $(CC) -D_ADX_ -c -o ./fp_x64_asm.o ./fp_x64_asm.S > /dev/null 2>&1; echo $$?)
ifeq ($(TRY_COMPILE_PQ_ASM_ADX), 0)
# Probe succeeded: add -D_ADX_ to CFLAGS.
CFLAGS += -D_ADX_
endif
else
CFLAGS += -DS2N_NO_PQ_ASM
endif
Expand Down
62 changes: 7 additions & 55 deletions pq-crypto/sike_r2/fp_x64_asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,29 @@

.intel_syntax noprefix

#define _MULX_
/* Requires bmi2 instruction set for mulx. adx instructions are optional, but preferred. */

// Registers that are used for parameter passing:
#define reg_p1 rdi
#define reg_p2 rsi
#define reg_p3 rdx

// Define addition instructions
#ifdef _MULX_
#ifdef _ADX_

#define ADD1 adox
#define ADC1 adox
#define ADD2 adcx
#define ADC2 adcx

#else
#else // _ADX_

#define ADD1 add
#define ADC1 adc
#define ADD2 add
#define ADC2 adc

#endif
#endif
#endif // _ADX_

// The constants below (asm_p434, asm_p434p1, and asm_p434x2) are duplicated from
// P434.c, and correspond to the arrays p434, p434p1, and p434x2. The values are
Expand Down Expand Up @@ -147,7 +145,6 @@ fpadd434_asm:
pop r12
ret


//***********************************************************************
// Field subtraction
// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2]
Expand Down Expand Up @@ -209,9 +206,6 @@ fpsub434_asm:
pop r13
pop r12
ret


#ifdef _MULX_

///////////////////////////////////////////////////////////////// MACRO
// Schoolbook integer multiplication, a full row at a time
Expand Down Expand Up @@ -335,7 +329,7 @@ fpsub434_asm:
mov 56\C, \T8 // C7_final
.endm

#else
#else // _ADX_

.macro MUL192_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6
mov rdx, \M0
Expand Down Expand Up @@ -445,8 +439,7 @@ fpsub434_asm:
adc \T8, 0
mov 56\C, \T8 // C7_final
.endm
#endif

#endif // _ADX_

//*****************************************************************************
// 434-bit multiplication using Karatsuba (one level), schoolbook (one level)
Expand Down Expand Up @@ -595,26 +588,6 @@ mul434_asm:
pop r12
ret

#else

//***********************************************************************
// Integer multiplication
// Based on Karatsuba method
// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2]
// NOTE: a=c or b=c are not allowed
//***********************************************************************
.global mul434_asm
mul434_asm:

ret

# error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE"

#endif


#ifdef _MULX_

///////////////////////////////////////////////////////////////// MACRO
// Schoolbook integer multiplication
// Inputs: memory pointers M0 and M1
Expand Down Expand Up @@ -664,7 +637,7 @@ mul434_asm:
ADC1 \T5, rax
.endm

#else
#else // _ADX_

.macro MUL128x256_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6
mov rdx, \M0
Expand Down Expand Up @@ -692,9 +665,8 @@ mul434_asm:
adc \T4, rdx
adc \T5, 0
.endm
#endif
#endif // _ADX_


//**************************************************************************************
// Montgomery reduction
// Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015
Expand Down Expand Up @@ -800,24 +772,6 @@ rdc434_asm:
pop r12
ret

#else

//***********************************************************************
// Montgomery reduction
// Based on comba method
// Operation: c [reg_p2] = a [reg_p1]
// NOTE: a=c is not allowed
//***********************************************************************
.global rdc434_asm
rdc434_asm:

ret

# error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE"

#endif


//***********************************************************************
// 434-bit multiprecision addition
// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2]
Expand Down Expand Up @@ -848,7 +802,6 @@ mp_add434_asm:
mov [reg_p3+48], r10
ret


//***********************************************************************
// 2x434-bit multiprecision subtraction/addition
// Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p434*2^448
Expand Down Expand Up @@ -933,7 +886,6 @@ mp_subadd434x2_asm:
pop r12
ret


//***********************************************************************
// Double 2x434-bit multiprecision subtraction
// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]
Expand Down

0 comments on commit a9d4d87

Please sign in to comment.