diff --git a/CMakeLists.txt b/CMakeLists.txt index f1399502225..ac625280a9a 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,6 +107,8 @@ else() endif() # The PQ ASM try_compile has to come after we turn on pthread +set(PQ_ASM_COMPILES_ADX false) + if(S2N_NO_PQ_ASM) message(STATUS "S2N_NO_PQ_ASM flag was detected - forcing usage of generic C code for PQ crypto") else() @@ -121,6 +123,19 @@ else() message(STATUS "PQ ASM try_compile succeeded - using optimized x86_64 assembly for PQ crypto") file(GLOB PQ_X86_64_ASM "pq-crypto/sike_r2/fp_x64_asm.S") list(APPEND PQ_SRC ${PQ_X86_64_ASM}) + + message(STATUS "Attempting to try_compile PQ ASM with ADX support") + try_compile(PQ_ASM_COMPILES_ADX ${CMAKE_BINARY_DIR} + SOURCES + "${CMAKE_CURRENT_LIST_DIR}/tests/unit/s2n_pq_asm_noop_test.c" + "${CMAKE_CURRENT_LIST_DIR}/pq-crypto/sike_r2/fp_x64_asm.S" + COMPILE_DEFINITIONS "-D_ADX_") + if(PQ_ASM_COMPILES_ADX) + message(STATUS "PQ ASM try_compile with ADX support succeeded - using ASM code with ADX instructions") + # The -D_ADX_ compile flag is added to the project below + else() + message(STATUS "PQ ASM try_compile with ADX support failed - using ASM code without ADX instructions") + endif() else() message(STATUS "PQ ASM try_compile failed - using generic C code for PQ crypto") set(S2N_NO_PQ_ASM ON) @@ -173,6 +188,11 @@ if(S2N_NO_PQ) target_compile_options(${PROJECT_NAME} PUBLIC -DS2N_NO_PQ) endif() +# _ADX_ selects the adox/adcx code paths in pq-crypto/sike_r2/fp_x64_asm.S; PRIVATE keeps it out of consumers' compile flags. +if(PQ_ASM_COMPILES_ADX) + target_compile_definitions(${PROJECT_NAME} PRIVATE -D_ADX_) +endif() + target_compile_options(${PROJECT_NAME} PUBLIC -fPIC) target_compile_definitions(${PROJECT_NAME} PRIVATE -D_POSIX_C_SOURCE=200809L) diff --git a/pq-crypto/Makefile b/pq-crypto/Makefile index 8774fcab9e7..1fef5f80356 100644 --- a/pq-crypto/Makefile +++ b/pq-crypto/Makefile @@ -53,9 +53,9 @@ run-lcov: lcov .PHONY : clean clean: decruft - ${MAKE} -C bike_r1 decruft + $(MAKE) -C bike_r1 decruft $(MAKE) -C bike_r2 decruft - ${MAKE} -C sike_r1 decruft - ${MAKE} -C sike_r2 decruft + $(MAKE) 
-C sike_r1 decruft + $(MAKE) -C sike_r2 decruft include ../s2n.mk diff --git a/pq-crypto/sike_r2/Makefile b/pq-crypto/sike_r2/Makefile index 26d3400a2c2..08a339488e1 100644 --- a/pq-crypto/sike_r2/Makefile +++ b/pq-crypto/sike_r2/Makefile @@ -40,6 +40,11 @@ ifndef S2N_NO_PQ_ASM ifeq ($(TRY_COMPILE_PQ_ASM), 0) ASRC=fp_x64_asm.S OBJS+=$(ASRC:.S=.o) + + TRY_COMPILE_PQ_ASM_ADX := $(shell $(CC) -D_ADX_ -c -o ./fp_x64_asm.o ./fp_x64_asm.S > /dev/null 2>&1; echo $$?) + ifeq ($(TRY_COMPILE_PQ_ASM_ADX), 0) + CFLAGS += -D_ADX_ + endif else CFLAGS += -DS2N_NO_PQ_ASM endif diff --git a/pq-crypto/sike_r2/fp_x64_asm.S b/pq-crypto/sike_r2/fp_x64_asm.S index 95bd24ba2b1..fc867a96596 100644 --- a/pq-crypto/sike_r2/fp_x64_asm.S +++ b/pq-crypto/sike_r2/fp_x64_asm.S @@ -6,7 +6,7 @@ .intel_syntax noprefix -#define _MULX_ +/* Requires bmi2 instruction set for mulx. adx instructions are optional, but preferred. */ // Registers that are used for parameter passing: #define reg_p1 rdi @@ -14,7 +14,6 @@ #define reg_p3 rdx // Define addition instructions -#ifdef _MULX_ #ifdef _ADX_ #define ADD1 adox @@ -22,15 +21,14 @@ #define ADD2 adcx #define ADC2 adcx -#else +#else // _ADX_ #define ADD1 add #define ADC1 adc #define ADD2 add #define ADC2 adc -#endif -#endif +#endif // _ADX_ // The constants below (asm_p434, asm_p434p1, and asm_p434x2) are duplicated from // P434.c, and correspond to the arrays p434, p434p1, and p434x2. 
The values are @@ -147,7 +145,6 @@ fpadd434_asm: pop r12 ret - //*********************************************************************** // Field subtraction // Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] @@ -209,9 +206,6 @@ fpsub434_asm: pop r13 pop r12 ret - - -#ifdef _MULX_ ///////////////////////////////////////////////////////////////// MACRO // Schoolbook integer multiplication, a full row at a time @@ -335,7 +329,7 @@ fpsub434_asm: mov 56\C, \T8 // C7_final .endm -#else +#else // _ADX_ .macro MUL192_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6 mov rdx, \M0 @@ -445,8 +439,7 @@ fpsub434_asm: adc \T8, 0 mov 56\C, \T8 // C7_final .endm -#endif - +#endif // _ADX_ //***************************************************************************** // 434-bit multiplication using Karatsuba (one level), schoolbook (one level) @@ -595,26 +588,6 @@ mul434_asm: pop r12 ret -#else - -//*********************************************************************** -// Integer multiplication -// Based on Karatsuba method -// Operation: c [reg_p3] = a [reg_p1] * b [reg_p2] -// NOTE: a=c or b=c are not allowed -//*********************************************************************** -.global mul434_asm -mul434_asm: - - ret - -# error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE" - -#endif - - -#ifdef _MULX_ - ///////////////////////////////////////////////////////////////// MACRO // Schoolbook integer multiplication // Inputs: memory pointers M0 and M1 @@ -664,7 +637,7 @@ mul434_asm: ADC1 \T5, rax .endm -#else +#else // _ADX_ .macro MUL128x256_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6 mov rdx, \M0 @@ -692,9 +665,8 @@ mul434_asm: adc \T4, rdx adc \T5, 0 .endm -#endif +#endif // _ADX_ - //************************************************************************************** // Montgomery reduction // Based on method described in Faz-Hernandez et al. 
https://eprint.iacr.org/2017/1015 @@ -800,24 +772,6 @@ rdc434_asm: pop r12 ret -#else - -//*********************************************************************** -// Montgomery reduction -// Based on comba method -// Operation: c [reg_p2] = a [reg_p1] -// NOTE: a=c is not allowed -//*********************************************************************** -.global rdc434_asm -rdc434_asm: - - ret - -# error "CONFIGURATION NOT SUPPORTED. TRY USE_MULX=TRUE" - -#endif - - //*********************************************************************** // 434-bit multiprecision addition // Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] @@ -848,7 +802,6 @@ mp_add434_asm: mov [reg_p3+48], r10 ret - //*********************************************************************** // 2x434-bit multiprecision subtraction/addition // Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p434*2^448 @@ -933,7 +886,6 @@ mp_subadd434x2_asm: pop r12 ret - //*********************************************************************** // Double 2x434-bit multiprecision subtraction // Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2]