diff --git a/net/wireguard/compat/Kbuild.include b/net/wireguard/compat/Kbuild.include index 209c0ccd49..0192ecdcbc 100644 --- a/net/wireguard/compat/Kbuild.include +++ b/net/wireguard/compat/Kbuild.include @@ -12,6 +12,10 @@ ifeq ($(wildcard $(srctree)/include/linux/ptr_ring.h),) ccflags-y += -I$(kbuild-dir)/compat/ptr_ring/include endif +ifeq ($(wildcard $(srctree)/include/linux/skb_array.h),) +ccflags-y += -I$(kbuild-dir)/compat/skb_array/include +endif + ifeq ($(wildcard $(srctree)/include/linux/siphash.h),) ccflags-y += -I$(kbuild-dir)/compat/siphash/include wireguard-y += compat/siphash/siphash.o @@ -65,6 +69,10 @@ ifeq ($(wildcard $(srctree)/arch/arm64/include/asm/neon.h)$(CONFIG_ARM64),y) ccflags-y += -I$(kbuild-dir)/compat/neon-arm/include endif +ifeq ($(wildcard $(srctree)/include/net/dst_metadata.h),) +ccflags-y += -I$(kbuild-dir)/compat/dstmetadata/include +endif + ifeq ($(CONFIG_X86_64),y) ifeq ($(ssse3_instr),) ssse3_instr := $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1) diff --git a/net/wireguard/compat/compat.h b/net/wireguard/compat/compat.h index cd1894e904..36e902b062 100644 --- a/net/wireguard/compat/compat.h +++ b/net/wireguard/compat/compat.h @@ -22,9 +22,7 @@ #endif #endif #ifdef UTS_UBUNTU_RELEASE_ABI -#if LINUX_VERSION_CODE == KERNEL_VERSION(3, 13, 11) -#define ISUBUNTU1404 -#elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0) #define ISUBUNTU1604 #elif LINUX_VERSION_CODE < KERNEL_VERSION(4, 16, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(4, 15, 0) #define ISUBUNTU1804 @@ -219,7 +217,7 @@ static inline void skb_scrub_packet(struct sk_buff *skb, bool xnet) #define skb_scrub_packet(a, b) skb_scrub_packet(a) #endif -#if ((LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 63) || 
defined(ISUBUNTU1404)) && !defined(ISRHEL7) +#if ((LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 63)) && !defined(ISRHEL7) #include static inline u32 __compat_prandom_u32_max(u32 ep_ro) { @@ -268,7 +266,7 @@ static inline u32 __compat_prandom_u32_max(u32 ep_ro) #endif #endif -#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 3) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 35) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 24) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) && !defined(ISUBUNTU1404)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 33) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 60) && !defined(ISRHEL7)) +#if (LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 3) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 17, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 16, 35) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 15, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 14, 24) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 33) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 11, 0)) || (LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 60) && !defined(ISRHEL7)) static inline void memzero_explicit(void *s, size_t count) { memset(s, 0, count); @@ -502,7 +500,7 @@ static inline void *__compat_kvzalloc(size_t size, gfp_t flags) #define kvzalloc __compat_kvzalloc #endif -#if ((LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 41)) && !defined(ISUBUNTU1404) +#if ((LINUX_VERSION_CODE < KERNEL_VERSION(3, 15, 0) && LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0)) || LINUX_VERSION_CODE < KERNEL_VERSION(3, 12, 41)) #include #include static inline void __compat_kvfree(const void *addr) diff --git 
a/net/wireguard/compat/dstmetadata/include/net/dst_metadata.h b/net/wireguard/compat/dstmetadata/include/net/dst_metadata.h new file mode 100644 index 0000000000..995094d4f0 --- /dev/null +++ b/net/wireguard/compat/dstmetadata/include/net/dst_metadata.h @@ -0,0 +1,3 @@ +#ifndef skb_valid_dst +#define skb_valid_dst(skb) (!!skb_dst(skb)) +#endif diff --git a/net/wireguard/compat/skb_array/include/linux/skb_array.h b/net/wireguard/compat/skb_array/include/linux/skb_array.h new file mode 100644 index 0000000000..c91fedcdbf --- /dev/null +++ b/net/wireguard/compat/skb_array/include/linux/skb_array.h @@ -0,0 +1,11 @@ +#ifndef _WG_SKB_ARRAY_H +#define _WG_SKB_ARRAY_H + +#include <linux/skbuff.h> + +static void __skb_array_destroy_skb(void *ptr) +{ + kfree_skb(ptr); +} + +#endif diff --git a/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64.c b/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64.c index f26ed5d897..8b6872a2f0 100644 --- a/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64.c +++ b/net/wireguard/crypto/zinc/curve25519/curve25519-x86_64.c @@ -34,11 +34,11 @@ static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2) asm volatile( /* Clear registers to propagate the carry bit */ - " xor %%r8, %%r8;" - " xor %%r9, %%r9;" - " xor %%r10, %%r10;" - " xor %%r11, %%r11;" - " xor %1, %1;" + " xor %%r8d, %%r8d;" + " xor %%r9d, %%r9d;" + " xor %%r10d, %%r10d;" + " xor %%r11d, %%r11d;" + " xor %k1, %k1;" /* Begin addition chain */ " addq 0(%3), %0;" @@ -52,10 +52,9 @@ static inline u64 add_scalar(u64 *out, const u64 *f1, u64 f2) /* Return the carry bit in a register */ " adcx %%r11, %1;" - : "+&r" (f2), "=&r" (carry_r) - : "r" (out), "r" (f1) - : "%r8", "%r9", "%r10", "%r11", "memory", "cc" - ); + : "+&r"(f2), "=&r"(carry_r) + : "r"(out), "r"(f1) + : "%r8", "%r9", "%r10", "%r11", "memory", "cc"); return carry_r; } @@ -82,7 +81,7 @@ static inline void fadd(u64 *out, const u64 *f1, const u64 *f2) " cmovc %0, %%rax;" /* Step 2: Add carry*38 to the original sum */ - " xor 
%%rcx, %%rcx;" + " xor %%ecx, %%ecx;" " add %%rax, %%r8;" " adcx %%rcx, %%r9;" " movq %%r9, 8(%1);" @@ -96,17 +95,16 @@ static inline void fadd(u64 *out, const u64 *f1, const u64 *f2) " cmovc %0, %%rax;" " add %%rax, %%r8;" " movq %%r8, 0(%1);" - : "+&r" (f2) - : "r" (out), "r" (f1) - : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc" - ); + : "+&r"(f2) + : "r"(out), "r"(f1) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); } -/* Computes the field substraction of two field elements */ +/* Computes the field subtraction of two field elements */ static inline void fsub(u64 *out, const u64 *f1, const u64 *f2) { asm volatile( - /* Compute the raw substraction of f1-f2 */ + /* Compute the raw subtraction of f1-f2 */ " movq 0(%1), %%r8;" " subq 0(%2), %%r8;" " movq 8(%1), %%r9;" @@ -123,7 +121,7 @@ static inline void fsub(u64 *out, const u64 *f1, const u64 *f2) " mov $38, %%rcx;" " cmovc %%rcx, %%rax;" - /* Step 2: Substract carry*38 from the original difference */ + /* Step 2: Subtract carry*38 from the original difference */ " sub %%rax, %%r8;" " sbb $0, %%r9;" " sbb $0, %%r10;" @@ -139,10 +137,9 @@ static inline void fsub(u64 *out, const u64 *f1, const u64 *f2) " movq %%r9, 8(%0);" " movq %%r10, 16(%0);" " movq %%r11, 24(%0);" - : - : "r" (out), "r" (f1), "r" (f2) - : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc" - ); + : + : "r"(out), "r"(f1), "r"(f2) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); } /* Computes a field multiplication: out <- f1 * f2 @@ -150,239 +147,400 @@ static inline void fsub(u64 *out, const u64 *f1, const u64 *f2) static inline void fmul(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) { asm volatile( + /* Compute the raw multiplication: tmp <- src1 * src2 */ /* Compute src1[0] * src2 */ - " movq 0(%1), %%rdx;" - " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" - " mulxq 16(%3), %%rbx, 
%%r13;" " adox %%r11, %%rbx;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" + " movq 0(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 0(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 8(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + /* Compute src1[1] * src2 */ - " movq 8(%1), %%rdx;" - " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);" - " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + " movq 8(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 16(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + /* Compute src1[2] * src2 */ - " movq 16(%1), %%rdx;" - " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);" - " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + " movq 16(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 
16(%2), %%r8;" + " movq %%r8, 16(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 24(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + /* Compute src1[3] * src2 */ - " movq 24(%1), %%rdx;" - " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);" - " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" + " movq 24(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 24(%2), %%r8;" + " movq %%r8, 24(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 32(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 40(%2);" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 48(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 56(%2);" + /* Line up pointers */ - " mov %0, %1;" " mov %2, %0;" + " mov %3, %2;" /* Wrap the result back into the field */ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ " mov $38, %%rdx;" - " mulxq 32(%1), %%r8, %%r13;" - " xor %3, %3;" - " adoxq 0(%1), %%r8;" - " mulxq 40(%1), %%r9, %%rbx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" " adcx %%r13, %%r9;" - " adoxq 8(%1), %%r9;" - " mulxq 48(%1), %%r10, %%r13;" + " adoxq 8(%0), 
%%r9;" + " mulxq 48(%0), %%r10, %%r13;" " adcx %%rbx, %%r10;" - " adoxq 16(%1), %%r10;" - " mulxq 56(%1), %%r11, %%rax;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" " adcx %%r13, %%r11;" - " adoxq 24(%1), %%r11;" - " adcx %3, %%rax;" - " adox %3, %%rax;" + " adoxq 24(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" " imul %%rdx, %%rax;" /* Step 2: Fold the carry back into dst */ " add %%rax, %%r8;" - " adcx %3, %%r9;" - " movq %%r9, 8(%0);" - " adcx %3, %%r10;" - " movq %%r10, 16(%0);" - " adcx %3, %%r11;" - " movq %%r11, 24(%0);" + " adcx %1, %%r9;" + " movq %%r9, 8(%2);" + " adcx %1, %%r10;" + " movq %%r10, 16(%2);" + " adcx %1, %%r11;" + " movq %%r11, 24(%2);" /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ " mov $0, %%rax;" " cmovc %%rdx, %%rax;" " add %%rax, %%r8;" - " movq %%r8, 0(%0);" - : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) - : - : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc" - ); + " movq %%r8, 0(%2);" + : "+&r"(f1), "+&r"(f2), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", + "%r14", "memory", "cc"); } /* Computes two field multiplications: - * out[0] <- f1[0] * f2[0] - * out[1] <- f1[1] * f2[1] - * Uses the 16-element buffer tmp for intermediate results. 
*/ + * out[0] <- f1[0] * f2[0] + * out[1] <- f1[1] * f2[1] + * Uses the 16-element buffer tmp for intermediate results: */ static inline void fmul2(u64 *out, const u64 *f1, const u64 *f2, u64 *tmp) { asm volatile( + /* Compute the raw multiplication tmp[0] <- f1[0] * f2[0] */ /* Compute src1[0] * src2 */ - " movq 0(%1), %%rdx;" - " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 0(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 8(%0);" - " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" + " movq 0(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 0(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 8(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + /* Compute src1[1] * src2 */ - " movq 8(%1), %%rdx;" - " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 8(%0), %%r8;" " movq %%r8, 8(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 16(%0);" - " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + " movq 8(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 16(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + /* Compute src1[2] * src2 */ - " movq 16(%1), %%rdx;" - " mulxq 0(%3), 
%%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 16(%0), %%r8;" " movq %%r8, 16(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 24(%0);" - " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + " movq 16(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 16(%2), %%r8;" + " movq %%r8, 16(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 24(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + /* Compute src1[3] * src2 */ - " movq 24(%1), %%rdx;" - " mulxq 0(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 24(%0), %%r8;" " movq %%r8, 24(%0);" - " mulxq 8(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 32(%0);" - " mulxq 16(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 40(%0);" " mov $0, %%r8;" - " mulxq 24(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 48(%0);" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 56(%0);" + " movq 24(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 24(%2), %%r8;" + " movq %%r8, 24(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 32(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 40(%2);" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 48(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 
56(%2);" /* Compute the raw multiplication tmp[1] <- f1[1] * f2[1] */ /* Compute src1[0] * src2 */ - " movq 32(%1), %%rdx;" - " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " movq %%r8, 64(%0);" - " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " movq %%r10, 72(%0);" - " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" - " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" + " movq 32(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 64(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 72(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + /* Compute src1[1] * src2 */ - " movq 40(%1), %%rdx;" - " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 72(%0), %%r8;" " movq %%r8, 72(%0);" - " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 80(%0);" - " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + " movq 40(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 72(%2), %%r8;" + " movq %%r8, 72(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 80(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + /* Compute src1[2] * src2 */ - " movq 48(%1), %%rdx;" - " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 80(%0), %%r8;" " movq %%r8, 80(%0);" - " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 88(%0);" - " 
mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " mov $0, %%r8;" - " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" + " movq 48(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 80(%2), %%r8;" + " movq %%r8, 80(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 88(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + /* Compute src1[3] * src2 */ - " movq 56(%1), %%rdx;" - " mulxq 32(%3), %%r8, %%r9;" " xor %%r10, %%r10;" " adcxq 88(%0), %%r8;" " movq %%r8, 88(%0);" - " mulxq 40(%3), %%r10, %%r11;" " adox %%r9, %%r10;" " adcx %%rbx, %%r10;" " movq %%r10, 96(%0);" - " mulxq 48(%3), %%rbx, %%r13;" " adox %%r11, %%rbx;" " adcx %%r14, %%rbx;" " movq %%rbx, 104(%0);" " mov $0, %%r8;" - " mulxq 56(%3), %%r14, %%rdx;" " adox %%r13, %%r14;" " adcx %%rax, %%r14;" " movq %%r14, 112(%0);" " mov $0, %%rax;" - " adox %%rdx, %%rax;" " adcx %%r8, %%rax;" " movq %%rax, 120(%0);" + " movq 56(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 88(%2), %%r8;" + " movq %%r8, 88(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 96(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 104(%2);" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 112(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 120(%2);" + /* Line up pointers */ - " mov %0, %1;" " mov %2, %0;" + " mov %3, %2;" /* Wrap the results back into the field */ /* Step 1: Compute dst + carry == tmp_hi * 38 + 
tmp_lo */ " mov $38, %%rdx;" - " mulxq 32(%1), %%r8, %%r13;" - " xor %3, %3;" - " adoxq 0(%1), %%r8;" - " mulxq 40(%1), %%r9, %%rbx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" " adcx %%r13, %%r9;" - " adoxq 8(%1), %%r9;" - " mulxq 48(%1), %%r10, %%r13;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" " adcx %%rbx, %%r10;" - " adoxq 16(%1), %%r10;" - " mulxq 56(%1), %%r11, %%rax;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" " adcx %%r13, %%r11;" - " adoxq 24(%1), %%r11;" - " adcx %3, %%rax;" - " adox %3, %%rax;" + " adoxq 24(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" " imul %%rdx, %%rax;" /* Step 2: Fold the carry back into dst */ " add %%rax, %%r8;" - " adcx %3, %%r9;" - " movq %%r9, 8(%0);" - " adcx %3, %%r10;" - " movq %%r10, 16(%0);" - " adcx %3, %%r11;" - " movq %%r11, 24(%0);" + " adcx %1, %%r9;" + " movq %%r9, 8(%2);" + " adcx %1, %%r10;" + " movq %%r10, 16(%2);" + " adcx %1, %%r11;" + " movq %%r11, 24(%2);" /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ " mov $0, %%rax;" " cmovc %%rdx, %%rax;" " add %%rax, %%r8;" - " movq %%r8, 0(%0);" + " movq %%r8, 0(%2);" /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ " mov $38, %%rdx;" - " mulxq 96(%1), %%r8, %%r13;" - " xor %3, %3;" - " adoxq 64(%1), %%r8;" - " mulxq 104(%1), %%r9, %%rbx;" + " mulxq 96(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 64(%0), %%r8;" + " mulxq 104(%0), %%r9, %%rbx;" " adcx %%r13, %%r9;" - " adoxq 72(%1), %%r9;" - " mulxq 112(%1), %%r10, %%r13;" + " adoxq 72(%0), %%r9;" + " mulxq 112(%0), %%r10, %%r13;" " adcx %%rbx, %%r10;" - " adoxq 80(%1), %%r10;" - " mulxq 120(%1), %%r11, %%rax;" + " adoxq 80(%0), %%r10;" + " mulxq 120(%0), %%r11, %%rax;" " adcx %%r13, %%r11;" - " adoxq 88(%1), %%r11;" - " adcx %3, %%rax;" - " adox %3, %%rax;" + " adoxq 88(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" " imul %%rdx, %%rax;" /* Step 2: Fold the carry 
back into dst */ " add %%rax, %%r8;" - " adcx %3, %%r9;" - " movq %%r9, 40(%0);" - " adcx %3, %%r10;" - " movq %%r10, 48(%0);" - " adcx %3, %%r11;" - " movq %%r11, 56(%0);" + " adcx %1, %%r9;" + " movq %%r9, 40(%2);" + " adcx %1, %%r10;" + " movq %%r10, 48(%2);" + " adcx %1, %%r11;" + " movq %%r11, 56(%2);" /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ " mov $0, %%rax;" " cmovc %%rdx, %%rax;" " add %%rax, %%r8;" - " movq %%r8, 32(%0);" - : "+&r" (tmp), "+&r" (f1), "+&r" (out), "+&r" (f2) - : - : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "memory", "cc" - ); + " movq %%r8, 32(%2);" + : "+&r"(f1), "+&r"(f2), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", + "%r14", "memory", "cc"); } -/* Computes the field multiplication of four-element f1 with value in f2 */ +/* Computes the field multiplication of four-element f1 with value in f2 + * Requires f2 to be smaller than 2^17 */ static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2) { register u64 f2_r asm("rdx") = f2; asm volatile( /* Compute the raw multiplication of f1*f2 */ - " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */ - " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */ + " mulxq 0(%2), %%r8, %%rcx;" /* f1[0]*f2 */ + " mulxq 8(%2), %%r9, %%rbx;" /* f1[1]*f2 */ " add %%rcx, %%r9;" " mov $0, %%rcx;" - " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */ + " mulxq 16(%2), %%r10, %%r13;" /* f1[2]*f2 */ " adcx %%rbx, %%r10;" - " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */ + " mulxq 24(%2), %%r11, %%rax;" /* f1[3]*f2 */ " adcx %%r13, %%r11;" " adcx %%rcx, %%rax;" @@ -406,17 +564,17 @@ static inline void fmul_scalar(u64 *out, const u64 *f1, u64 f2) " cmovc %%rdx, %%rax;" " add %%rax, %%r8;" " movq %%r8, 0(%1);" - : "+&r" (f2_r) - : "r" (out), "r" (f1) - : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "memory", "cc" - ); + : "+&r"(f2_r) + : "r"(out), "r"(f1) + : "%rax", "%rbx", "%rcx", "%r8", "%r9", "%r10", 
"%r11", "%r13", + "memory", "cc"); } /* Computes p1 <- bit ? p2 : p1 in constant time */ static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2) { asm volatile( - /* Invert the polarity of bit to match cmov expectations */ + /* Transfer bit into CF flag */ " add $18446744073709551615, %0;" /* cswap p1[0], p2[0] */ @@ -490,10 +648,9 @@ static inline void cswap2(u64 bit, const u64 *p1, const u64 *p2) " cmovc %%r10, %%r9;" " movq %%r8, 56(%1);" " movq %%r9, 56(%2);" - : "+&r" (bit) - : "r" (p1), "r" (p2) - : "%r8", "%r9", "%r10", "memory", "cc" - ); + : "+&r"(bit) + : "r"(p1), "r"(p2) + : "%r8", "%r9", "%r10", "memory", "cc"); } /* Computes the square of a field element: out <- f * f @@ -504,18 +661,25 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) /* Compute the raw multiplication: tmp <- f * f */ /* Step 1: Compute all partial products */ - " movq 0(%1), %%rdx;" /* f[0] */ - " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ - " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ - " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ - " movq 24(%1), %%rdx;" /* f[3] */ - " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ - " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ - " movq 8(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ - " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ + " movq 0(%0), %%rdx;" /* f[0] */ + " mulxq 8(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" /* f[1]*f[0] */ + " mulxq 16(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 24(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 24(%0), %%rdx;" /* f[3] */ + " mulxq 8(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 8(%0), %%rdx;" + " adcx %%r15, %%r13;" /* f1 */ + " mulxq 16(%0), %%rax, %%rcx;" + " mov $0, %%r14;" /* f[2]*f[1] */ /* Step 2: Compute two 
parallel carry chains */ - " xor %%r15, %%r15;" + " xor %%r15d, %%r15d;" " adox %%rax, %%r10;" " adcx %%r8, %%r8;" " adox %%rcx, %%r11;" @@ -530,39 +694,50 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) " adcx %%r14, %%r14;" /* Step 3: Compute intermediate squares */ - " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ - " movq %%rax, 0(%0);" - " add %%rcx, %%r8;" " movq %%r8, 8(%0);" - " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ - " adcx %%rax, %%r9;" " movq %%r9, 16(%0);" - " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" - " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ - " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" - " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);" - " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ - " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" - " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" + " movq 0(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 0(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 8(%1);" + " movq 8(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" + " movq %%r9, 16(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 24(%1);" + " movq 16(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" + " movq %%r11, 32(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 40(%1);" + " movq 24(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" + " movq %%r13, 48(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 56(%1);" /* Line up pointers */ - " mov %0, %1;" - " mov %2, %0;" + " mov %1, %0;" + " mov %2, %1;" /* Wrap the result back into the field */ /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ " mov $38, %%rdx;" - " mulxq 32(%1), %%r8, %%r13;" - " xor %%rcx, %%rcx;" - " adoxq 0(%1), %%r8;" - " mulxq 40(%1), %%r9, %%rbx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" " adcx %%r13, %%r9;" - " adoxq 8(%1), 
%%r9;" - " mulxq 48(%1), %%r10, %%r13;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" " adcx %%rbx, %%r10;" - " adoxq 16(%1), %%r10;" - " mulxq 56(%1), %%r11, %%rax;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" " adcx %%r13, %%r11;" - " adoxq 24(%1), %%r11;" + " adoxq 24(%0), %%r11;" " adcx %%rcx, %%rax;" " adox %%rcx, %%rax;" " imul %%rdx, %%rax;" @@ -570,43 +745,50 @@ static inline void fsqr(u64 *out, const u64 *f, u64 *tmp) /* Step 2: Fold the carry back into dst */ " add %%rax, %%r8;" " adcx %%rcx, %%r9;" - " movq %%r9, 8(%0);" + " movq %%r9, 8(%1);" " adcx %%rcx, %%r10;" - " movq %%r10, 16(%0);" + " movq %%r10, 16(%1);" " adcx %%rcx, %%r11;" - " movq %%r11, 24(%0);" + " movq %%r11, 24(%1);" /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ " mov $0, %%rax;" " cmovc %%rdx, %%rax;" " add %%rax, %%r8;" - " movq %%r8, 0(%0);" - : "+&r,&r" (tmp), "+&r,&r" (f) - : "r,m" (out) - : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc" - ); + " movq %%r8, 0(%1);" + : "+&r,&r"(f), "+&r,&r"(tmp) + : "r,m"(out) + : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", + "%r13", "%r14", "%r15", "memory", "cc"); } /* Computes two field squarings: - * out[0] <- f[0] * f[0] - * out[1] <- f[1] * f[1] + * out[0] <- f[0] * f[0] + * out[1] <- f[1] * f[1] * Uses the 16-element buffer tmp for intermediate results */ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) { asm volatile( /* Step 1: Compute all partial products */ - " movq 0(%1), %%rdx;" /* f[0] */ - " mulxq 8(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ - " mulxq 16(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ - " mulxq 24(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ - " movq 24(%1), %%rdx;" /* f[3] */ - " mulxq 8(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ - " mulxq 16(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ - " movq 8(%1), %%rdx;" " 
adcx %%r15, %%r13;" /* f1 */ - " mulxq 16(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ + " movq 0(%0), %%rdx;" /* f[0] */ + " mulxq 8(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" /* f[1]*f[0] */ + " mulxq 16(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 24(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 24(%0), %%rdx;" /* f[3] */ + " mulxq 8(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 16(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 8(%0), %%rdx;" + " adcx %%r15, %%r13;" /* f1 */ + " mulxq 16(%0), %%rax, %%rcx;" + " mov $0, %%r14;" /* f[2]*f[1] */ /* Step 2: Compute two parallel carry chains */ - " xor %%r15, %%r15;" + " xor %%r15d, %%r15d;" " adox %%rax, %%r10;" " adcx %%r8, %%r8;" " adox %%rcx, %%r11;" @@ -621,32 +803,50 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) " adcx %%r14, %%r14;" /* Step 3: Compute intermediate squares */ - " movq 0(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ - " movq %%rax, 0(%0);" - " add %%rcx, %%r8;" " movq %%r8, 8(%0);" - " movq 8(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ - " adcx %%rax, %%r9;" " movq %%r9, 16(%0);" - " adcx %%rcx, %%r10;" " movq %%r10, 24(%0);" - " movq 16(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ - " adcx %%rax, %%r11;" " movq %%r11, 32(%0);" - " adcx %%rcx, %%rbx;" " movq %%rbx, 40(%0);" - " movq 24(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ - " adcx %%rax, %%r13;" " movq %%r13, 48(%0);" - " adcx %%rcx, %%r14;" " movq %%r14, 56(%0);" + " movq 0(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 0(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 8(%1);" + " movq 8(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" + " movq %%r9, 16(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 24(%1);" + " movq 16(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" + " movq %%r11, 32(%1);" + " 
adcx %%rcx, %%rbx;" + " movq %%rbx, 40(%1);" + " movq 24(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" + " movq %%r13, 48(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 56(%1);" /* Step 1: Compute all partial products */ - " movq 32(%1), %%rdx;" /* f[0] */ - " mulxq 40(%1), %%r8, %%r14;" " xor %%r15, %%r15;" /* f[1]*f[0] */ - " mulxq 48(%1), %%r9, %%r10;" " adcx %%r14, %%r9;" /* f[2]*f[0] */ - " mulxq 56(%1), %%rax, %%rcx;" " adcx %%rax, %%r10;" /* f[3]*f[0] */ - " movq 56(%1), %%rdx;" /* f[3] */ - " mulxq 40(%1), %%r11, %%rbx;" " adcx %%rcx, %%r11;" /* f[1]*f[3] */ - " mulxq 48(%1), %%rax, %%r13;" " adcx %%rax, %%rbx;" /* f[2]*f[3] */ - " movq 40(%1), %%rdx;" " adcx %%r15, %%r13;" /* f1 */ - " mulxq 48(%1), %%rax, %%rcx;" " mov $0, %%r14;" /* f[2]*f[1] */ + " movq 32(%0), %%rdx;" /* f[0] */ + " mulxq 40(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" /* f[1]*f[0] */ + " mulxq 48(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" /* f[2]*f[0] */ + " mulxq 56(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" /* f[3]*f[0] */ + " movq 56(%0), %%rdx;" /* f[3] */ + " mulxq 40(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" /* f[1]*f[3] */ + " mulxq 48(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" /* f[2]*f[3] */ + " movq 40(%0), %%rdx;" + " adcx %%r15, %%r13;" /* f1 */ + " mulxq 48(%0), %%rax, %%rcx;" + " mov $0, %%r14;" /* f[2]*f[1] */ /* Step 2: Compute two parallel carry chains */ - " xor %%r15, %%r15;" + " xor %%r15d, %%r15d;" " adox %%rax, %%r10;" " adcx %%r8, %%r8;" " adox %%rcx, %%r11;" @@ -661,37 +861,48 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) " adcx %%r14, %%r14;" /* Step 3: Compute intermediate squares */ - " movq 32(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ - " movq %%rax, 64(%0);" - " add %%rcx, %%r8;" " movq %%r8, 72(%0);" - " movq 40(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ - " adcx %%rax, %%r9;" " movq %%r9, 80(%0);" - " adcx %%rcx, %%r10;" " movq %%r10, 88(%0);" - " movq 48(%1), %%rdx;" " mulx 
%%rdx, %%rax, %%rcx;" /* f[2]^2 */ - " adcx %%rax, %%r11;" " movq %%r11, 96(%0);" - " adcx %%rcx, %%rbx;" " movq %%rbx, 104(%0);" - " movq 56(%1), %%rdx;" " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ - " adcx %%rax, %%r13;" " movq %%r13, 112(%0);" - " adcx %%rcx, %%r14;" " movq %%r14, 120(%0);" + " movq 32(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[0]^2 */ + " movq %%rax, 64(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 72(%1);" + " movq 40(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[1]^2 */ + " adcx %%rax, %%r9;" + " movq %%r9, 80(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 88(%1);" + " movq 48(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[2]^2 */ + " adcx %%rax, %%r11;" + " movq %%r11, 96(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 104(%1);" + " movq 56(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" /* f[3]^2 */ + " adcx %%rax, %%r13;" + " movq %%r13, 112(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 120(%1);" /* Line up pointers */ - " mov %0, %1;" - " mov %2, %0;" + " mov %1, %0;" + " mov %2, %1;" /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ " mov $38, %%rdx;" - " mulxq 32(%1), %%r8, %%r13;" - " xor %%rcx, %%rcx;" - " adoxq 0(%1), %%r8;" - " mulxq 40(%1), %%r9, %%rbx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" " adcx %%r13, %%r9;" - " adoxq 8(%1), %%r9;" - " mulxq 48(%1), %%r10, %%r13;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" " adcx %%rbx, %%r10;" - " adoxq 16(%1), %%r10;" - " mulxq 56(%1), %%r11, %%rax;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" " adcx %%r13, %%r11;" - " adoxq 24(%1), %%r11;" + " adoxq 24(%0), %%r11;" " adcx %%rcx, %%rax;" " adox %%rcx, %%rax;" " imul %%rdx, %%rax;" @@ -699,32 +910,32 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) /* Step 2: Fold the carry back into dst */ " add %%rax, %%r8;" " adcx %%rcx, %%r9;" - " movq %%r9, 8(%0);" + " movq %%r9, 8(%1);" " adcx %%rcx, %%r10;" - " movq %%r10, 16(%0);" + 
" movq %%r10, 16(%1);" " adcx %%rcx, %%r11;" - " movq %%r11, 24(%0);" + " movq %%r11, 24(%1);" /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ " mov $0, %%rax;" " cmovc %%rdx, %%rax;" " add %%rax, %%r8;" - " movq %%r8, 0(%0);" + " movq %%r8, 0(%1);" /* Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo */ " mov $38, %%rdx;" - " mulxq 96(%1), %%r8, %%r13;" - " xor %%rcx, %%rcx;" - " adoxq 64(%1), %%r8;" - " mulxq 104(%1), %%r9, %%rbx;" + " mulxq 96(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 64(%0), %%r8;" + " mulxq 104(%0), %%r9, %%rbx;" " adcx %%r13, %%r9;" - " adoxq 72(%1), %%r9;" - " mulxq 112(%1), %%r10, %%r13;" + " adoxq 72(%0), %%r9;" + " mulxq 112(%0), %%r10, %%r13;" " adcx %%rbx, %%r10;" - " adoxq 80(%1), %%r10;" - " mulxq 120(%1), %%r11, %%rax;" + " adoxq 80(%0), %%r10;" + " mulxq 120(%0), %%r11, %%rax;" " adcx %%r13, %%r11;" - " adoxq 88(%1), %%r11;" + " adoxq 88(%0), %%r11;" " adcx %%rcx, %%rax;" " adox %%rcx, %%rax;" " imul %%rdx, %%rax;" @@ -732,21 +943,21 @@ static inline void fsqr2(u64 *out, const u64 *f, u64 *tmp) /* Step 2: Fold the carry back into dst */ " add %%rax, %%r8;" " adcx %%rcx, %%r9;" - " movq %%r9, 40(%0);" + " movq %%r9, 40(%1);" " adcx %%rcx, %%r10;" - " movq %%r10, 48(%0);" + " movq %%r10, 48(%1);" " adcx %%rcx, %%r11;" - " movq %%r11, 56(%0);" + " movq %%r11, 56(%1);" /* Step 3: Fold the carry bit back in; guaranteed not to carry at this point */ " mov $0, %%rax;" " cmovc %%rdx, %%rax;" " add %%rax, %%r8;" - " movq %%r8, 32(%0);" - : "+&r,&r" (tmp), "+&r,&r" (f) - : "r,m" (out) - : "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%rbx", "%r13", "%r14", "%r15", "memory", "cc" - ); + " movq %%r8, 32(%1);" + : "+&r,&r"(f), "+&r,&r"(tmp) + : "r,m"(out) + : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", + "%r13", "%r14", "%r15", "memory", "cc"); } static void point_add_and_double(u64 *q, u64 *p01_tmp1, u64 *tmp2) diff --git a/net/wireguard/device.c b/net/wireguard/device.c 
index ece4ad2db8..062490f1b8 100644 --- a/net/wireguard/device.c +++ b/net/wireguard/device.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -160,7 +161,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) goto err_peer; } - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; __skb_queue_head_init(&packets); if (!skb_is_gso(skb)) { diff --git a/net/wireguard/queueing.c b/net/wireguard/queueing.c index 1de413b19e..8084e7408c 100644 --- a/net/wireguard/queueing.c +++ b/net/wireguard/queueing.c @@ -4,6 +4,7 @@ */ #include "queueing.h" +#include struct multicore_worker __percpu * wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr) @@ -42,7 +43,7 @@ void wg_packet_queue_free(struct crypt_queue *queue, bool purge) { free_percpu(queue->worker); WARN_ON(!purge && !__ptr_ring_empty(&queue->ring)); - ptr_ring_cleanup(&queue->ring, purge ? (void(*)(void*))kfree_skb : NULL); + ptr_ring_cleanup(&queue->ring, purge ? 
__skb_array_destroy_skb : NULL); } #define NEXT(skb) ((skb)->prev) diff --git a/net/wireguard/socket.c b/net/wireguard/socket.c index bd887f33a3..9e0af9320c 100644 --- a/net/wireguard/socket.c +++ b/net/wireguard/socket.c @@ -160,6 +160,7 @@ static int send6(struct wg_device *wg, struct sk_buff *skb, rcu_read_unlock_bh(); return ret; #else + kfree_skb(skb); return -EAFNOSUPPORT; #endif } @@ -241,7 +242,7 @@ int wg_socket_endpoint_from_skb(struct endpoint *endpoint, endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr; endpoint->src4.s_addr = ip_hdr(skb)->daddr; endpoint->src_if4 = skb->skb_iif; - } else if (skb->protocol == htons(ETH_P_IPV6)) { + } else if (IS_ENABLED(CONFIG_IPV6) && skb->protocol == htons(ETH_P_IPV6)) { endpoint->addr6.sin6_family = AF_INET6; endpoint->addr6.sin6_port = udp_hdr(skb)->source; endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr; @@ -284,7 +285,7 @@ void wg_socket_set_peer_endpoint(struct wg_peer *peer, peer->endpoint.addr4 = endpoint->addr4; peer->endpoint.src4 = endpoint->src4; peer->endpoint.src_if4 = endpoint->src_if4; - } else if (endpoint->addr.sa_family == AF_INET6) { + } else if (IS_ENABLED(CONFIG_IPV6) && endpoint->addr.sa_family == AF_INET6) { peer->endpoint.addr6 = endpoint->addr6; peer->endpoint.src6 = endpoint->src6; } else { diff --git a/net/wireguard/tests/netns.sh b/net/wireguard/tests/netns.sh index eff6030ae1..f21d59924a 100755 --- a/net/wireguard/tests/netns.sh +++ b/net/wireguard/tests/netns.sh @@ -22,10 +22,12 @@ # interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further # details on how this is accomplished. 
set -e +shopt -s extglob exec 3>&1 export LANG=C export WG_HIDE_KEYS=never +NPROC=( /sys/devices/system/cpu/cpu+([0-9]) ); NPROC=${#NPROC[@]} netns0="wg-test-$$-0" netns1="wg-test-$$-1" netns2="wg-test-$$-2" @@ -147,17 +149,15 @@ tests() { [[ $(< /proc/version) =~ ^Linux\ version\ 5\.4[.\ ] ]] || return 0 # TCP over IPv4, in parallel - for max in 4 5 50; do - local pids=( ) - for ((i=0; i < max; ++i)) do - n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 & - pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i )) - done - for ((i=0; i < max; ++i)) do - n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 & - done - wait "${pids[@]}" + local pids=( ) i + for ((i=0; i < NPROC; ++i)) do + n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 & + pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i )) done + for ((i=0; i < NPROC; ++i)) do + n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 & + done + wait "${pids[@]}" } [[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}" @@ -284,7 +284,19 @@ read _ _ tx_bytes_before < <(n0 wg show wg1 transfer) ! n0 ping -W 1 -c 10 -f 192.168.241.2 || false sleep 1 read _ _ tx_bytes_after < <(n0 wg show wg1 transfer) -(( tx_bytes_after - tx_bytes_before < 70000 )) +if ! (( tx_bytes_after - tx_bytes_before < 70000 )); then + errstart=$'\x1b[37m\x1b[41m\x1b[1m' + errend=$'\x1b[0m' + echo "${errstart} ${errend}" + echo "${errstart} E R R O R ${errend}" + echo "${errstart} ${errend}" + echo "${errstart} This architecture does not do the right thing ${errend}" + echo "${errstart} with cross-namespace routing loops. This test ${errend}" + echo "${errstart} has thus technically failed but, as this issue ${errend}" + echo "${errstart} is as yet unsolved, these tests will continue ${errend}" + echo "${errstart} onward. 
:( ${errend}" + echo "${errstart} ${errend}" +fi ip0 link del wg1 ip1 link del wg0 diff --git a/net/wireguard/tests/qemu/Makefile b/net/wireguard/tests/qemu/Makefile index 2a92e0b0cb..2846b92923 100644 --- a/net/wireguard/tests/qemu/Makefile +++ b/net/wireguard/tests/qemu/Makefile @@ -86,8 +86,10 @@ CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST) export CROSS_COMPILE=$(CBUILD)- STRIP := $(CBUILD)-strip endif +QEMU_VPORT_RESULT := ifeq ($(ARCH),aarch64) QEMU_ARCH := aarch64 +QEMU_VPORT_RESULT := virtio-serial-device KERNEL_ARCH := arm64 KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/arm64/boot/Image ifeq ($(HOST_ARCH),$(ARCH)) @@ -98,6 +100,7 @@ CFLAGS += -march=armv8-a -mtune=cortex-a53 endif else ifeq ($(ARCH),aarch64_be) QEMU_ARCH := aarch64 +QEMU_VPORT_RESULT := virtio-serial-device KERNEL_ARCH := arm64 KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/arm64/boot/Image ifeq ($(HOST_ARCH),$(ARCH)) @@ -108,6 +111,7 @@ CFLAGS += -march=armv8-a -mtune=cortex-a53 endif else ifeq ($(ARCH),arm) QEMU_ARCH := arm +QEMU_VPORT_RESULT := virtio-serial-device KERNEL_ARCH := arm KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/arm/boot/zImage ifeq ($(HOST_ARCH),$(ARCH)) @@ -118,6 +122,7 @@ CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux endif else ifeq ($(ARCH),armeb) QEMU_ARCH := arm +QEMU_VPORT_RESULT := virtio-serial-device KERNEL_ARCH := arm KERNEL_BZIMAGE := $(KERNEL_PATH)/arch/arm/boot/zImage ifeq ($(HOST_ARCH),$(ARCH)) @@ -217,7 +222,7 @@ KERNEL_ARCH := m68k KERNEL_BZIMAGE := $(KERNEL_PATH)/vmlinux KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config) ifeq ($(HOST_ARCH),$(ARCH)) -QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE) +QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -append $(KERNEL_CMDLINE) else QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE) endif @@ -230,6 +235,7 @@ MUSL_CC := $(BUILD_PATH)/musl-gcc export CC := $(MUSL_CC) USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed 
$(BUILD_PATH)/include/linux/.installed +comma := , build: $(KERNEL_BZIMAGE) qemu: $(KERNEL_BZIMAGE) rm -f $(BUILD_PATH)/result @@ -240,7 +246,8 @@ qemu: $(KERNEL_BZIMAGE) $(QEMU_MACHINE) \ -m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_PATH)/.config && echo 1G || echo 256M) \ -serial stdio \ - -serial file:$(BUILD_PATH)/result \ + -chardev file,path=$(BUILD_PATH)/result,id=result \ + $(if $(QEMU_VPORT_RESULT),-device $(QEMU_VPORT_RESULT) -device virtserialport$(comma)chardev=result,-serial chardev:result) \ -no-reboot \ -monitor none \ -kernel $< @@ -277,12 +284,6 @@ $(KERNEL_PATH)/.installed: $(KERNEL_TAR) printf 'ifdef CONFIG_X86_64\nLDFLAGS += $$(call ld-option, -z max-page-size=0x200000)\nendif\n' >> $(KERNEL_PATH)/arch/x86/Makefile sed -i 's/^Elf_Addr per_cpu_load_addr;$$/static \0/' $(KERNEL_PATH)/arch/x86/tools/relocs.c || true if grep -sqr UTS_UBUNTU_RELEASE_ABI $(KERNEL_PATH)/debian/rules.d; then echo 'KBUILD_CFLAGS += -DUTS_UBUNTU_RELEASE_ABI=0' >> $(KERNEL_PATH)/Makefile; fi - if grep -sq 'RHEL_MAJOR = 8' $(KERNEL_PATH)/Makefile.rhelver; then \ - sed -i '/#include /' $(KERNEL_PATH)/arch/x86/kernel/{apic/apic.c,irqinit.c,kvm.c,mpparse.c} && \ - sed -i '/#include /' $(KERNEL_PATH)/arch/x86/kernel/setup.c && \ - sed -i '/irq_hv_callback_count/d' $(KERNEL_PATH)/arch/x86/kernel/kvm.c && \ - sed -i '/do_vmm_communication/d' $(KERNEL_PATH)/arch/x86/entry/entry_64.S; \ - fi sed -i "/^if INET\$$/a source \"net/wireguard/Kconfig\"" $(KERNEL_PATH)/net/Kconfig sed -i "/^obj-\$$(CONFIG_NETFILTER).*+=/a obj-\$$(CONFIG_WIREGUARD) += wireguard/" $(KERNEL_PATH)/net/Makefile ln -sfT $(shell readlink -f ../..) 
$(KERNEL_PATH)/net/wireguard diff --git a/net/wireguard/tests/qemu/arch/aarch64.config b/net/wireguard/tests/qemu/arch/aarch64.config index 3d063bb247..09016880ce 100644 --- a/net/wireguard/tests/qemu/arch/aarch64.config +++ b/net/wireguard/tests/qemu/arch/aarch64.config @@ -1,5 +1,8 @@ CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" CONFIG_FRAME_WARN=1280 diff --git a/net/wireguard/tests/qemu/arch/aarch64_be.config b/net/wireguard/tests/qemu/arch/aarch64_be.config index dbdc7e406a..19ff66e4c6 100644 --- a/net/wireguard/tests/qemu/arch/aarch64_be.config +++ b/net/wireguard/tests/qemu/arch/aarch64_be.config @@ -1,6 +1,9 @@ CONFIG_CPU_BIG_ENDIAN=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" CONFIG_FRAME_WARN=1280 diff --git a/net/wireguard/tests/qemu/arch/arm.config b/net/wireguard/tests/qemu/arch/arm.config index 148f499054..fc7959bef9 100644 --- a/net/wireguard/tests/qemu/arch/arm.config +++ b/net/wireguard/tests/qemu/arch/arm.config @@ -4,6 +4,9 @@ CONFIG_ARCH_VIRT=y CONFIG_THUMB2_KERNEL=n CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/net/wireguard/tests/qemu/arch/armeb.config b/net/wireguard/tests/qemu/arch/armeb.config index bd76b07d00..f3066be81c 100644 --- a/net/wireguard/tests/qemu/arch/armeb.config +++ b/net/wireguard/tests/qemu/arch/armeb.config 
@@ -4,7 +4,10 @@ CONFIG_ARCH_VIRT=y CONFIG_THUMB2_KERNEL=n CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1" +CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" CONFIG_CPU_BIG_ENDIAN=y CONFIG_FRAME_WARN=1024 diff --git a/net/wireguard/tests/qemu/arch/i686.config b/net/wireguard/tests/qemu/arch/i686.config index a85025d720..6d90892a85 100644 --- a/net/wireguard/tests/qemu/arch/i686.config +++ b/net/wireguard/tests/qemu/arch/i686.config @@ -1,5 +1,6 @@ +CONFIG_ACPI=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/net/wireguard/tests/qemu/arch/m68k.config b/net/wireguard/tests/qemu/arch/m68k.config index 62a15bdb87..82c925e49b 100644 --- a/net/wireguard/tests/qemu/arch/m68k.config +++ b/net/wireguard/tests/qemu/arch/m68k.config @@ -5,5 +5,5 @@ CONFIG_MAC=y CONFIG_SERIAL_PMACZILOG=y CONFIG_SERIAL_PMACZILOG_TTYS=y CONFIG_SERIAL_PMACZILOG_CONSOLE=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/net/wireguard/tests/qemu/arch/mips.config b/net/wireguard/tests/qemu/arch/mips.config index df71d6b955..d7ec63c17b 100644 --- a/net/wireguard/tests/qemu/arch/mips.config +++ b/net/wireguard/tests/qemu/arch/mips.config @@ -7,5 +7,5 @@ CONFIG_POWER_RESET_SYSCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/net/wireguard/tests/qemu/arch/mips64.config b/net/wireguard/tests/qemu/arch/mips64.config index 90c783f725..0994947e33 100644 --- 
a/net/wireguard/tests/qemu/arch/mips64.config +++ b/net/wireguard/tests/qemu/arch/mips64.config @@ -10,5 +10,5 @@ CONFIG_POWER_RESET_SYSCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1280 diff --git a/net/wireguard/tests/qemu/arch/mips64el.config b/net/wireguard/tests/qemu/arch/mips64el.config index 435b0b43e0..591184342f 100644 --- a/net/wireguard/tests/qemu/arch/mips64el.config +++ b/net/wireguard/tests/qemu/arch/mips64el.config @@ -11,5 +11,5 @@ CONFIG_POWER_RESET_SYSCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1280 diff --git a/net/wireguard/tests/qemu/arch/mipsel.config b/net/wireguard/tests/qemu/arch/mipsel.config index 62bb50c4a8..18a4982937 100644 --- a/net/wireguard/tests/qemu/arch/mipsel.config +++ b/net/wireguard/tests/qemu/arch/mipsel.config @@ -8,5 +8,5 @@ CONFIG_POWER_RESET_SYSCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/net/wireguard/tests/qemu/arch/powerpc.config b/net/wireguard/tests/qemu/arch/powerpc.config index 57957093b7..5e04882e8e 100644 --- a/net/wireguard/tests/qemu/arch/powerpc.config +++ b/net/wireguard/tests/qemu/arch/powerpc.config @@ -6,5 +6,5 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_MATH_EMULATION=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/net/wireguard/tests/qemu/arch/powerpc64le.config b/net/wireguard/tests/qemu/arch/powerpc64le.config index f52f1e2bc7..8148b9d122 100644 --- 
a/net/wireguard/tests/qemu/arch/powerpc64le.config +++ b/net/wireguard/tests/qemu/arch/powerpc64le.config @@ -7,7 +7,7 @@ CONFIG_PPC_RADIX_MMU=y CONFIG_HVC_CONSOLE=y CONFIG_CPU_LITTLE_ENDIAN=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=hvc0 wg.success=hvc1" +CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1" CONFIG_SECTION_MISMATCH_WARN_ONLY=y CONFIG_FRAME_WARN=1280 CONFIG_THREAD_SHIFT=14 diff --git a/net/wireguard/tests/qemu/arch/x86_64.config b/net/wireguard/tests/qemu/arch/x86_64.config index 00a1ef4869..efa00693e0 100644 --- a/net/wireguard/tests/qemu/arch/x86_64.config +++ b/net/wireguard/tests/qemu/arch/x86_64.config @@ -1,5 +1,6 @@ +CONFIG_ACPI=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1" +CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1280 diff --git a/net/wireguard/tests/qemu/init.c b/net/wireguard/tests/qemu/init.c index 3e2a237e6c..6be8064cf9 100644 --- a/net/wireguard/tests/qemu/init.c +++ b/net/wireguard/tests/qemu/init.c @@ -58,27 +58,17 @@ static void print_banner(void) static void seed_rng(void) { - int fd; - struct { - int entropy_count; - int buffer_size; - unsigned char buffer[256]; - } entropy = { - .entropy_count = sizeof(entropy.buffer) * 8, - .buffer_size = sizeof(entropy.buffer), - .buffer = "Adding real entropy is not actually important for these tests. Don't try this at home, kids!" 
- }; + int bits = 4096, fd; - if (mknod("/dev/urandom", S_IFCHR | 0644, makedev(1, 9))) - panic("mknod(/dev/urandom)"); - fd = open("/dev/urandom", O_WRONLY); + pretty_message("[+] Fake seeding RNG..."); + fd = open("/dev/random", O_WRONLY); if (fd < 0) - panic("open(urandom)"); + panic("open(random)"); for (;;) { - if (getrandom(entropy.buffer, sizeof(entropy.buffer), GRND_NONBLOCK) != -1 || errno != EAGAIN) + if (!getrandom(NULL, 0, GRND_NONBLOCK) || errno == ENOSYS) break; - if (ioctl(fd, RNDADDENTROPY, &entropy) < 0) - panic("ioctl(urandom)"); + if (ioctl(fd, RNDADDTOENTCNT, &bits) < 0) + panic("ioctl(RNDADDTOENTCNT)"); } close(fd); } @@ -126,12 +116,6 @@ static void enable_logging(void) panic("write(exception-trace)"); close(fd); } - fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY); - if (fd >= 0) { - if (write(fd, "1\n", 2) != 2) - panic("write(panic_on_warn)"); - close(fd); - } } static void kmod_selftests(void) @@ -274,10 +258,10 @@ static void check_leaks(void) int main(int argc, char *argv[]) { - seed_rng(); ensure_console(); print_banner(); mount_filesystems(); + seed_rng(); kmod_selftests(); enable_logging(); clear_leaks();