author    | Sam James <sam@gentoo.org> | 2024-03-04 02:37:32 +0000
committer | Sam James <sam@gentoo.org> | 2024-03-04 02:37:32 +0000
commit    | 361b44c3bbbb24c615c26b5ba36cc84a80968f33 (patch)
tree      | 68591fbd377b086f783584583ef2b5ab9fd68690 /dev-libs/libgcrypt
parent    | app-text/mdbook: Stabilize 0.4.37 amd64, #926115 (diff)
dev-libs/libgcrypt: backport asm fixes
* Backport fixes for x86 builds with little or no optimisation
* Backport HPPA asm fix
Closes: https://bugs.gentoo.org/915060
Closes: https://bugs.gentoo.org/925284
Signed-off-by: Sam James <sam@gentoo.org>
Diffstat (limited to 'dev-libs/libgcrypt')
-rw-r--r-- | dev-libs/libgcrypt/files/libgcrypt-1.10.3-hppa.patch         | 110
-rw-r--r-- | dev-libs/libgcrypt/files/libgcrypt-1.10.3-x86-refactor.patch | 428
-rw-r--r-- | dev-libs/libgcrypt/files/libgcrypt-1.10.3-x86.patch          |  94
-rw-r--r-- | dev-libs/libgcrypt/libgcrypt-1.10.3-r2.ebuild                | 178
4 files changed, 810 insertions, 0 deletions
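
Both backported patches build multi-limb big-integer addition and subtraction out of 32-bit limbs with an explicit carry chain (addl/adcl on i386, add/addc on hppa), and the x86 refactor splits the long asm blocks into two-limb steps so each block needs fewer registers. As a rough, portable illustration of what an ADD4_LIMB32-style macro computes — plain C, not libgcrypt's actual implementation — consider:

```c
#include <stdint.h>
#include <stdio.h>

/* Portable sketch of a 4x32-bit limb addition with carry propagation.
 * The real ADD4_LIMB32/ADD8_LIMB32 macros in the patches below do this
 * with inline asm; this is only an illustration. */
static void add4_limb32(uint32_t a[4], const uint32_t b[4], const uint32_t c[4])
{
  uint32_t carry = 0;
  for (int i = 0; i < 4; i++)       /* limb 0 is the least significant */
    {
      uint64_t t = (uint64_t)b[i] + c[i] + carry;
      a[i]  = (uint32_t)t;          /* low 32 bits of the partial sum */
      carry = (uint32_t)(t >> 32);  /* carry into the next limb */
    }
}

int main(void)
{
  const uint32_t b[4] = { 0xffffffffu, 0xffffffffu, 0, 0 };
  const uint32_t c[4] = { 1, 0, 0, 0 };
  uint32_t a[4];

  add4_limb32(a, b, c);             /* expect a = {0, 0, 1, 0} */
  printf("%08x %08x %08x %08x\n", a[3], a[2], a[1], a[0]);
  return 0;
}
```

Chaining short two-limb asm fragments (CARRY_OUT, then CARRY_IN_OUT, then CARRY_IN) instead of emitting one large block is what lets the refactored macros get by with only a few registers per fragment, which is where the reduced register pressure described in the second patch comes from.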
diff --git a/dev-libs/libgcrypt/files/libgcrypt-1.10.3-hppa.patch b/dev-libs/libgcrypt/files/libgcrypt-1.10.3-hppa.patch new file mode 100644 index 000000000000..daa1bba9f439 --- /dev/null +++ b/dev-libs/libgcrypt/files/libgcrypt-1.10.3-hppa.patch @@ -0,0 +1,110 @@ +https://bugs.gentoo.org/925284 +https://git.gnupg.org/cgi-bin/gitweb.cgi?p=libgcrypt.git;a=commit;h=75e9bcccb69a9dea67d90840bd295bbd1749cea3 + +From 75e9bcccb69a9dea67d90840bd295bbd1749cea3 Mon Sep 17 00:00:00 2001 +From: NIIBE Yutaka <gniibe@fsij.org> +Date: Mon, 4 Mar 2024 09:00:59 +0900 +Subject: [PATCH] mpi: Fix ECC computation on hppa. + +* mpi/ec-inline.h [__hppa] (ADD4_LIMB32, SUB4_LIMB32): New. +* mpi/longlong.h [__hppa] (add_ssaaaa, sub_ddmmss): Add __CLOBBER_CC. + +-- + +Cherry-pick master commit of: + b757f4130af987bdfc769b754b6e9e27882c349c + +GnuPG-bug-id: 7022 +Signed-off-by: NIIBE Yutaka <gniibe@fsij.org> +--- + mpi/ec-inline.h | 40 ++++++++++++++++++++++++++++++++++++++++ + mpi/longlong.h | 12 ++++++------ + 2 files changed, 46 insertions(+), 6 deletions(-) + +diff --git a/mpi/ec-inline.h b/mpi/ec-inline.h +index 0ffdf8eb..c24d5352 100644 +--- a/mpi/ec-inline.h ++++ b/mpi/ec-inline.h +@@ -921,6 +921,46 @@ LIMB64_HILO(mpi_limb_t hi, mpi_limb_t lo) + + #endif /* HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS */ + ++#if defined (__hppa) && __GNUC__ >= 4 ++#define ADD4_LIMB32(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \ ++ __asm__ ("add %7,%11,%3\n\t" \ ++ "addc %6,%10,%2\n\t" \ ++ "addc %5,%9,%1\n\t" \ ++ "addc %4,%8,%0" \ ++ : "=r" (A3), \ ++ "=&r" (A2), \ ++ "=&r" (A1), \ ++ "=&r" (A0) \ ++ : "rM" ((mpi_limb_t)(B3)), \ ++ "rM" ((mpi_limb_t)(B2)), \ ++ "rM" ((mpi_limb_t)(B1)), \ ++ "rM" ((mpi_limb_t)(B0)), \ ++ "rM" ((mpi_limb_t)(C3)), \ ++ "rM" ((mpi_limb_t)(C2)), \ ++ "rM" ((mpi_limb_t)(C1)), \ ++ "rM" ((mpi_limb_t)(C0)) \ ++ : "cc") ++ ++#define SUB4_LIMB32(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \ ++ __asm__ ("sub %7,%11,%3\n\t" \ ++ "subb %6,%10,%2\n\t" \ ++ "subb %5,%9,%1\n\t" \ ++ "subb %4,%8,%0\n\t" \ ++ : "=r" (A3), \ ++ "=&r" (A2), \ ++ "=&r" (A1), \ ++ "=&r" (A0) \ ++ : "rM" ((mpi_limb_t)(B3)), \ ++ "rM" ((mpi_limb_t)(B2)), \ ++ "rM" ((mpi_limb_t)(B1)), \ ++ "rM" ((mpi_limb_t)(B0)), \ ++ "rM" ((mpi_limb_t)(C3)), \ ++ "rM" ((mpi_limb_t)(C2)), \ ++ "rM" ((mpi_limb_t)(C1)), \ ++ "rM" ((mpi_limb_t)(C0)) \ ++ : "cc") ++ ++#endif /* __hppa */ + + /* Common 32-bit arch addition/subtraction macros. 
*/ + +diff --git a/mpi/longlong.h b/mpi/longlong.h +index c299534c..1ab70e7e 100644 +--- a/mpi/longlong.h ++++ b/mpi/longlong.h +@@ -394,23 +394,23 @@ extern UDItype __udiv_qrnnd (); + ***************************************/ + #if defined (__hppa) && W_TYPE_SIZE == 32 + # define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +- __asm__ (" add %4,%5,%1\n" \ +- " addc %2,%3,%0" \ ++ __asm__ ("add %4,%5,%1\n\t" \ ++ "addc %2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "%rM" ((USItype)(ah)), \ + "rM" ((USItype)(bh)), \ + "%rM" ((USItype)(al)), \ +- "rM" ((USItype)(bl))) ++ "rM" ((USItype)(bl)) __CLOBBER_CC) + # define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +- __asm__ (" sub %4,%5,%1\n" \ +- " subb %2,%3,%0" \ ++ __asm__ ("sub %4,%5,%1\n\t" \ ++ "subb %2,%3,%0" \ + : "=r" ((USItype)(sh)), \ + "=&r" ((USItype)(sl)) \ + : "rM" ((USItype)(ah)), \ + "rM" ((USItype)(bh)), \ + "rM" ((USItype)(al)), \ +- "rM" ((USItype)(bl))) ++ "rM" ((USItype)(bl)) __CLOBBER_CC) + # if defined (_PA_RISC1_1) + # define umul_ppmm(wh, wl, u, v) \ + do { \ +-- +2.30.2 diff --git a/dev-libs/libgcrypt/files/libgcrypt-1.10.3-x86-refactor.patch b/dev-libs/libgcrypt/files/libgcrypt-1.10.3-x86-refactor.patch new file mode 100644 index 000000000000..527150671097 --- /dev/null +++ b/dev-libs/libgcrypt/files/libgcrypt-1.10.3-x86-refactor.patch @@ -0,0 +1,428 @@ +https://bugs.gentoo.org/915060 +https://git.gnupg.org/cgi-bin/gitweb.cgi?p=libgcrypt.git;a=commit;h=07f0563d325c6589ca1560525d3b22291feec227 + +From 07f0563d325c6589ca1560525d3b22291feec227 Mon Sep 17 00:00:00 2001 +From: Jussi Kivilinna <jussi.kivilinna@iki.fi> +Date: Tue, 19 Dec 2023 20:23:47 +0200 +Subject: [PATCH] mpi/ec-inline: refactor i386 assembly to reduce register + usage + +* mpi/ec-inline.h [__i386__] (ADD2_LIMB32_CARRY_OUT) +(ADD2_LIMB32_CARRY_IN_OUT, ADD2_LIB32_CARRY_IN, SUB2_LIMB32_CARRY_OUT) +(SUB2_LIMB32_CARRY_IN_OUT, SUB2_LIB32_CARRY_IN, ADD8_LIMB32) +(ADD10_LIMB32, ADD14_LIMB32, SUB8_LIMB32, SUB10_LIMB32) +(SUB14_LIMB32): New. +[__i386__] (ADD4_LIMB32, ADD6_LIMB32, SUB4_LIMB32, SUB6_LIMB32): Rewrite +to use new *_CARRY_* macros. +[BYTES_PER_MPI_LIMB == 4] (ADD4_LIMB64): Use ADD8_LIMB32 if available. +[BYTES_PER_MPI_LIMB == 4] (ADD5_LIMB64): Use ADD10_LIMB32 if available. +[BYTES_PER_MPI_LIMB == 4] (ADD7_LIMB64): Use ADD14_LIMB32 if available. +[BYTES_PER_MPI_LIMB == 4] (SUB4_LIMB64): Use SUB8_LIMB32 if available. +[BYTES_PER_MPI_LIMB == 4] (SUB5_LIMB64): Use SUB10_LIMB32 if available. +[BYTES_PER_MPI_LIMB == 4] (SUB7_LIMB64): Use SUB14_LIMB32 if available. +-- + +Cherry pick of master commit: + 956f1ed4ec6ead59dc56f574f943f1fe25dac723 + +This commit reduces number register operands and register pressure +at i386 assembly used in `ec-nist.c` NIST-P192, P224, P256, and P384. +Performance stays relatively same, with P192 being ~2% slower +than before and P384 being ~5% faster. + +GnuPG-bug-id: T6892 +Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi> +--- + mpi/ec-inline.h | 327 +++++++++++++++++++++++++++++++++--------------- + 1 file changed, 229 insertions(+), 98 deletions(-) + +diff --git a/mpi/ec-inline.h b/mpi/ec-inline.h +index a07826e3..0ffdf8eb 100644 +--- a/mpi/ec-inline.h ++++ b/mpi/ec-inline.h +@@ -641,116 +641,192 @@ LIMB64_HILO(mpi_limb_t hi, mpi_limb_t lo) + /* i386 addition/subtraction helpers. 
*/ + #if defined (__i386__) && defined(HAVE_CPU_ARCH_X86) && __GNUC__ >= 4 + +-#define ADD4_LIMB32(a3, a2, a1, a0, b3, b2, b1, b0, c3, c2, c1, c0) \ +- __asm__ ("addl %11, %3\n" \ +- "adcl %10, %2\n" \ +- "adcl %9, %1\n" \ +- "adcl %8, %0\n" \ +- : "=r" (a3), \ +- "=&r" (a2), \ ++#define ADD2_LIMB32_CARRY_OUT(carry, a1, a0, b1, b0, c1, c0) \ ++ __asm__ ("addl %7, %2\n" \ ++ "adcl %6, %1\n" \ ++ "sbbl %0, %0\n" \ ++ : "=r" (carry), \ + "=&r" (a1), \ + "=&r" (a0) \ +- : "0" ((mpi_limb_t)(b3)), \ +- "1" ((mpi_limb_t)(b2)), \ +- "2" ((mpi_limb_t)(b1)), \ +- "3" ((mpi_limb_t)(b0)), \ +- "g" ((mpi_limb_t)(c3)), \ +- "g" ((mpi_limb_t)(c2)), \ +- "g" ((mpi_limb_t)(c1)), \ +- "g" ((mpi_limb_t)(c0)) \ ++ : "0" ((mpi_limb_t)(0)), \ ++ "1" ((mpi_limb_t)(b1)), \ ++ "2" ((mpi_limb_t)(b0)), \ ++ "re" ((mpi_limb_t)(c1)), \ ++ "re" ((mpi_limb_t)(c0)) \ + : "cc") + ++#define ADD2_LIMB32_CARRY_IN_OUT(a1, a0, b1, b0, c1, c0, carry) \ ++ __asm__ ("addl $1, %0\n" \ ++ "adcl %7, %2\n" \ ++ "adcl %6, %1\n" \ ++ "sbbl %0, %0\n" \ ++ : "=r" (carry), \ ++ "=&r" (a1), \ ++ "=&r" (a0) \ ++ : "0" ((mpi_limb_t)(carry)), \ ++ "1" ((mpi_limb_t)(b1)), \ ++ "2" ((mpi_limb_t)(b0)), \ ++ "re" ((mpi_limb_t)(c1)), \ ++ "re" ((mpi_limb_t)(c0)) \ ++ : "cc") ++ ++#define ADD2_LIMB32_CARRY_IN(a1, a0, b1, b0, c1, c0, carry) \ ++ __asm__ ("addl $1, %2\n" \ ++ "adcl %7, %1\n" \ ++ "adcl %6, %0\n" \ ++ : "=r" (a1), \ ++ "=&r" (a0), \ ++ "=&g" (carry) \ ++ : "0" ((mpi_limb_t)(b1)), \ ++ "1" ((mpi_limb_t)(b0)), \ ++ "2" ((mpi_limb_t)(carry)), \ ++ "re" ((mpi_limb_t)(c1)), \ ++ "re" ((mpi_limb_t)(c0)) \ ++ : "cc") ++ ++#define ADD4_LIMB32(a3, a2, a1, a0, b3, b2, b1, b0, c3, c2, c1, c0) do { \ ++ mpi_limb_t __carry4_32; \ ++ ADD2_LIMB32_CARRY_OUT(__carry4_32, a1, a0, b1, b0, c1, c0); \ ++ ADD2_LIMB32_CARRY_IN(a3, a2, b3, b2, c3, c2, __carry4_32); \ ++ } while (0) ++ + #define ADD6_LIMB32(a5, a4, a3, a2, a1, a0, b5, b4, b3, b2, b1, b0, \ + c5, c4, c3, c2, c1, c0) do { \ + mpi_limb_t __carry6_32; \ +- __asm__ ("addl %10, %3\n" \ +- "adcl %9, %2\n" \ +- "adcl %8, %1\n" \ +- "sbbl %0, %0\n" \ +- : "=r" (__carry6_32), \ +- "=&r" (a2), \ +- "=&r" (a1), \ +- "=&r" (a0) \ +- : "0" ((mpi_limb_t)(0)), \ +- "1" ((mpi_limb_t)(b2)), \ +- "2" ((mpi_limb_t)(b1)), \ +- "3" ((mpi_limb_t)(b0)), \ +- "g" ((mpi_limb_t)(c2)), \ +- "g" ((mpi_limb_t)(c1)), \ +- "g" ((mpi_limb_t)(c0)) \ +- : "cc"); \ +- __asm__ ("addl $1, %3\n" \ +- "adcl %10, %2\n" \ +- "adcl %9, %1\n" \ +- "adcl %8, %0\n" \ +- : "=r" (a5), \ +- "=&r" (a4), \ +- "=&r" (a3), \ +- "=&r" (__carry6_32) \ +- : "0" ((mpi_limb_t)(b5)), \ +- "1" ((mpi_limb_t)(b4)), \ +- "2" ((mpi_limb_t)(b3)), \ +- "3" ((mpi_limb_t)(__carry6_32)), \ +- "g" ((mpi_limb_t)(c5)), \ +- "g" ((mpi_limb_t)(c4)), \ +- "g" ((mpi_limb_t)(c3)) \ +- : "cc"); \ ++ ADD2_LIMB32_CARRY_OUT(__carry6_32, a1, a0, b1, b0, c1, c0); \ ++ ADD2_LIMB32_CARRY_IN_OUT(a3, a2, b3, b2, c3, c2, __carry6_32); \ ++ ADD2_LIMB32_CARRY_IN(a5, a4, b5, b4, c5, c4, __carry6_32); \ ++ } while (0) ++ ++#define ADD8_LIMB32(a7, a6, a5, a4, a3, a2, a1, a0, \ ++ b7, b6, b5, b4, b3, b2, b1, b0, \ ++ c7, c6, c5, c4, c3, c2, c1, c0) do { \ ++ mpi_limb_t __carry8_32; \ ++ ADD2_LIMB32_CARRY_OUT(__carry8_32, a1, a0, b1, b0, c1, c0); \ ++ ADD2_LIMB32_CARRY_IN_OUT(a3, a2, b3, b2, c3, c2, __carry8_32); \ ++ ADD2_LIMB32_CARRY_IN_OUT(a5, a4, b5, b4, c5, c4, __carry8_32); \ ++ ADD2_LIMB32_CARRY_IN(a7, a6, b7, b6, c7, c6, __carry8_32); \ + } while (0) + +-#define SUB4_LIMB32(a3, a2, a1, a0, b3, b2, b1, b0, c3, c2, c1, c0) \ +- __asm__ ("subl %11, %3\n" \ +- "sbbl %10, %2\n" \ +- 
"sbbl %9, %1\n" \ +- "sbbl %8, %0\n" \ +- : "=r" (a3), \ +- "=&r" (a2), \ ++#define ADD10_LIMB32(a9, a8, a7, a6, a5, a4, a3, a2, a1, a0, \ ++ b9, b8, b7, b6, b5, b4, b3, b2, b1, b0, \ ++ c9, c8, c7, c6, c5, c4, c3, c2, c1, c0) do { \ ++ mpi_limb_t __carry10_32; \ ++ ADD2_LIMB32_CARRY_OUT(__carry10_32, a1, a0, b1, b0, c1, c0); \ ++ ADD2_LIMB32_CARRY_IN_OUT(a3, a2, b3, b2, c3, c2, __carry10_32); \ ++ ADD2_LIMB32_CARRY_IN_OUT(a5, a4, b5, b4, c5, c4, __carry10_32); \ ++ ADD2_LIMB32_CARRY_IN_OUT(a7, a6, b7, b6, c7, c6, __carry10_32); \ ++ ADD2_LIMB32_CARRY_IN(a9, a8, b9, b8, c9, c8, __carry10_32); \ ++ } while (0) ++ ++#define ADD14_LIMB32(a13, a12, a11, a10, a9, a8, a7, \ ++ a6, a5, a4, a3, a2, a1, a0, \ ++ b13, b12, b11, b10, b9, b8, b7, \ ++ b6, b5, b4, b3, b2, b1, b0, \ ++ c13, c12, c11, c10, c9, c8, c7, \ ++ c6, c5, c4, c3, c2, c1, c0) do { \ ++ mpi_limb_t __carry14_32; \ ++ ADD2_LIMB32_CARRY_OUT(__carry14_32, a1, a0, b1, b0, c1, c0); \ ++ ADD2_LIMB32_CARRY_IN_OUT(a3, a2, b3, b2, c3, c2, __carry14_32); \ ++ ADD2_LIMB32_CARRY_IN_OUT(a5, a4, b5, b4, c5, c4, __carry14_32); \ ++ ADD2_LIMB32_CARRY_IN_OUT(a7, a6, b7, b6, c7, c6, __carry14_32); \ ++ ADD2_LIMB32_CARRY_IN_OUT(a9, a8, b9, b8, c9, c8, __carry14_32); \ ++ ADD2_LIMB32_CARRY_IN_OUT(a11, a10, b11, b10, c11, c10, __carry14_32); \ ++ ADD2_LIMB32_CARRY_IN(a13, a12, b13, b12, c13, c12, __carry14_32); \ ++ } while (0) ++ ++#define SUB2_LIMB32_CARRY_OUT(carry, a1, a0, b1, b0, c1, c0) \ ++ __asm__ ("subl %7, %2\n" \ ++ "sbbl %6, %1\n" \ ++ "sbbl %0, %0\n" \ ++ : "=r" (carry), \ + "=&r" (a1), \ + "=&r" (a0) \ +- : "0" ((mpi_limb_t)(b3)), \ +- "1" ((mpi_limb_t)(b2)), \ +- "2" ((mpi_limb_t)(b1)), \ +- "3" ((mpi_limb_t)(b0)), \ +- "g" ((mpi_limb_t)(c3)), \ +- "g" ((mpi_limb_t)(c2)), \ +- "g" ((mpi_limb_t)(c1)), \ +- "g" ((mpi_limb_t)(c0)) \ ++ : "0" ((mpi_limb_t)(0)), \ ++ "1" ((mpi_limb_t)(b1)), \ ++ "2" ((mpi_limb_t)(b0)), \ ++ "re" ((mpi_limb_t)(c1)), \ ++ "re" ((mpi_limb_t)(c0)) \ ++ : "cc") ++ ++#define SUB2_LIMB32_CARRY_IN_OUT(a1, a0, b1, b0, c1, c0, carry) \ ++ __asm__ ("addl $1, %0\n" \ ++ "sbbl %7, %2\n" \ ++ "sbbl %6, %1\n" \ ++ "sbbl %0, %0\n" \ ++ : "=r" (carry), \ ++ "=&r" (a1), \ ++ "=&r" (a0) \ ++ : "0" ((mpi_limb_t)(carry)), \ ++ "1" ((mpi_limb_t)(b1)), \ ++ "2" ((mpi_limb_t)(b0)), \ ++ "re" ((mpi_limb_t)(c1)), \ ++ "re" ((mpi_limb_t)(c0)) \ ++ : "cc") ++ ++#define SUB2_LIMB32_CARRY_IN(a1, a0, b1, b0, c1, c0, carry) \ ++ __asm__ ("addl $1, %2\n" \ ++ "sbbl %7, %1\n" \ ++ "sbbl %6, %0\n" \ ++ : "=r" (a1), \ ++ "=&r" (a0), \ ++ "=&g" (carry) \ ++ : "0" ((mpi_limb_t)(b1)), \ ++ "1" ((mpi_limb_t)(b0)), \ ++ "2" ((mpi_limb_t)(carry)), \ ++ "re" ((mpi_limb_t)(c1)), \ ++ "re" ((mpi_limb_t)(c0)) \ + : "cc") + ++#define SUB4_LIMB32(a3, a2, a1, a0, b3, b2, b1, b0, c3, c2, c1, c0) do { \ ++ mpi_limb_t __carry4_32; \ ++ SUB2_LIMB32_CARRY_OUT(__carry4_32, a1, a0, b1, b0, c1, c0); \ ++ SUB2_LIMB32_CARRY_IN(a3, a2, b3, b2, c3, c2, __carry4_32); \ ++ } while (0) ++ + #define SUB6_LIMB32(a5, a4, a3, a2, a1, a0, b5, b4, b3, b2, b1, b0, \ + c5, c4, c3, c2, c1, c0) do { \ +- mpi_limb_t __borrow6_32; \ +- __asm__ ("subl %10, %3\n" \ +- "sbbl %9, %2\n" \ +- "sbbl %8, %1\n" \ +- "sbbl %0, %0\n" \ +- : "=r" (__borrow6_32), \ +- "=&r" (a2), \ +- "=&r" (a1), \ +- "=&r" (a0) \ +- : "0" ((mpi_limb_t)(0)), \ +- "1" ((mpi_limb_t)(b2)), \ +- "2" ((mpi_limb_t)(b1)), \ +- "3" ((mpi_limb_t)(b0)), \ +- "g" ((mpi_limb_t)(c2)), \ +- "g" ((mpi_limb_t)(c1)), \ +- "g" ((mpi_limb_t)(c0)) \ +- : "cc"); \ +- __asm__ ("addl $1, %3\n" \ +- "sbbl %10, %2\n" \ +- "sbbl %9, %1\n" 
\ +- "sbbl %8, %0\n" \ +- : "=r" (a5), \ +- "=&r" (a4), \ +- "=&r" (a3), \ +- "=&r" (__borrow6_32) \ +- : "0" ((mpi_limb_t)(b5)), \ +- "1" ((mpi_limb_t)(b4)), \ +- "2" ((mpi_limb_t)(b3)), \ +- "3" ((mpi_limb_t)(__borrow6_32)), \ +- "g" ((mpi_limb_t)(c5)), \ +- "g" ((mpi_limb_t)(c4)), \ +- "g" ((mpi_limb_t)(c3)) \ +- : "cc"); \ ++ mpi_limb_t __carry6_32; \ ++ SUB2_LIMB32_CARRY_OUT(__carry6_32, a1, a0, b1, b0, c1, c0); \ ++ SUB2_LIMB32_CARRY_IN_OUT(a3, a2, b3, b2, c3, c2, __carry6_32); \ ++ SUB2_LIMB32_CARRY_IN(a5, a4, b5, b4, c5, c4, __carry6_32); \ ++ } while (0) ++ ++#define SUB8_LIMB32(a7, a6, a5, a4, a3, a2, a1, a0, \ ++ b7, b6, b5, b4, b3, b2, b1, b0, \ ++ c7, c6, c5, c4, c3, c2, c1, c0) do { \ ++ mpi_limb_t __carry8_32; \ ++ SUB2_LIMB32_CARRY_OUT(__carry8_32, a1, a0, b1, b0, c1, c0); \ ++ SUB2_LIMB32_CARRY_IN_OUT(a3, a2, b3, b2, c3, c2, __carry8_32); \ ++ SUB2_LIMB32_CARRY_IN_OUT(a5, a4, b5, b4, c5, c4, __carry8_32); \ ++ SUB2_LIMB32_CARRY_IN(a7, a6, b7, b6, c7, c6, __carry8_32); \ ++ } while (0) ++ ++#define SUB10_LIMB32(a9, a8, a7, a6, a5, a4, a3, a2, a1, a0, \ ++ b9, b8, b7, b6, b5, b4, b3, b2, b1, b0, \ ++ c9, c8, c7, c6, c5, c4, c3, c2, c1, c0) do { \ ++ mpi_limb_t __carry10_32; \ ++ SUB2_LIMB32_CARRY_OUT(__carry10_32, a1, a0, b1, b0, c1, c0); \ ++ SUB2_LIMB32_CARRY_IN_OUT(a3, a2, b3, b2, c3, c2, __carry10_32); \ ++ SUB2_LIMB32_CARRY_IN_OUT(a5, a4, b5, b4, c5, c4, __carry10_32); \ ++ SUB2_LIMB32_CARRY_IN_OUT(a7, a6, b7, b6, c7, c6, __carry10_32); \ ++ SUB2_LIMB32_CARRY_IN(a9, a8, b9, b8, c9, c8, __carry10_32); \ ++ } while (0) ++ ++#define SUB14_LIMB32(a13, a12, a11, a10, a9, a8, a7, \ ++ a6, a5, a4, a3, a2, a1, a0, \ ++ b13, b12, b11, b10, b9, b8, b7, \ ++ b6, b5, b4, b3, b2, b1, b0, \ ++ c13, c12, c11, c10, c9, c8, c7, \ ++ c6, c5, c4, c3, c2, c1, c0) do { \ ++ mpi_limb_t __carry14_32; \ ++ SUB2_LIMB32_CARRY_OUT(__carry14_32, a1, a0, b1, b0, c1, c0); \ ++ SUB2_LIMB32_CARRY_IN_OUT(a3, a2, b3, b2, c3, c2, __carry14_32); \ ++ SUB2_LIMB32_CARRY_IN_OUT(a5, a4, b5, b4, c5, c4, __carry14_32); \ ++ SUB2_LIMB32_CARRY_IN_OUT(a7, a6, b7, b6, c7, c6, __carry14_32); \ ++ SUB2_LIMB32_CARRY_IN_OUT(a9, a8, b9, b8, c9, c8, __carry14_32); \ ++ SUB2_LIMB32_CARRY_IN_OUT(a11, a10, b11, b10, c11, c10, __carry14_32); \ ++ SUB2_LIMB32_CARRY_IN(a13, a12, b13, b12, c13, c12, __carry14_32); \ + } while (0) + + #endif /* __i386__ */ +@@ -820,7 +896,6 @@ LIMB64_HILO(mpi_limb_t hi, mpi_limb_t lo) + "Ir" ((mpi_limb_t)(C0)) \ + : "cc") + +- + #define SUB6_LIMB32(A5, A4, A3, A2, A1, A0, B5, B4, B3, B2, B1, B0, \ + C5, C4, C3, C2, C1, C0) do { \ + mpi_limb_t __borrow6_32; \ +@@ -875,7 +950,13 @@ LIMB64_HILO(mpi_limb_t hi, mpi_limb_t lo) + C2.hi, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo) + #endif + +-#if defined(ADD6_LIMB32) ++#if defined(ADD8_LIMB32) ++/* A[0..3] = B[0..3] + C[0..3] */ ++#define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \ ++ ADD8_LIMB32(A3.hi, A3.lo, A2.hi, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, \ ++ B3.hi, B3.lo, B2.hi, B2.lo, B1.hi, B1.lo, B0.hi, B0.lo, \ ++ C3.hi, C3.lo, C2.hi, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo) ++#elif defined(ADD6_LIMB32) + /* A[0..3] = B[0..3] + C[0..3] */ + #define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) do { \ + mpi_limb_t __carry4; \ +@@ -888,6 +969,28 @@ LIMB64_HILO(mpi_limb_t hi, mpi_limb_t lo) + } while (0) + #endif + ++#if defined(ADD10_LIMB32) ++/* A[0..4] = B[0..4] + C[0..4] */ ++#define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \ ++ C4, C3, C2, C1, C0) \ ++ ADD10_LIMB32(A4.hi, A4.lo, A3.hi, A3.lo, A2.hi, A2.lo, A1.hi, A1.lo, 
\ ++ A0.hi, A0.lo, B4.hi, B4.lo, B3.hi, B3.lo, B2.hi, B2.lo, \ ++ B1.hi, B1.lo, B0.hi, B0.lo, C4.hi, C4.lo, C3.hi, C3.lo, \ ++ C2.hi, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo) ++#endif ++ ++#if defined(ADD14_LIMB32) ++/* A[0..6] = B[0..6] + C[0..6] */ ++#define ADD7_LIMB64(A6, A5, A4, A3, A2, A1, A0, B6, B5, B4, B3, B2, B1, B0, \ ++ C6, C5, C4, C3, C2, C1, C0) \ ++ ADD14_LIMB32(A6.hi, A6.lo, A5.hi, A5.lo, A4.hi, A4.lo, A3.hi, A3.lo, \ ++ A2.hi, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, B6.hi, B6.lo, \ ++ B5.hi, B5.lo, B4.hi, B4.lo, B3.hi, B3.lo, B2.hi, B2.lo, \ ++ B1.hi, B1.lo, B0.hi, B0.lo, C6.hi, C6.lo, C5.hi, C5.lo, \ ++ C4.hi, C4.lo, C3.hi, C3.lo, C2.hi, C2.lo, C1.hi, C1.lo, \ ++ C0.hi, C0.lo) ++#endif ++ + #if defined(SUB4_LIMB32) + /* A[0..1] = B[0..1] - C[0..1] */ + #define SUB2_LIMB64(A1, A0, B1, B0, C1, C0) \ +@@ -914,7 +1017,13 @@ LIMB64_HILO(mpi_limb_t hi, mpi_limb_t lo) + C2.hi, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo) + #endif + +-#if defined(SUB6_LIMB32) ++#if defined(SUB8_LIMB32) ++/* A[0..3] = B[0..3] - C[0..3] */ ++#define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \ ++ SUB8_LIMB32(A3.hi, A3.lo, A2.hi, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, \ ++ B3.hi, B3.lo, B2.hi, B2.lo, B1.hi, B1.lo, B0.hi, B0.lo, \ ++ C3.hi, C3.lo, C2.hi, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo) ++#elif defined(SUB6_LIMB32) + /* A[0..3] = B[0..3] - C[0..3] */ + #define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) do { \ + mpi_limb_t __borrow4; \ +@@ -927,6 +1036,28 @@ LIMB64_HILO(mpi_limb_t hi, mpi_limb_t lo) + } while (0) + #endif + ++#if defined(SUB10_LIMB32) ++/* A[0..4] = B[0..4] - C[0..4] */ ++#define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \ ++ C4, C3, C2, C1, C0) \ ++ SUB10_LIMB32(A4.hi, A4.lo, A3.hi, A3.lo, A2.hi, A2.lo, A1.hi, A1.lo, \ ++ A0.hi, A0.lo, B4.hi, B4.lo, B3.hi, B3.lo, B2.hi, B2.lo, \ ++ B1.hi, B1.lo, B0.hi, B0.lo, C4.hi, C4.lo, C3.hi, C3.lo, \ ++ C2.hi, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo) ++#endif ++ ++#if defined(SUB14_LIMB32) ++/* A[0..6] = B[0..6] - C[0..6] */ ++#define SUB7_LIMB64(A6, A5, A4, A3, A2, A1, A0, B6, B5, B4, B3, B2, B1, B0, \ ++ C6, C5, C4, C3, C2, C1, C0) \ ++ SUB14_LIMB32(A6.hi, A6.lo, A5.hi, A5.lo, A4.hi, A4.lo, A3.hi, A3.lo, \ ++ A2.hi, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, B6.hi, B6.lo, \ ++ B5.hi, B5.lo, B4.hi, B4.lo, B3.hi, B3.lo, B2.hi, B2.lo, \ ++ B1.hi, B1.lo, B0.hi, B0.lo, C6.hi, C6.lo, C5.hi, C5.lo, \ ++ C4.hi, C4.lo, C3.hi, C3.lo, C2.hi, C2.lo, C1.hi, C1.lo, \ ++ C0.hi, C0.lo) ++#endif ++ + #endif /* BYTES_PER_MPI_LIMB == 4 */ + + +-- +2.30.2 diff --git a/dev-libs/libgcrypt/files/libgcrypt-1.10.3-x86.patch b/dev-libs/libgcrypt/files/libgcrypt-1.10.3-x86.patch new file mode 100644 index 000000000000..51ea0047c4e4 --- /dev/null +++ b/dev-libs/libgcrypt/files/libgcrypt-1.10.3-x86.patch @@ -0,0 +1,94 @@ +https://bugs.gentoo.org/915060 +https://git.gnupg.org/cgi-bin/gitweb.cgi?p=libgcrypt.git;a=commit;h=08b88b4012f7837736b8d29a3689ce3fff2a10c8 + +From 08b88b4012f7837736b8d29a3689ce3fff2a10c8 Mon Sep 17 00:00:00 2001 +From: Jussi Kivilinna <jussi.kivilinna@iki.fi> +Date: Sat, 16 Dec 2023 19:50:23 +0200 +Subject: [PATCH] mpi/ec-nist: fix for -Og build failure on i386 + +* mpi/ec-nist.c (_gcry_mpi_ec_nist256_mod) +(_gcry_mpi_ec_nist384_mod): Load p_mult constant with carry offset +to stack. +-- + +Cherry pick master commit of: + 90097bd2f41c217dc5c666570e5680f432cf92d3 + +Patch fixes compilation error on i386 with -Og optimization level. 
+ +In file included from ../../mpi/ec-nist.c:34: +../../mpi/ec-nist.c: In function '_gcry_mpi_ec_nist256_mod': +../../mpi/ec-inline.h:701:3: error: 'asm' operand has impossible constraints + 701 | __asm__ ("subl %11, %3\n" \ + | ^~~~~~~ +../../mpi/ec-inline.h:894:9: note: in expansion of macro 'SUB4_LIMB32' + 894 | SUB4_LIMB32(A1.hi, A1.lo, A0.hi, A0.lo, \ + | ^~~~~~~~~~~ +../../mpi/ec-inline.h:1009:5: note: in expansion of macro 'SUB2_LIMB64' + 1009 | SUB2_LIMB64(A4, A3, B4, B3, C4, C3); \ + | ^~~~~~~~~~~ +../../mpi/ec-nist.c:474:3: note: in expansion of macro 'SUB5_LIMB64' + 474 | SUB5_LIMB64 (s[4], s[3], s[2], s[1], s[0], + | ^~~~~~~~~~~ + +Appears that in problematic function, too many registers end up being +allocated for addressing and there is not enough register left for +asm input/output (4 registers needed for this block). Problem can be +workaround by reducing needed addressing registers by pushing +`p_mult[carry + ...]` values to stack. On other compiler flag levels +and architectures, compiler should be able to optimize away this +extra copying and have not effect on performance. + +GnuPG-bug-id: T6892 +Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi> +--- + mpi/ec-nist.c | 23 ++++++++++++++++------- + 1 file changed, 16 insertions(+), 7 deletions(-) + +diff --git a/mpi/ec-nist.c b/mpi/ec-nist.c +index f792405c..559d02d9 100644 +--- a/mpi/ec-nist.c ++++ b/mpi/ec-nist.c +@@ -471,11 +471,15 @@ _gcry_mpi_ec_nist256_mod (gcry_mpi_t w, mpi_ec_t ctx) + + carry = LO32_LIMB64(s[4]); + ++ /* Load values to stack to ease register pressure on i386. */ ++ e[0] = p_mult[carry + 4][0]; ++ e[1] = p_mult[carry + 4][1]; ++ e[2] = p_mult[carry + 4][2]; ++ e[3] = p_mult[carry + 4][3]; ++ e[4] = p_mult[carry + 4][4]; + SUB5_LIMB64 (s[4], s[3], s[2], s[1], s[0], + s[4], s[3], s[2], s[1], s[0], +- p_mult[carry + 4][4], p_mult[carry + 4][3], +- p_mult[carry + 4][2], p_mult[carry + 4][1], +- p_mult[carry + 4][0]); ++ e[4], e[3], e[2], e[1], e[0]); + + /* Add 1*P */ + ADD5_LIMB64 (d[4], d[3], d[2], d[1], d[0], +@@ -749,12 +753,17 @@ _gcry_mpi_ec_nist384_mod (gcry_mpi_t w, mpi_ec_t ctx) + + carry = LO32_LIMB64(s[6]); + ++ /* Load values to stack to ease register pressure on i386. 
*/ ++ x[0] = p_mult[carry + 3][0]; ++ x[1] = p_mult[carry + 3][1]; ++ x[2] = p_mult[carry + 3][2]; ++ x[3] = p_mult[carry + 3][3]; ++ x[4] = p_mult[carry + 3][4]; ++ x[5] = p_mult[carry + 3][5]; ++ x[6] = p_mult[carry + 3][6]; + SUB7_LIMB64 (s[6], s[5], s[4], s[3], s[2], s[1], s[0], + s[6], s[5], s[4], s[3], s[2], s[1], s[0], +- p_mult[carry + 3][6], p_mult[carry + 3][5], +- p_mult[carry + 3][4], p_mult[carry + 3][3], +- p_mult[carry + 3][2], p_mult[carry + 3][1], +- p_mult[carry + 3][0]); ++ x[6], x[5], x[4], x[3], x[2], x[1], x[0]); + + ADD7_LIMB64 (d[6], d[5], d[4], d[3], d[2], d[1], d[0], + s[6], s[5], s[4], s[3], s[2], s[1], s[0], +-- +2.30.2 diff --git a/dev-libs/libgcrypt/libgcrypt-1.10.3-r2.ebuild b/dev-libs/libgcrypt/libgcrypt-1.10.3-r2.ebuild new file mode 100644 index 000000000000..bbb0d0fd009b --- /dev/null +++ b/dev-libs/libgcrypt/libgcrypt-1.10.3-r2.ebuild @@ -0,0 +1,178 @@ +# Copyright 1999-2024 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=8 + +VERIFY_SIG_OPENPGP_KEY_PATH=/usr/share/openpgp-keys/gnupg.asc +inherit autotools flag-o-matic linux-info multilib-minimal toolchain-funcs verify-sig + +DESCRIPTION="General purpose crypto library based on the code used in GnuPG" +HOMEPAGE="https://www.gnupg.org/" +SRC_URI="mirror://gnupg/${PN}/${P}.tar.bz2" +SRC_URI+=" verify-sig? ( mirror://gnupg/${PN}/${P}.tar.bz2.sig )" + +LICENSE="LGPL-2.1+ GPL-2+ MIT" +SLOT="0/20" # subslot = soname major version +KEYWORDS="~alpha ~amd64 ~arm ~arm64 ~hppa ~ia64 ~loong ~m68k ~mips ~ppc ~ppc64 ~riscv ~s390 ~sparc ~x86 ~amd64-linux ~x86-linux ~arm64-macos ~ppc-macos ~x64-macos ~x64-solaris" +IUSE="+asm cpu_flags_arm_neon cpu_flags_arm_aes cpu_flags_arm_sha1 cpu_flags_arm_sha2 cpu_flags_ppc_altivec cpu_flags_ppc_vsx2 cpu_flags_ppc_vsx3 cpu_flags_x86_aes cpu_flags_x86_avx cpu_flags_x86_avx2 cpu_flags_x86_padlock cpu_flags_x86_sha cpu_flags_x86_sse4_1 doc +getentropy static-libs" + +# Build system only has --disable-arm-crypto-support right now +# If changing this, update src_configure logic too. +# ARM CPUs seem to, right now, support all-or-nothing for crypto extensions, +# but this looks like it might change in future. This is just a safety check +# in case people somehow do have a CPU which only supports some. They must +# for now disable them all if that's the case. +REQUIRED_USE=" + cpu_flags_arm_aes? ( cpu_flags_arm_sha1 cpu_flags_arm_sha2 ) + cpu_flags_arm_sha1? ( cpu_flags_arm_aes cpu_flags_arm_sha2 ) + cpu_flags_arm_sha2? ( cpu_flags_arm_aes cpu_flags_arm_sha1 ) + cpu_flags_ppc_vsx3? ( cpu_flags_ppc_altivec cpu_flags_ppc_vsx2 ) + cpu_flags_ppc_vsx2? ( cpu_flags_ppc_altivec ) +" + +RDEPEND=" + >=dev-libs/libgpg-error-1.25[${MULTILIB_USEDEP}] + getentropy? ( + kernel_linux? ( + elibc_glibc? ( >=sys-libs/glibc-2.25 ) + elibc_musl? ( >=sys-libs/musl-1.1.20 ) + ) + ) +" +DEPEND="${RDEPEND}" +BDEPEND=" + doc? ( virtual/texi2dvi ) + verify-sig? 
( sec-keys/openpgp-keys-gnupg ) +" + +PATCHES=( + "${FILESDIR}"/${PN}-multilib-syspath.patch + "${FILESDIR}"/${PN}-powerpc-darwin.patch + "${FILESDIR}"/${PN}-1.9.4-no-fgrep-libgcrypt-config.patch + "${FILESDIR}"/${PN}-1.10.3-x86.patch + "${FILESDIR}"/${PN}-1.10.3-x86-refactor.patch + "${FILESDIR}"/${PN}-1.10.3-hppa.patch +) + +MULTILIB_CHOST_TOOLS=( + /usr/bin/libgcrypt-config +) + +pkg_pretend() { + if [[ ${MERGE_TYPE} == buildonly ]]; then + return + fi + if use kernel_linux && use getentropy; then + unset KV_FULL + get_running_version + if [[ -n ${KV_FULL} ]] && kernel_is -lt 3 17; then + eerror "The getentropy function requires the getrandom syscall." + eerror "This was introduced in Linux 3.17." + eerror "Your system is currently running Linux ${KV_FULL}." + eerror "Disable the 'getentropy' USE flag or upgrade your kernel." + die "Kernel is too old for getentropy" + fi + fi +} + +pkg_setup() { + : +} + +src_prepare() { + default + eautoreconf +} + +multilib_src_configure() { + if [[ ${CHOST} == *86*-solaris* ]] ; then + # ASM code uses GNU ELF syntax, divide in particular, we need to + # allow this via ASFLAGS, since we don't have a flag-o-matic + # function for that, we'll have to abuse cflags for this + append-cflags -Wa,--divide + fi + + if [[ ${CHOST} == powerpc* ]] ; then + # ./configure does a lot of automagic, prevent that + # generic ppc32+ppc64 altivec + use cpu_flags_ppc_altivec || local -x gcry_cv_cc_ppc_altivec=no + use cpu_flags_ppc_altivec || local -x gcry_cv_cc_ppc_altivec_cflags=no + # power8 vector extension, aka arch 2.07 ISA, also checked below via ppc-crypto-support + use cpu_flags_ppc_vsx2 || local -x gcry_cv_gcc_inline_asm_ppc_altivec=no + # power9 vector extension, aka arch 3.00 ISA + use cpu_flags_ppc_vsx3 || local -x gcry_cv_gcc_inline_asm_ppc_arch_3_00=no + fi + + # Workaround for GCC < 11.3 bug + # https://git.gnupg.org/cgi-bin/gitweb.cgi?p=libgcrypt.git;a=commitdiff;h=0b399721ce9709ae25f9d2050360c5ab2115ae29 + # https://dev.gnupg.org/T5581 + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102124 + if use arm64 && tc-is-gcc && (($(gcc-major-version) == 11)) && + (($(gcc-minor-version) <= 2)) && (($(gcc-micro-version) == 0)) ; then + append-flags -fno-tree-loop-vectorize + fi + + append-ldflags $(test-flags-CCLD -Wl,--undefined-version) + + local myeconfargs=( + CC_FOR_BUILD="$(tc-getBUILD_CC)" + + --enable-noexecstack + $(use_enable cpu_flags_arm_neon neon-support) + # See REQUIRED_USE comment above + $(use_enable cpu_flags_arm_aes arm-crypto-support) + $(use_enable cpu_flags_ppc_vsx2 ppc-crypto-support) + $(use_enable cpu_flags_x86_aes aesni-support) + $(use_enable cpu_flags_x86_avx avx-support) + $(use_enable cpu_flags_x86_avx2 avx2-support) + $(use_enable cpu_flags_x86_padlock padlock-support) + $(use_enable cpu_flags_x86_sha shaext-support) + $(use_enable cpu_flags_x86_sse4_1 sse41-support) + # required for sys-power/suspend[crypt], bug 751568 + $(use_enable static-libs static) + + # disabled due to various applications requiring privileges + # after libgcrypt drops them (bug #468616) + --without-capabilities + + # http://trac.videolan.org/vlc/ticket/620 + $([[ ${CHOST} == *86*-darwin* ]] && echo "--disable-asm") + + $(use asm || echo "--disable-asm") + + GPG_ERROR_CONFIG="${ESYSROOT}/usr/bin/${CHOST}-gpg-error-config" + ) + + if use kernel_linux; then + # --enable-random=getentropy requires getentropy/getrandom. + # --enable-random=linux enables legacy code that tries getrandom + # and falls back to reading /dev/random. 
+ myeconfargs+=( --enable-random=$(usex getentropy getentropy linux) ) + fi + + ECONF_SOURCE="${S}" econf "${myeconfargs[@]}" \ + $("${S}/configure" --help | grep -o -- '--without-.*-prefix') +} + +multilib_src_compile() { + default + multilib_is_native_abi && use doc && VARTEXFONTS="${T}/fonts" emake -C doc gcrypt.pdf +} + +multilib_src_test() { + # t-secmem and t-sexp need mlock which requires extra privileges; nspawn + # at least disallows that by default. + local -x GCRYPT_IN_ASAN_TEST=1 + + default +} + +multilib_src_install() { + emake DESTDIR="${D}" install + multilib_is_native_abi && use doc && dodoc doc/gcrypt.pdf +} + +multilib_src_install_all() { + default + find "${ED}" -type f -name '*.la' -delete || die +} |
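
The -Og fix in libgcrypt-1.10.3-x86.patch boils down to hoisting the p_mult[carry + k][...] operands into plain locals before the multi-limb subtraction, so each asm operand addresses a simple stack slot instead of a doubly-indexed table entry. A minimal sketch of the pattern — hypothetical table and limb count, and plain C standing in for the SUB*_LIMB64 asm macros:

```c
#include <stdint.h>
#include <stdio.h>

#define NLIMBS 5

/* Hypothetical stand-in for libgcrypt's precomputed p_mult[][] table. */
static const uint64_t p_mult[8][NLIMBS] = { {0} };

/* Plain-C stand-in for a SUB5_LIMB64-style multi-limb subtraction. */
static void sub_n(uint64_t *r, const uint64_t *x, const uint64_t *y)
{
  uint64_t borrow = 0;
  for (int i = 0; i < NLIMBS; i++)
    {
      uint64_t d = x[i] - y[i] - borrow;
      /* borrow out: underflow of x - (y + borrow), including wrap of y + borrow */
      borrow = (x[i] < y[i] + borrow) || (y[i] + borrow < borrow);
      r[i] = d;
    }
}

static void reduce_step(uint64_t s[NLIMBS], unsigned carry)
{
  uint64_t e[NLIMBS];

  /* The backported fix: copy the table row into locals first, so the
   * subtraction's operands no longer each need base+index addressing. */
  for (int i = 0; i < NLIMBS; i++)
    e[i] = p_mult[carry + 3][i];

  sub_n(s, s, e);                   /* s -= p_mult[carry + 3] */
}

int main(void)
{
  uint64_t s[NLIMBS] = { 5, 0, 0, 0, 0 };
  reduce_step(s, 0);                /* no-op with the all-zero placeholder table */
  printf("%llu\n", (unsigned long long)s[0]);
  return 0;
}
```

In the real code the subtraction is an inline-asm macro whose operands must all fit the available i386 registers at once; in this C sketch the compiler would do the equivalent spilling itself, so the explicit copy only illustrates where the extra register demand came from.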