1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
https://bugs.gentoo.org/915060
https://git.gnupg.org/cgi-bin/gitweb.cgi?p=libgcrypt.git;a=commit;h=08b88b4012f7837736b8d29a3689ce3fff2a10c8
From 08b88b4012f7837736b8d29a3689ce3fff2a10c8 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Date: Sat, 16 Dec 2023 19:50:23 +0200
Subject: [PATCH] mpi/ec-nist: fix for -Og build failure on i386
* mpi/ec-nist.c (_gcry_mpi_ec_nist256_mod)
(_gcry_mpi_ec_nist384_mod): Load p_mult constant with carry offset
to stack.
--
Cherry pick master commit of:
90097bd2f41c217dc5c666570e5680f432cf92d3
Patch fixes compilation error on i386 with -Og optimization level.
In file included from ../../mpi/ec-nist.c:34:
../../mpi/ec-nist.c: In function '_gcry_mpi_ec_nist256_mod':
../../mpi/ec-inline.h:701:3: error: 'asm' operand has impossible constraints
701 | __asm__ ("subl %11, %3\n" \
| ^~~~~~~
../../mpi/ec-inline.h:894:9: note: in expansion of macro 'SUB4_LIMB32'
894 | SUB4_LIMB32(A1.hi, A1.lo, A0.hi, A0.lo, \
| ^~~~~~~~~~~
../../mpi/ec-inline.h:1009:5: note: in expansion of macro 'SUB2_LIMB64'
1009 | SUB2_LIMB64(A4, A3, B4, B3, C4, C3); \
| ^~~~~~~~~~~
../../mpi/ec-nist.c:474:3: note: in expansion of macro 'SUB5_LIMB64'
474 | SUB5_LIMB64 (s[4], s[3], s[2], s[1], s[0],
| ^~~~~~~~~~~
It appears that in the problematic function, too many registers end up
being allocated for addressing, and not enough registers are left for
the asm inputs/outputs (4 registers are needed for this block). The
problem can be worked around by reducing the number of addressing
registers needed, by copying the `p_mult[carry + ...]` values to the
stack. With other compiler flags and on other architectures, the
compiler should be able to optimize away this extra copying, so it
should have no effect on performance.
GnuPG-bug-id: T6892
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
mpi/ec-nist.c | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/mpi/ec-nist.c b/mpi/ec-nist.c
index f792405c..559d02d9 100644
--- a/mpi/ec-nist.c
+++ b/mpi/ec-nist.c
@@ -471,11 +471,15 @@ _gcry_mpi_ec_nist256_mod (gcry_mpi_t w, mpi_ec_t ctx)
carry = LO32_LIMB64(s[4]);
+ /* Load values to stack to ease register pressure on i386. */
+ e[0] = p_mult[carry + 4][0];
+ e[1] = p_mult[carry + 4][1];
+ e[2] = p_mult[carry + 4][2];
+ e[3] = p_mult[carry + 4][3];
+ e[4] = p_mult[carry + 4][4];
SUB5_LIMB64 (s[4], s[3], s[2], s[1], s[0],
s[4], s[3], s[2], s[1], s[0],
- p_mult[carry + 4][4], p_mult[carry + 4][3],
- p_mult[carry + 4][2], p_mult[carry + 4][1],
- p_mult[carry + 4][0]);
+ e[4], e[3], e[2], e[1], e[0]);
/* Add 1*P */
ADD5_LIMB64 (d[4], d[3], d[2], d[1], d[0],
@@ -749,12 +753,17 @@ _gcry_mpi_ec_nist384_mod (gcry_mpi_t w, mpi_ec_t ctx)
carry = LO32_LIMB64(s[6]);
+ /* Load values to stack to ease register pressure on i386. */
+ x[0] = p_mult[carry + 3][0];
+ x[1] = p_mult[carry + 3][1];
+ x[2] = p_mult[carry + 3][2];
+ x[3] = p_mult[carry + 3][3];
+ x[4] = p_mult[carry + 3][4];
+ x[5] = p_mult[carry + 3][5];
+ x[6] = p_mult[carry + 3][6];
SUB7_LIMB64 (s[6], s[5], s[4], s[3], s[2], s[1], s[0],
s[6], s[5], s[4], s[3], s[2], s[1], s[0],
- p_mult[carry + 3][6], p_mult[carry + 3][5],
- p_mult[carry + 3][4], p_mult[carry + 3][3],
- p_mult[carry + 3][2], p_mult[carry + 3][1],
- p_mult[carry + 3][0]);
+ x[6], x[5], x[4], x[3], x[2], x[1], x[0]);
ADD7_LIMB64 (d[6], d[5], d[4], d[3], d[2], d[1], d[0],
s[6], s[5], s[4], s[3], s[2], s[1], s[0],
--
2.30.2
|