Add gmp-arch and udiv_qrnnd

To enable “longlong.h” removal, udiv_qrnnd is moved to a gmp-arch.h file, which allows each architecture to implement its own arch-specific optimizations. The generic implementation now uses a static inline function, which provides better type checking than the GNU extension of casting the asm constraint (and it works better with clang). Most architectures use the generic implementation, which is expanded from a macro; the exceptions are alpha, x86, m68k, sh, and sparc. Of those, I kept alpha, which uses an out-of-line implementation, and x86, where there is no easy way to use the div{q} instruction from C code. For the rest, the compiler generates good enough code. The hppa port also provides arch-specific implementations, but they are not routed through “longlong.h” and are thus never used. Reviewed-by: Wilco Dijkstra <Wilco.Dijkstra@arm.com>
2025-11-20 15:30:05 -03:00 · 2025-11-20 15:30:05 -03:00 · 476e962af7
parent e45174fe8c
commit 476e962af7
22 changed files with 219 additions and 402 deletions
--- a/soft-fp/soft-fp.h
+++ b/soft-fp/soft-fp.h
@ -316,22 +316,6 @@
 #include "op-8.h"
 #include "op-common.h"

-/* Sigh.  Silly things longlong.h needs.  */
-#define UWtype		_FP_W_TYPE
-#define W_TYPE_SIZE	_FP_W_TYPE_SIZE
-
-typedef int QItype __attribute__ ((mode (QI)));
-typedef int SItype __attribute__ ((mode (SI)));
-typedef int DItype __attribute__ ((mode (DI)));
-typedef unsigned int UQItype __attribute__ ((mode (QI)));
-typedef unsigned int USItype __attribute__ ((mode (SI)));
-typedef unsigned int UDItype __attribute__ ((mode (DI)));
-#if _FP_W_TYPE_SIZE == 32
-typedef unsigned int UHWtype __attribute__ ((mode (HI)));
-#elif _FP_W_TYPE_SIZE == 64
-typedef USItype UHWtype;
-#endif
-
 #ifndef CMPtype
 # define CMPtype	int
 #endif
@ -341,7 +325,10 @@ typedef USItype UHWtype;

 #ifndef umul_ppmm
 # ifdef _LIBC
+#  include <gmp.h>
+#  include <stdlib/gmp-impl.h>
 #  include <stdlib/longlong.h>
+#  include <gmp-arch.h>
 # else
 #  include "longlong.h"
 # endif
--- a/stdio-common/_itoa.c
+++ b/stdio-common/_itoa.c
@ -26,6 +26,7 @@
 #include <limits.h>
 #include <stdlib/gmp-impl.h>
 #include <stdlib/longlong.h>
+#include <gmp-arch.h>

 #include <_itoa.h>

@ -308,8 +309,8 @@ _itoa (unsigned long long int value, char *buflim, unsigned int base,
 		if (big_normalization_steps == 0)
 		  xh = 0;
 		else
-		  xh = (mp_limb_t) (value >> 64 - big_normalization_steps);
-		xl = (mp_limb_t) (value >> 32 - big_normalization_steps);
+		  xh = (mp_limb_t) (value >> (64 - big_normalization_steps));
+		xl = (mp_limb_t) (value >> (32 - big_normalization_steps));
 		udiv_qrnnd (x1hi, r, xh, xl, big_base_norm);

 		xl = ((mp_limb_t) value) << big_normalization_steps;
@ -320,7 +321,7 @@ _itoa (unsigned long long int value, char *buflim, unsigned int base,
 		  xh = x1hi;
 		else
 		  xh = ((x1hi << big_normalization_steps)
-			| (x1lo >> 32 - big_normalization_steps));
+			| (x1lo >> (32 - big_normalization_steps)));
 		xl = x1lo << big_normalization_steps;
 		udiv_qrnnd (t[0], x, xh, xl, big_base_norm);
 		t[1] = x >> big_normalization_steps;
--- a/stdio-common/_itowa.c
+++ b/stdio-common/_itowa.c
@ -21,6 +21,7 @@
 #include <limits.h>
 #include <stdlib/gmp-impl.h>
 #include <stdlib/longlong.h>
+#include <gmp-arch.h>

 #include <_itowa.h>

@ -228,8 +229,8 @@ _itowa (unsigned long long int value, wchar_t *buflim, unsigned int base,
 		if (big_normalization_steps == 0)
 		  xh = 0;
 		else
-		  xh = (mp_limb_t) (value >> 64 - big_normalization_steps);
-		xl = (mp_limb_t) (value >> 32 - big_normalization_steps);
+		  xh = (mp_limb_t) (value >> (64 - big_normalization_steps));
+		xl = (mp_limb_t) (value >> (32 - big_normalization_steps));
 		udiv_qrnnd (x1hi, r, xh, xl, big_base_norm);

 		xl = ((mp_limb_t) value) << big_normalization_steps;
@ -240,7 +241,7 @@ _itowa (unsigned long long int value, wchar_t *buflim, unsigned int base,
 		  xh = x1hi;
 		else
 		  xh = ((x1hi << big_normalization_steps)
-			| (x1lo >> 32 - big_normalization_steps));
+			| (x1lo >> (32 - big_normalization_steps)));
 		xl = x1lo << big_normalization_steps;
 		udiv_qrnnd (t[0], x, xh, xl, big_base_norm);
 		t[1] = x >> big_normalization_steps;
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@ -494,7 +494,6 @@ mpn-routines := \
  rshift \
  sub_n \
  submul_1 \
-  udiv_qrnnd \
  # mpn-routines
 mpn-headers = \
  asm-syntax.h \
--- a/stdlib/addmul_1.c
+++ b/stdlib/addmul_1.c
@ -24,6 +24,7 @@ along with the GNU MP Library; see the file COPYING.LIB.  If not, see
 #include <gmp.h>
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>

 mp_limb_t
 mpn_addmul_1 (register mp_ptr res_ptr, register mp_srcptr s1_ptr,
--- a/stdlib/divmod_1.c
+++ b/stdlib/divmod_1.c
@ -28,6 +28,7 @@ along with the GNU MP Library; see the file COPYING.LIB.  If not, see
 #include <stdbit.h>
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>

 #ifndef UMUL_TIME
 #define UMUL_TIME 1
--- a/stdlib/mod_1.c
+++ b/stdlib/mod_1.c
@ -25,6 +25,7 @@ along with the GNU MP Library; see the file COPYING.LIB.  If not, see
 #include <stdbit.h>
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>

 #ifndef UMUL_TIME
 #define UMUL_TIME 1
--- a/stdlib/mul_1.c
+++ b/stdlib/mul_1.c
@ -22,6 +22,7 @@ along with the GNU MP Library; see the file COPYING.LIB.  If not, see
 #include <gmp.h>
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>

 mp_limb_t
 mpn_mul_1 (register mp_ptr res_ptr, register mp_srcptr s1_ptr,
--- a/stdlib/strtod_l.c
+++ b/stdlib/strtod_l.c
@ -83,6 +83,7 @@ extern double ____strtod_l_internal (const char *, char **, int, locale_t);
 #include <gmp.h>
 #include "gmp-impl.h"
 #include "longlong.h"
+#include "gmp-arch.h"
 #include "fpioconst.h"

 #include <assert.h>
--- a/stdlib/submul_1.c
+++ b/stdlib/submul_1.c
@ -24,6 +24,7 @@ along with the GNU MP Library; see the file COPYING.LIB.  If not, see
 #include <gmp.h>
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>

 mp_limb_t
 mpn_submul_1 (register mp_ptr res_ptr, register mp_srcptr s1_ptr,
--- a/stdlib/udiv_qrnnd.c
+++ b/stdlib/udiv_qrnnd.c
@ -1,10 +0,0 @@
-/* For some machines GNU MP needs to define an auxiliary function:
-
-   udiv_qrnnd (quotient, remainder, high_numerator, low_numerator, denominator)
-
-   Divides a two-word unsigned integer, composed by the integers
-   HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
-   in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
-   than DENOMINATOR for correct operation.  If, in addition, the most
-   significant bit of DENOMINATOR must be 1, then the pre-processor symbol
-   UDIV_NEEDS_NORMALIZATION is defined to 1.  */
--- a/sysdeps/alpha/Makefile
+++ b/sysdeps/alpha/Makefile
@ -32,6 +32,10 @@ ifeq ($(subdir),string)
 sysdep_routines += stxcpy stxncpy
 endif

+ifeq ($(subdir),stdlib)
+sysdep_routines += udiv_qrnnd
+endif
+
 ifeq ($(subdir),elf)
 # The ld.so startup code cannot use literals until it self-relocates.
 CFLAGS-rtld.c = -mbuild-constants
--- a/sysdeps/alpha/gmp-arch.h
+++ b/sysdeps/alpha/gmp-arch.h
@ -0,0 +1,39 @@
+/* Multiprecision generic functions.  Alpha version.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef __GMP_ALPHA_ARCH_H
+#define __GMP_ALPHA_ARCH_H
+/* Out-of-line divider: returns the quotient, takes the remainder pointer.  */
+extern mp_limb_t __udiv_qrnnd (mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t)
+     attribute_hidden;
+/* Store the quotient of (N1,N0) / D in *Q; R goes on to __udiv_qrnnd.  */
+static __always_inline void
+udiv_qrnnd_alpha (mp_limb_t *q, mp_limb_t *r, mp_limb_t n1, mp_limb_t n0,
+		  mp_limb_t d)
+{
+  *q = __udiv_qrnnd (r, n1, n0, d);
+}
+#undef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0  /* D's top bit need not be set.  */
+# undef udiv_qrnnd
+# define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
+  udiv_qrnnd_alpha (&__q, &__r, __n1, __n0, __d)
+
+#include <sysdeps/generic/gmp-arch.h>
+
+#endif
--- a/sysdeps/generic/gmp-arch.h
+++ b/sysdeps/generic/gmp-arch.h
@ -0,0 +1,100 @@
+/* Multiprecision generic functions.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef __GMP_ARCH_H
+#define __GMP_ARCH_H
+
+#include <stdint.h>
+#include <gmp.h>
+/* LL_B is 2 to the power BITS_PER_MP_LIMB / 2, the half-limb radix.  */
+#define LL_B ((mp_limb_t) 1 << (BITS_PER_MP_LIMB / 2))
+/* Return the BITS_PER_MP_LIMB / 2 least significant bits of T.  */
+static __always_inline mp_limb_t
+ll_lowpart (mp_limb_t t)
+{
+  return t & (LL_B - 1);
+}
+/* Return the upper half of T, shifted down to the low bit positions.  */
+static __always_inline mp_limb_t
+ll_highpart (mp_limb_t t)
+{
+  return t >> (BITS_PER_MP_LIMB / 2);
+}
+
+/* udiv_qrnnd (quotient, remainder, high_numerator, low_numerator,
+   denominator) divides the two-limb value composed of the mp_limb_t
+   integers HIGH_NUMERATOR and LOW_NUMERATOR by DENOMINATOR, storing the
+   quotient in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR
+   must be less than DENOMINATOR.  When an implementation also requires
+   the most significant bit of DENOMINATOR to be set, it defines
+   UDIV_NEEDS_NORMALIZATION to 1, as this half-limb long division does.  */
+#ifndef udiv_qrnnd  /* An arch header may already have provided one.  */
+static __always_inline void
+udiv_qrnnd_generic (mp_limb_t *q, mp_limb_t *r, mp_limb_t n1, mp_limb_t n0,
+		    mp_limb_t d)
+{
+  mp_limb_t d1 = ll_highpart (d),
+            d0 = ll_lowpart (d),
+            q1, q0;
+  mp_limb_t r1, r0, m;
+
+  r1 = n1 % d1;
+  q1 = n1 / d1;  /* High quotient digit, estimated with D's high half.  */
+  m = q1 * d0;  /* Contribution of D's low half.  */
+  r1 = r1 * LL_B | ll_highpart (n0);
+  if (r1 < m)  /* Estimate too big: lower it, at most twice.  */
+    {
+      q1--;
+      r1 += d;
+      if (r1 >= d)  /* The addition above did not wrap around.  */
+        if (r1 < m)
+          {
+            q1--;
+            r1 += d;
+          }
+    }
+  r1 -= m;
+
+  r0 = r1 % d1;  /* Same steps again for the low quotient digit.  */
+  q0 = r1 / d1;
+  m = q0 * d0;
+  r0 = r0 * LL_B | ll_lowpart (n0);
+  if (r0 < m)
+    {
+      q0--;
+      r0 += d;
+      if (r0 >= d)  /* The addition above did not wrap around.  */
+        if (r0 < m)
+          {
+            q0--;
+            r0 += d;
+          }
+    }
+  r0 -= m;
+
+  *q = q1 * LL_B | q0;  /* Combine the two half-limb digits.  */
+  *r = r0;
+}
+# undef UDIV_NEEDS_NORMALIZATION
+# define UDIV_NEEDS_NORMALIZATION 1
+# undef udiv_qrnnd
+# define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
+  udiv_qrnnd_generic (&__q, &__r, __n1, __n0, __d)
+#endif
+
+#endif /* __GMP_ARCH_H */
--- a/sysdeps/hppa/hppa1.1/udiv_qrnnd.S
+++ b/sysdeps/hppa/hppa1.1/udiv_qrnnd.S
@ -1,77 +0,0 @@
-;! HP-PA  __udiv_qrnnd division support, used from longlong.h.
-;! This version runs fast on PA 7000 and later.
-
-;! Copyright (C) 1993-2025 Free Software Foundation, Inc.
-
-;! This file is part of the GNU MP Library.
-
-;! The GNU MP Library is free software; you can redistribute it and/or modify
-;! it under the terms of the GNU Lesser General Public License as published by
-;! the Free Software Foundation; either version 2.1 of the License, or (at your
-;! option) any later version.
-
-;! The GNU MP Library is distributed in the hope that it will be useful, but
-;! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-;! License for more details.
-
-;! You should have received a copy of the GNU Lesser General Public License
-;! along with the GNU MP Library.  If not, see
-;! <https://www.gnu.org/licenses/>.
-
-
-;! INPUT PARAMETERS
-;! rem_ptr	gr26
-;! n1		gr25
-;! n0		gr24
-;! d		gr23
-
-	.text
-L$0000:
-	.word		0x43f00000
-	.word		0x0
-	.export		__udiv_qrnnd
-__udiv_qrnnd:
-	.proc
-	.callinfo	frame=64,no_calls
-	.entry
-	ldo		64(%r30),%r30
-
-	stws		%r25,-16(%r30)	;! n_hi
-	stws		%r24,-12(%r30)	;! n_lo
-	b,l		L$0,%r1
-	ldo		L$0000-L$0(%r1),%r1
-L$0:
-	fldds		-16(%r30),%fr5
-	stws		%r23,-12(%r30)
-	comib,<=	0,%r25,L$1
-	fcnvxf,dbl,dbl	%fr5,%fr5
-	fldds		0(%r1),%fr4
-	fadd,dbl	%fr4,%fr5,%fr5
-L$1:
-	fcpy,sgl	%fr0,%fr6L
-	fldws		-12(%r30),%fr6R
-	fcnvxf,dbl,dbl	%fr6,%fr4
-
-	fdiv,dbl	%fr5,%fr4,%fr5
-
-	fcnvfx,dbl,dbl	%fr5,%fr4
-	fstws		%fr4R,-16(%r30)
-	xmpyu		%fr4R,%fr6R,%fr6
-	ldws		-16(%r30),%r28
-	fstds		%fr6,-16(%r30)
-	ldws		-12(%r30),%r21
-	ldws		-16(%r30),%r20
-	sub		%r24,%r21,%r22
-	subb		%r25,%r20,%r1
-	comib,=		0,%r1,L$2
-	ldo		-64(%r30),%r30
-
-	add		%r22,%r23,%r22
-	ldo		-1(%r28),%r28
-L$2:
-	bv		0(%r2)
-	stws		%r22,0(%r26)
-
-	.exit
-	.procend
--- a/sysdeps/hppa/udiv_qrnnd.S
+++ b/sysdeps/hppa/udiv_qrnnd.S
@ -1,285 +0,0 @@
-;! HP-PA  __udiv_qrnnd division support, used from longlong.h.
-;! This version runs fast on pre-PA7000 CPUs.
-
-;! Copyright (C) 1993-2025 Free Software Foundation, Inc.
-
-;! This file is part of the GNU MP Library.
-
-;! The GNU MP Library is free software; you can redistribute it and/or modify
-;! it under the terms of the GNU Lesser General Public License as published by
-;! the Free Software Foundation; either version 2.1 of the License, or (at your
-;! option) any later version.
-
-;! The GNU MP Library is distributed in the hope that it will be useful, but
-;! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-;! or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-;! License for more details.
-
-;! You should have received a copy of the GNU Lesser General Public License
-;! along with the GNU MP Library.  If not, see
-;! <https://www.gnu.org/licenses/>.
-
-
-;! INPUT PARAMETERS
-;! rem_ptr	gr26
-;! n1		gr25
-;! n0		gr24
-;! d		gr23
-
-;! The code size is a bit excessive.  We could merge the last two ds;addc
-;! sequences by simply moving the "bb,< Odd" instruction down.  The only
-;! trouble is the FFFFFFFF code that would need some hacking.
-
-	.text
-	.export		__udiv_qrnnd
-__udiv_qrnnd:
-	.proc
-	.callinfo	frame=0,no_calls
-	.entry
-
-	comb,<		%r23,%r0,L$largedivisor
-	 sub		%r0,%r23,%r1		;! clear cy as side-effect
-	ds		%r0,%r1,%r0
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r23,%r25
-	addc		%r24,%r24,%r28
-	ds		%r25,%r23,%r25
-	comclr,>=	%r25,%r0,%r0
-	addl		%r25,%r23,%r25
-	stws		%r25,0(%r26)
-	bv		0(%r2)
-	 addc		%r28,%r28,%r28
-
-L$largedivisor:
-	extru		%r24,31,1,%r20		;! r20 = n0 & 1
-	bb,<		%r23,31,L$odd
-	 extru		%r23,30,31,%r22		;! r22 = d >> 1
-	shd		%r25,%r24,1,%r24	;! r24 = new n0
-	extru		%r25,30,31,%r25		;! r25 = new n1
-	sub		%r0,%r22,%r21
-	ds		%r0,%r21,%r0
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	comclr,>=	%r25,%r0,%r0
-	addl		%r25,%r22,%r25
-	sh1addl		%r25,%r20,%r25
-	stws		%r25,0(%r26)
-	bv		0(%r2)
-	 addc		%r24,%r24,%r28
-
-L$odd:	addib,sv,n	1,%r22,L$FF..		;! r22 = (d / 2 + 1)
-	shd		%r25,%r24,1,%r24	;! r24 = new n0
-	extru		%r25,30,31,%r25		;! r25 = new n1
-	sub		%r0,%r22,%r21
-	ds		%r0,%r21,%r0
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r24
-	ds		%r25,%r22,%r25
-	addc		%r24,%r24,%r28
-	comclr,>=	%r25,%r0,%r0
-	addl		%r25,%r22,%r25
-	sh1addl		%r25,%r20,%r25
-;! We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25
-	add,nuv		%r28,%r25,%r25
-	addl		%r25,%r1,%r25
-	addc		%r0,%r28,%r28
-	sub,<<		%r25,%r23,%r0
-	addl		%r25,%r1,%r25
-	stws		%r25,0(%r26)
-	bv		0(%r2)
-	 addc		%r0,%r28,%r28
-
-;! This is just a special case of the code above.
-;! We come here when d == 0xFFFFFFFF
-L$FF..:	add,uv		%r25,%r24,%r24
-	sub,<<		%r24,%r23,%r0
-	ldo		1(%r24),%r24
-	stws		%r24,0(%r26)
-	bv		0(%r2)
-	 addc		%r0,%r25,%r28
-
-	.exit
-	.procend
--- a/sysdeps/ieee754/dbl-64/dbl2mpn.c
+++ b/sysdeps/ieee754/dbl-64/dbl2mpn.c
@ -18,6 +18,7 @@
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>
 #include <ieee754.h>
 #include <float.h>
 #include <stdlib.h>
--- a/sysdeps/ieee754/ldbl-128/ldbl2mpn.c
+++ b/sysdeps/ieee754/ldbl-128/ldbl2mpn.c
@ -18,6 +18,7 @@
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>
 #include <ieee754.h>
 #include <float.h>
 #include <math.h>
--- a/sysdeps/ieee754/ldbl-96/ldbl2mpn.c
+++ b/sysdeps/ieee754/ldbl-96/ldbl2mpn.c
@ -18,6 +18,7 @@
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>
 #include <ieee754.h>
 #include <float.h>
 #include <math.h>
--- a/sysdeps/wordsize-32/divdi3.c
+++ b/sysdeps/wordsize-32/divdi3.c
@ -25,20 +25,16 @@
 #error This is for 32-bit targets only
 #endif

-typedef unsigned int UQItype	__attribute__ ((mode (QI)));
-typedef          int SItype	__attribute__ ((mode (SI)));
-typedef unsigned int USItype	__attribute__ ((mode (SI)));
-typedef          int DItype	__attribute__ ((mode (DI)));
-typedef unsigned int UDItype	__attribute__ ((mode (DI)));
 #define Wtype SItype
 #define HWtype SItype
 #define DWtype DItype
-#define UWtype USItype
 #define UHWtype USItype
 #define UDWtype UDItype
-#define W_TYPE_SIZE 32

+#include <gmp.h>
+#include <stdlib/gmp-impl.h>
 #include <stdlib/longlong.h>
+#include <gmp-arch.h>

 #if __BYTE_ORDER == __BIG_ENDIAN
 struct DWstruct { Wtype high, low;};
--- a/sysdeps/x86/gmp-arch.h
+++ b/sysdeps/x86/gmp-arch.h
@ -0,0 +1,52 @@
+/* Multiprecision generic functions.  x86 version.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef __GMP_X64_ARCH_H
+#define __GMP_X64_ARCH_H
+
+#include <gmp.h>
+/* Divide (N1,N0) by D with one DIV; quotient to *Q, remainder to *R.  */
+static __always_inline void
+udiv_qrnnd_x86 (mp_limb_t *q, mp_limb_t *r, mp_limb_t n1, mp_limb_t n0,
+		mp_limb_t d)
+{
+#ifdef __x86_64__
+  asm ("div{q} %4"
+       : "=a" (*q),  /* Operand 0: quotient.  */
+         "=d" (*r)  /* Operand 1: remainder.  */
+       : "0" (n0),  /* Low limb, tied to operand 0.  */
+	 "1" (n1),  /* High limb, tied to operand 1.  */
+	 "rm" (d));  /* Divisor, in a register or in memory.  */
+#else  /* Not __x86_64__: same operands, l-suffixed DIV.  */
+  asm ("div{l} %4"
+       : "=a" (*q),
+         "=d" (*r)
+       : "0" (n0),
+	 "1" (n1),
+	 "rm" (d));
+#endif
+}
+#undef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0  /* D's top bit need not be set.  */
+#undef udiv_qrnnd
+#define udiv_qrnnd(__q, __r, __n1, __n0, __d) \
+  udiv_qrnnd_x86 (&__q, &__r, __n1, __n0, __d)
+
+#include <sysdeps/generic/gmp-arch.h>
+
+#endif
--- a/sysdeps/x86/ldbl2mpn.c
+++ b/sysdeps/x86/ldbl2mpn.c
@ -18,6 +18,7 @@
 #include "gmp.h"
 #include "gmp-impl.h"
 #include "longlong.h"
+#include <gmp-arch.h>
 #include <ieee754.h>
 #include <float.h>
 #include <stdlib.h>