From e5086322295e5a345af02d09abfcf8ddca2d0897 Mon Sep 17 00:00:00 2001
From: Stephen Canon <scanon@apple.com>
Date: Thu, 1 Jul 2010 15:52:42 +0000
Subject: Adding soft-float comparisons, addition, subtraction, multiplication
 and negation

git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@107400 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/adddf3.c      | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/addsf3.c      | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/comparedf2.c  | 127 +++++++++++++++++++++++++++++++++++++++++++
 lib/comparesf2.c  | 133 +++++++++++++++++++++++++++++++++++++++++++++
 lib/extendsfdf2.c | 133 +++++++++++++++++++++++++++++++++++++++++++++
 lib/fp_lib.h      | 123 +++++++++++++++++++++++++++++++++++++++++
 lib/muldf3.c      | 135 +++++++++++++++++++++++++++++++++++++++++++++
 lib/mulsf3.c      | 112 ++++++++++++++++++++++++++++++++++++++
 lib/negdf2.c      |  13 +++++
 lib/negsf2.c      |  13 +++++
 10 files changed, 1099 insertions(+)
 create mode 100644 lib/adddf3.c
 create mode 100644 lib/addsf3.c
 create mode 100644 lib/comparedf2.c
 create mode 100644 lib/comparesf2.c
 create mode 100644 lib/extendsfdf2.c
 create mode 100644 lib/fp_lib.h
 create mode 100644 lib/muldf3.c
 create mode 100644 lib/mulsf3.c
 create mode 100644 lib/negdf2.c
 create mode 100644 lib/negsf2.c

diff --git a/lib/adddf3.c b/lib/adddf3.c
new file mode 100644
index 000000000..c41cc2ecd
--- /dev/null
+++ b/lib/adddf3.c
@@ -0,0 +1,150 @@
+/*
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+// This file implements double-precision soft-float addition and subtraction
+// with the IEEE-754 default rounding (to nearest, ties to even).
+
+fp_t __adddf3(fp_t a, fp_t b) {
+    // Computes a + b on the integer representations, rounding to nearest even.
+    rep_t aRep = toRep(a);
+    rep_t bRep = toRep(b);
+    const rep_t aAbs = aRep & absMask;
+    const rep_t bAbs = bRep & absMask;
+    // Detect if a or b is zero, infinity, or NaN.  (0 - 1U wraps to the largest
+    // rep_t value, so a single unsigned compare covers all three cases.)
+    if (aAbs - 1U >= infRep - 1U || bAbs - 1U >= infRep - 1U) {
+        
+        // NaN + anything = qNaN
+        if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
+        // anything + NaN = qNaN
+        if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
+        
+        if (aAbs == infRep) {
+            // +/-infinity + -/+infinity = qNaN
+            if ((toRep(a) ^ toRep(b)) == signBit) return fromRep(qnanRep);
+            // +/-infinity + anything remaining = +/- infinity
+            else return a;
+        }
+        
+        // anything remaining + +/-infinity = +/-infinity
+        if (bAbs == infRep) return b;
+        
+        // zero + anything = anything
+        if (!aAbs) {
+            // zero + zero: ANDing the reps yields -0 only if both are -0
+            if (!bAbs) return fromRep(toRep(a) & toRep(b));
+            else return b;
+        }
+        
+        // anything + zero = anything
+        if (!bAbs) return a;
+    }
+    
+    // Swap a and b if necessary so that a has the larger absolute value.
+    if (bAbs > aAbs) {
+        const rep_t temp = aRep;
+        aRep = bRep;
+        bRep = temp;
+    }
+    
+    // Extract the exponent and significand from the (possibly swapped) a and b.
+    int aExponent = aRep >> significandBits & maxExponent;
+    int bExponent = bRep >> significandBits & maxExponent;
+    rep_t aSignificand = aRep & significandMask;
+    rep_t bSignificand = bRep & significandMask;
+    
+    // Normalize any denormals, and adjust the exponent accordingly.
+    if (aExponent == 0) aExponent = normalize(&aSignificand);
+    if (bExponent == 0) bExponent = normalize(&bSignificand);
+    
+    // The sign of the result is the sign of the larger operand, a.  If they
+    // have opposite signs, we are performing a subtraction; otherwise addition.
+    const rep_t resultSign = aRep & signBit;
+    const bool subtraction = (aRep ^ bRep) & signBit;
+    
+    // Shift the significands to give us round, guard and sticky, and or in the
+    // implicit significand bit.  (If we fell through from the denormal path it
+    // was already set by normalize( ), but setting it twice won't hurt
+    // anything.)
+    aSignificand = (aSignificand | implicitBit) << 3;
+    bSignificand = (bSignificand | implicitBit) << 3;
+    
+    // Shift the significand of b by the difference in exponents, with a sticky
+    // bottom bit to get rounding correct.
+    const int align = aExponent - bExponent;
+    if (align) {
+        if (align < typeWidth) {
+            const bool sticky = bSignificand << (typeWidth - align);
+            bSignificand = bSignificand >> align | sticky;
+        } else {
+            bSignificand = 1; // sticky; b is known to be non-zero.
+        }
+    }
+    
+    if (subtraction) {
+        aSignificand -= bSignificand;
+        
+        // If a == -b, return +zero.
+        if (aSignificand == 0) return fromRep(0);
+        
+        // If partial cancellation occurred, we need to left-shift the result
+        // and adjust the exponent:
+        if (aSignificand < implicitBit << 3) {
+            const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
+            aSignificand <<= shift;
+            aExponent -= shift;
+        }
+    }
+    
+    else /* addition */ {
+        aSignificand += bSignificand;
+        
+        // If the addition carried up, we need to right-shift the result and
+        // adjust the exponent:
+        if (aSignificand & implicitBit << 4) {
+            const bool sticky = aSignificand & 1;
+            aSignificand = aSignificand >> 1 | sticky;
+            aExponent += 1;
+        }
+    }
+    
+    // If we have overflowed the type, return +/- infinity:
+    if (aExponent >= maxExponent) return fromRep(infRep | resultSign);
+    
+    if (aExponent <= 0) {
+        // Result is denormal before rounding; the exponent is zero and we
+        // need to shift the significand.
+        const int shift = 1 - aExponent;
+        const bool sticky = aSignificand << (typeWidth - shift);
+        aSignificand = aSignificand >> shift | sticky;
+        aExponent = 0;
+    }
+    
+    // Low three bits are round, guard, and sticky.
+    const int roundGuardSticky = aSignificand & 0x7;
+    
+    // Shift the significand into place, and mask off the implicit bit.
+    rep_t result = aSignificand >> 3 & significandMask;
+    
+    // Insert the exponent and sign.
+    result |= (rep_t)aExponent << significandBits;
+    result |= resultSign;
+    
+    // Final rounding.  The result may overflow to infinity, but that is the
+    // correct result in that case.
+    if (roundGuardSticky > 0x4) result++;  // above the halfway point: round up
+    if (roundGuardSticky == 0x4) result += result & 1;  // exact tie: round to even
+    return fromRep(result);
+}
+
+fp_t __subdf3(fp_t a, fp_t b) {
+    const rep_t negatedB = toRep(b) ^ signBit;  // a - b == a + (-b)
+    return __adddf3(a, fromRep(negatedB));
+}
diff --git a/lib/addsf3.c b/lib/addsf3.c
new file mode 100644
index 000000000..e6d132084
--- /dev/null
+++ b/lib/addsf3.c
@@ -0,0 +1,160 @@
+/*
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+// This file implements single-precision soft-float addition and subtraction
+// with the IEEE-754 default rounding (to nearest, ties to even).
+
+fp_t __addsf3(fp_t a, fp_t b) {
+    // Computes a + b on the integer representations, rounding to nearest even.
+    rep_t aRep = toRep(a);
+    rep_t bRep = toRep(b);
+    const rep_t aAbs = aRep & absMask;
+    const rep_t bAbs = bRep & absMask;
+    // Detect if a or b is zero, infinity, or NaN.  (0 - 1U wraps to the largest
+    // rep_t value, so a single unsigned compare covers all three cases.)
+    if (aAbs - 1U >= infRep - 1U || bAbs - 1U >= infRep - 1U) {
+        
+        // NaN + anything = qNaN
+        if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
+        // anything + NaN = qNaN
+        if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
+        
+        if (aAbs == infRep) {
+            // +/-infinity + -/+infinity = qNaN
+            if ((toRep(a) ^ toRep(b)) == signBit) return fromRep(qnanRep);
+            // +/-infinity + anything remaining = +/- infinity
+            else return a;
+        }
+        
+        // anything remaining + +/-infinity = +/-infinity
+        if (bAbs == infRep) return b;
+        
+        // zero + anything = anything
+        if (!aAbs) {
+            // zero + zero: ANDing the reps yields -0 only if both are -0
+            if (!bAbs) return fromRep(toRep(a) & toRep(b));
+            else return b;
+        }
+        
+        // anything + zero = anything
+        if (!bAbs) return a;
+    }
+    
+    // Swap a and b if necessary so that a has the larger absolute value.
+    if (bAbs > aAbs) {
+        const rep_t temp = aRep;
+        aRep = bRep;
+        bRep = temp;
+    }
+    
+    // Extract the exponent and significand from the (possibly swapped) a and b.
+    int aExponent = aRep >> significandBits & maxExponent;
+    int bExponent = bRep >> significandBits & maxExponent;
+    rep_t aSignificand = aRep & significandMask;
+    rep_t bSignificand = bRep & significandMask;
+    
+    // Normalize any denormals, and adjust the exponent accordingly.
+    if (aExponent == 0) aExponent = normalize(&aSignificand);
+    if (bExponent == 0) bExponent = normalize(&bSignificand);
+    
+    // The sign of the result is the sign of the larger operand, a.  If they
+    // have opposite signs, we are performing a subtraction; otherwise addition.
+    const rep_t resultSign = aRep & signBit;
+    const bool subtraction = (aRep ^ bRep) & signBit;
+    
+    // Shift the significands to give us round, guard and sticky, and or in the
+    // implicit significand bit.  (If we fell through from the denormal path it
+    // was already set by normalize( ), but setting it twice won't hurt
+    // anything.)
+    aSignificand = (aSignificand | implicitBit) << 3;
+    bSignificand = (bSignificand | implicitBit) << 3;
+    
+    // Shift the significand of b by the difference in exponents, with a sticky
+    // bottom bit to get rounding correct.
+    const int align = aExponent - bExponent;
+    if (align) {
+        if (align < typeWidth) {
+            const bool sticky = bSignificand << (typeWidth - align);
+            bSignificand = bSignificand >> align | sticky;
+        } else {
+            bSignificand = 1; // sticky; b is known to be non-zero.
+        }
+    }
+    
+    if (subtraction) {
+        aSignificand -= bSignificand;
+        
+        // If a == -b, return +zero.
+        if (aSignificand == 0) return fromRep(0);
+        
+        // If partial cancellation occurred, we need to left-shift the result
+        // and adjust the exponent:
+        if (aSignificand < implicitBit << 3) {
+            const int shift = rep_clz(aSignificand) - rep_clz(implicitBit << 3);
+            aSignificand <<= shift;
+            aExponent -= shift;
+        }
+    }
+    
+    else /* addition */ {
+        aSignificand += bSignificand;
+        
+        // If the addition carried up, we need to right-shift the result and
+        // adjust the exponent:
+        if (aSignificand & implicitBit << 4) {
+            const bool sticky = aSignificand & 1;
+            aSignificand = aSignificand >> 1 | sticky;
+            aExponent += 1;
+        }
+    }
+    
+    // If we have overflowed the type, return +/- infinity:
+    if (aExponent >= maxExponent) return fromRep(infRep | resultSign);
+    
+    if (aExponent <= 0) {
+        // Result is denormal before rounding; the exponent is zero and we
+        // need to shift the significand.
+        const int shift = 1 - aExponent;
+        const bool sticky = aSignificand << (typeWidth - shift);
+        aSignificand = aSignificand >> shift | sticky;
+        aExponent = 0;
+    }
+    
+    // Low three bits are round, guard, and sticky.
+    const int roundGuardSticky = aSignificand & 0x7;
+    
+    // Shift the significand into place, and mask off the implicit bit.
+    rep_t result = aSignificand >> 3 & significandMask;
+    
+    // Insert the exponent and sign.
+    result |= (rep_t)aExponent << significandBits;
+    result |= resultSign;
+    
+    // Final rounding.  The result may overflow to infinity, but that is the
+    // correct result in that case.
+    if (roundGuardSticky > 0x4) result++;  // above the halfway point: round up
+    if (roundGuardSticky == 0x4) result += result & 1;  // exact tie: round to even
+    return fromRep(result);
+}
+
+fp_t __subsf3(fp_t a, fp_t b) {
+    const rep_t negatedB = toRep(b) ^ signBit;  // a - b == a + (-b)
+    return __addsf3(a, fromRep(negatedB));
+}
+
+
+
+
+
+
+
+
+
+
diff --git a/lib/comparedf2.c b/lib/comparedf2.c
new file mode 100644
index 000000000..de700808a
--- /dev/null
+++ b/lib/comparedf2.c
@@ -0,0 +1,127 @@
+/*
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+// This file implements the following soft-float comparison routines:
+//
+//   __eqdf2   __gedf2   __unorddf2
+//   __ledf2   __gtdf2
+//   __ltdf2
+//   __nedf2
+//
+// The semantics of the routines grouped in each column are identical, so there
+// is a single implementation for each, and wrappers to provide the other names.
+//
+// The main routines behave as follows:
+//
+//   __ledf2(a,b) returns -1 if a < b
+//                         0 if a == b
+//                         1 if a > b
+//                         1 if either a or b is NaN
+//
+//   __gedf2(a,b) returns -1 if a < b
+//                         0 if a == b
+//                         1 if a > b
+//                        -1 if either a or b is NaN
+//
+//   __unorddf2(a,b) returns 0 if both a and b are numbers
+//                           1 if either a or b is NaN
+//
+// Note that __ledf2( ) and __gedf2( ) are identical except in their handling of
+// NaN values.
+
+enum LE_RESULT {
+    LE_LESS      = -1,
+    LE_EQUAL     =  0,
+    LE_GREATER   =  1,
+    LE_UNORDERED =  1
+};
+
+// Three-way comparison carried out on the integer representations of a and b.
+// Yields LE_LESS, LE_EQUAL or LE_GREATER for ordered operands and
+// LE_UNORDERED (numerically the same as LE_GREATER) if either is NaN.
+enum LE_RESULT __ledf2(fp_t a, fp_t b) {
+
+    const srep_t lhsBits = toRep(a);
+    const srep_t rhsBits = toRep(b);
+    const rep_t lhsMagnitude = lhsBits & absMask;
+    const rep_t rhsMagnitude = rhsBits & absMask;
+
+    // Only NaN encodings have a magnitude above that of infinity.
+    const bool lhsIsNaN = lhsMagnitude > infRep;
+    const bool rhsIsNaN = rhsMagnitude > infRep;
+    if (lhsIsNaN || rhsIsNaN) return LE_UNORDERED;
+
+    // +0 and -0 have different representations but compare equal; beyond
+    // that, equality of the operands is equality of their representations.
+    const bool bothZero = (lhsMagnitude | rhsMagnitude) == 0;
+    if (bothZero || lhsBits == rhsBits) return LE_EQUAL;
+
+    // If at least one operand is non-negative, the signed integer order of
+    // the representations is the floating-point order.  If both are negative
+    // the integer order is reversed.  (This assumes a twos- or ones-
+    // complement integer representation; with sign-magnitude integers the
+    // reversal would be incorrect.)
+    const bool bothNegative = (lhsBits & rhsBits) < 0;
+    const bool isLess = bothNegative ? lhsBits > rhsBits
+                                     : lhsBits < rhsBits;
+    if (isLess) return LE_LESS;
+    return LE_GREATER;
+}
+
+
+enum GE_RESULT {
+    GE_LESS      = -1,
+    GE_EQUAL     =  0,
+    GE_GREATER   =  1,
+    GE_UNORDERED = -1   // Note: different from LE_UNORDERED
+};
+
+// Three-way comparison on the integer representations; a NaN operand yields
+// GE_UNORDERED, which is numerically the same as GE_LESS.
+enum GE_RESULT __gedf2(fp_t a, fp_t b) {
+    const srep_t lhsBits = toRep(a);
+    const srep_t rhsBits = toRep(b);
+    const rep_t lhsMagnitude = lhsBits & absMask;
+    const rep_t rhsMagnitude = rhsBits & absMask;
+
+    // Only NaN encodings have a magnitude above that of infinity.
+    if (lhsMagnitude > infRep || rhsMagnitude > infRep) return GE_UNORDERED;
+    // Zeros of either sign, or identical representations, are equal.
+    if ((lhsMagnitude | rhsMagnitude) == 0) return GE_EQUAL;
+    if (lhsBits == rhsBits) return GE_EQUAL;
+
+    // Integer order equals float order unless both are negative (then reversed).
+    const bool bothNegative = (lhsBits & rhsBits) < 0;
+    const bool isLess = bothNegative ? lhsBits > rhsBits : lhsBits < rhsBits;
+    return isLess ? GE_LESS : GE_GREATER;
+}
+
+int __unorddf2(fp_t a, fp_t b) {
+    // An operand is NaN exactly when its magnitude exceeds that of infinity.
+    if ((toRep(a) & absMask) > infRep) return 1;
+    return (toRep(b) & absMask) > infRep;
+}
+
+enum LE_RESULT __eqdf2(fp_t a, fp_t b) {
+    return __ledf2(a, b);  // zero exactly when a == b; NaN operands yield 1
+}
+
+enum LE_RESULT __ltdf2(fp_t a, fp_t b) {
+    return __ledf2(a, b);  // negative exactly when a < b; NaN operands yield 1
+}
+
+enum LE_RESULT __nedf2(fp_t a, fp_t b) {
+    return __ledf2(a, b);  // non-zero when a != b or either operand is NaN
+}
+
+enum GE_RESULT __gtdf2(fp_t a, fp_t b) {
+    return __gedf2(a, b);  // positive exactly when a > b; NaN operands yield -1
+}
+
diff --git a/lib/comparesf2.c b/lib/comparesf2.c
new file mode 100644
index 000000000..6706f192a
--- /dev/null
+++ b/lib/comparesf2.c
@@ -0,0 +1,133 @@
+//===-- lib/comparesf2.c - Single-precision comparisons -----------*- C -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the following soft-float comparison routines:
+//
+//   __eqsf2   __gesf2   __unordsf2
+//   __lesf2   __gtsf2
+//   __ltsf2
+//   __nesf2
+//
+// The semantics of the routines grouped in each column are identical, so there
+// is a single implementation for each, and wrappers to provide the other names.
+//
+// The main routines behave as follows:
+//
+//   __lesf2(a,b) returns -1 if a < b
+//                         0 if a == b
+//                         1 if a > b
+//                         1 if either a or b is NaN
+//
+//   __gesf2(a,b) returns -1 if a < b
+//                         0 if a == b
+//                         1 if a > b
+//                        -1 if either a or b is NaN
+//
+//   __unordsf2(a,b) returns 0 if both a and b are numbers
+//                           1 if either a or b is NaN
+//
+// Note that __lesf2( ) and __gesf2( ) are identical except in their handling of
+// NaN values.
+//
+//===----------------------------------------------------------------------===//
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+enum LE_RESULT {
+    LE_LESS      = -1,
+    LE_EQUAL     =  0,
+    LE_GREATER   =  1,
+    LE_UNORDERED =  1
+};
+
+// Three-way comparison carried out on the integer representations of a and b.
+// Yields LE_LESS, LE_EQUAL or LE_GREATER for ordered operands and
+// LE_UNORDERED (numerically the same as LE_GREATER) if either is NaN.
+enum LE_RESULT __lesf2(fp_t a, fp_t b) {
+
+    const srep_t lhsBits = toRep(a);
+    const srep_t rhsBits = toRep(b);
+    const rep_t lhsMagnitude = lhsBits & absMask;
+    const rep_t rhsMagnitude = rhsBits & absMask;
+
+    // Only NaN encodings have a magnitude above that of infinity.
+    const bool lhsIsNaN = lhsMagnitude > infRep;
+    const bool rhsIsNaN = rhsMagnitude > infRep;
+    if (lhsIsNaN || rhsIsNaN) return LE_UNORDERED;
+
+    // +0 and -0 have different representations but compare equal; beyond
+    // that, equality of the operands is equality of their representations.
+    const bool bothZero = (lhsMagnitude | rhsMagnitude) == 0;
+    if (bothZero || lhsBits == rhsBits) return LE_EQUAL;
+
+    // If at least one operand is non-negative, the signed integer order of
+    // the representations is the floating-point order.  If both are negative
+    // the integer order is reversed.  (This assumes a twos- or ones-
+    // complement integer representation; with sign-magnitude integers the
+    // reversal would be incorrect.)
+    const bool bothNegative = (lhsBits & rhsBits) < 0;
+    const bool isLess = bothNegative ? lhsBits > rhsBits
+                                     : lhsBits < rhsBits;
+    if (isLess) return LE_LESS;
+    return LE_GREATER;
+}
+
+
+enum GE_RESULT {
+    GE_LESS      = -1,
+    GE_EQUAL     =  0,
+    GE_GREATER   =  1,
+    GE_UNORDERED = -1   // Note: different from LE_UNORDERED
+};
+
+// Three-way comparison on the integer representations; a NaN operand yields
+// GE_UNORDERED, which is numerically the same as GE_LESS.
+enum GE_RESULT __gesf2(fp_t a, fp_t b) {
+    const srep_t lhsBits = toRep(a);
+    const srep_t rhsBits = toRep(b);
+    const rep_t lhsMagnitude = lhsBits & absMask;
+    const rep_t rhsMagnitude = rhsBits & absMask;
+
+    // Only NaN encodings have a magnitude above that of infinity.
+    if (lhsMagnitude > infRep || rhsMagnitude > infRep) return GE_UNORDERED;
+    // Zeros of either sign, or identical representations, are equal.
+    if ((lhsMagnitude | rhsMagnitude) == 0) return GE_EQUAL;
+    if (lhsBits == rhsBits) return GE_EQUAL;
+
+    // Integer order equals float order unless both are negative (then reversed).
+    const bool bothNegative = (lhsBits & rhsBits) < 0;
+    const bool isLess = bothNegative ? lhsBits > rhsBits : lhsBits < rhsBits;
+    return isLess ? GE_LESS : GE_GREATER;
+}
+
+int __unordsf2(fp_t a, fp_t b) {
+    // An operand is NaN exactly when its magnitude exceeds that of infinity.
+    if ((toRep(a) & absMask) > infRep) return 1;
+    return (toRep(b) & absMask) > infRep;
+}
+
+// The following are just other names for the foregoing routines.
+
+enum LE_RESULT __eqsf2(fp_t a, fp_t b) {
+    return __lesf2(a, b);  // zero exactly when a == b; NaN operands yield 1
+}
+
+enum LE_RESULT __ltsf2(fp_t a, fp_t b) {
+    return __lesf2(a, b);  // negative exactly when a < b; NaN operands yield 1
+}
+
+enum LE_RESULT __nesf2(fp_t a, fp_t b) {
+    return __lesf2(a, b);  // non-zero when a != b or either operand is NaN
+}
+
+enum GE_RESULT __gtsf2(fp_t a, fp_t b) {
+    return __gesf2(a, b);  // positive exactly when a > b; NaN operands yield -1
+}
+
diff --git a/lib/extendsfdf2.c b/lib/extendsfdf2.c
new file mode 100644
index 000000000..87819bd7a
--- /dev/null
+++ b/lib/extendsfdf2.c
@@ -0,0 +1,133 @@
+/*
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+#include <stdint.h>
+#include <limits.h>
+
+// This file implements a fairly generic conversion from a narrower to a wider
+// IEEE-754 floating-point type.  The next 10 lines parametrize which types
+// are to be used as the source and destination, the actual name used for
+// the conversion, and a suitable CLZ function for the source representation
+// type.
+//
+// This routine can be trivially adapted to support conversions from 
+// half-precision or to quad-precision. It does not support types that don't
+// use the usual IEEE-754 interchange formats; specifically, some work would be
+// needed to adapt it to (for example) the Intel 80-bit format or PowerPC
+// double-double format.
+//
+// Note please, however, that this implementation is only intended to support
+// *widening* operations; if you need to convert to a *narrower* floating-point
+// type (e.g. double -> float), then this routine will not do what you want it
+// to.
+//
+// It also requires that integer types at least as large as both formats
+// are available on the target platform; this may pose a problem when trying
+// to add support for quad on some 32-bit systems, for example.  You also may
+// run into trouble finding an appropriate CLZ function for wide source types;
+// you will likely need to roll your own on some platforms.
+//
+// Finally, the following assumptions are made:
+//
+// 1. floating-point types and integer types have the same endianness on the
+//    target platform
+//
+// 2. quiet NaNs, if supported, are indicated by the leading bit of the
+//    significand field being set
+
+#define widen __extendsfdf2
+
+typedef float src_t;
+typedef uint32_t src_rep_t;
+#define SRC_REP_C UINT32_C
+static const int srcSigBits = 23;
+#define src_rep_t_clz __builtin_clz
+
+typedef double dst_t;
+typedef uint64_t dst_rep_t;
+#define DST_REP_C UINT64_C
+static const int dstSigBits = 52;
+
+// End of specialization parameters.  Two helper routines for conversion to and
+// from the representation of floating-point data as integer values follow.
+
+static inline src_rep_t srcToRep(src_t x) {
+    // Reinterpret the bits of x as an integer by punning through a union.
+    return ((union { src_t f; src_rep_t i; }){ .f = x }).i;
+}
+
+static inline dst_t dstFromRep(dst_rep_t x) {
+    // Reinterpret the integer x as floating-point bits by punning through a union.
+    return ((union { dst_t f; dst_rep_t i; }){ .i = x }).f;
+}
+
+// End helper routines.  Conversion implementation follows.
+
+dst_t widen(src_t a) {
+    // Re-encodes a's sign, exponent and significand in dst_t; no rounding step.
+    // Various constants whose values follow from the type parameters.
+    // Any reasonable optimizer will fold and propagate all of these.
+    const int srcBits = sizeof(src_t)*CHAR_BIT;
+    const int srcExpBits = srcBits - srcSigBits - 1;
+    const int srcInfExp = (1 << srcExpBits) - 1;
+    const int srcExpBias = srcInfExp >> 1;
+    const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigBits;
+    const src_rep_t srcInfinity = (src_rep_t)srcInfExp << srcSigBits;
+    const src_rep_t srcSignMask = SRC_REP_C(1) << (srcSigBits + srcExpBits);
+    const src_rep_t srcAbsMask = srcSignMask - 1;
+    const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigBits - 1);
+    const src_rep_t srcNaNCode = srcQNaN - 1;
+    const int dstBits = sizeof(dst_t)*CHAR_BIT;
+    const int dstExpBits = dstBits - dstSigBits - 1;
+    const int dstInfExp = (1 << dstExpBits) - 1;
+    const int dstExpBias = dstInfExp >> 1;
+    const dst_rep_t dstMinNormal = DST_REP_C(1) << dstSigBits;
+    
+    // Break a into a sign and representation of the absolute value
+    src_rep_t aRep = srcToRep(a);
+    src_rep_t aAbs = aRep & srcAbsMask;
+    src_rep_t sign = aRep & srcSignMask;
+    dst_rep_t absResult;
+    
+    if (aAbs - srcMinNormal < srcInfinity - srcMinNormal) {
+        // a is a normal number.
+        // Extend to the destination type by shifting the significand and
+        // exponent into the proper position and rebiasing the exponent.
+        absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits);
+        absResult += (dst_rep_t)(dstExpBias - srcExpBias) << dstSigBits;
+    }
+    
+    else if (aAbs >= srcInfinity) {
+        // a is NaN or infinity.
+        // Conjure the result by beginning with infinity, then setting the qNaN
+        // bit if appropriate and then by right-aligning the rest of the
+        // trailing NaN payload field.
+        absResult = (dst_rep_t)dstInfExp << dstSigBits;
+        absResult |= (dst_rep_t)(aAbs & srcQNaN) << (dstSigBits - srcSigBits);
+        absResult |= (aAbs & srcNaNCode);  // NOTE(review): payload bits are not shifted up with the quiet bit -- confirm intended
+    }
+    
+    else if (aAbs) {
+        // a is denormal.
+        // renormalize the significand and clear the leading bit, then insert
+        // the correct adjusted exponent in the destination type.
+        const int scale = src_rep_t_clz(aAbs) - src_rep_t_clz(srcMinNormal);
+        absResult = (dst_rep_t)aAbs << (dstSigBits - srcSigBits + scale);
+        absResult ^= dstMinNormal;
+        const int resultExponent = dstExpBias - srcExpBias - scale + 1;
+        absResult |= (dst_rep_t)resultExponent << dstSigBits;
+    }
+
+    else {
+        // a is zero.
+        absResult = 0;
+    }
+    
+    // Apply the signbit to (dst_t)abs(a).
+    dst_rep_t result = absResult | (dst_rep_t)sign << (dstBits - srcBits);
+    return dstFromRep(result);
+}
diff --git a/lib/fp_lib.h b/lib/fp_lib.h
new file mode 100644
index 000000000..b3c010491
--- /dev/null
+++ b/lib/fp_lib.h
@@ -0,0 +1,123 @@
+// This file is a configuration header for soft-float routines in compiler-rt.
+// This file does not provide any part of the compiler-rt interface.
+
+// Assumes that float and double correspond to the IEEE-754 binary32 and
+// binary64 types, respectively.
+
+#ifndef FP_LIB_HEADER
+#define FP_LIB_HEADER
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <limits.h>
+
+#if defined SINGLE_PRECISION
+#if 0
+#pragma mark single definitions
+#endif
+
+typedef uint32_t rep_t;
+typedef int32_t srep_t;
+typedef float fp_t;
+#define REP_C UINT32_C
+#define significandBits 23
+
+static inline int rep_clz(rep_t a) {
+    return __builtin_clz(a);  // leading-zero count of the 32-bit rep; GCC documents the result as undefined for a == 0
+}
+
+#elif defined DOUBLE_PRECISION
+#if 0
+#pragma mark double definitions
+#endif
+
+typedef uint64_t rep_t;
+typedef int64_t srep_t;
+typedef double fp_t;
+#define REP_C UINT64_C
+#define significandBits 52
+
+// Count of leading zero bits in the 64-bit representation a (a != 0).
+static inline int rep_clz(rep_t a) {
+#if defined __LP64__
+    return __builtin_clzl(a);
+#else
+    // High word non-zero: count there; else add its 32 zeros to the low word's.
+    if (a >> 32) return __builtin_clz(a >> 32);
+    return 32 + __builtin_clz(a & REP_C(0xffffffff));
+#endif
+}
+
+#else
+#error Either SINGLE_PRECISION or DOUBLE_PRECISION must be defined.
+#endif
+
+#if 0
+#pragma mark -
+#pragma mark integer constants
+#endif
+
+#define typeWidth       (sizeof(rep_t)*CHAR_BIT)
+#define exponentBits    (typeWidth - significandBits - 1)
+#define maxExponent     ((1 << exponentBits) - 1)
+#define exponentBias    (maxExponent >> 1)
+
+#if 0
+#pragma mark -
+#pragma mark rep_t constants
+#endif
+
+#define implicitBit     (REP_C(1) << significandBits)
+#define significandMask (implicitBit - 1U)
+#define signBit         (REP_C(1) << (significandBits + exponentBits))
+#define absMask         (signBit - 1U)
+#define exponentMask    (absMask ^ significandMask)
+#define oneRep          ((rep_t)exponentBias << significandBits)
+#define infRep          exponentMask
+#define quietBit        (implicitBit >> 1)
+#define qnanRep         (exponentMask | quietBit)
+
+#if 0
+#pragma mark -
+#pragma mark generic functions
+#endif
+
+static inline rep_t toRep(fp_t x) {
+    // Reinterpret the bits of x as an integer by punning through a union.
+    return ((union { fp_t f; rep_t i; }){ .f = x }).i;
+}
+
+static inline fp_t fromRep(rep_t x) {
+    // Reinterpret the integer x as floating-point bits by punning through a union.
+    return ((union { fp_t f; rep_t i; }){ .i = x }).f;
+}
+
+static inline int normalize(rep_t *significand) {
+    const int leadingGap = rep_clz(*significand) - rep_clz(implicitBit);
+    *significand = *significand << leadingGap;  // leading one now at implicitBit
+    return 1 - leadingGap;  // exponent that pairs with the shifted significand
+}
+
+static inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) {
+    *hi = *hi << count | *lo >> 1 >> (typeWidth - 1 - count);  // split shift keeps count == 0 defined
+    *lo = *lo << count;  // together: hi:lo <<= count, for 0 <= count < typeWidth
+}
+
+// Shift hi:lo right by count; every bit shifted out is ORed into bit 0 of *lo.
+static inline void wideRightShiftWithSticky(rep_t *hi, rep_t *lo, int count) {
+    // Left shifts are split as "<< 1 << (n - 1)" so that n == typeWidth is defined.
+    if (count < typeWidth) {
+        const bool sticky = *lo << 1 << (typeWidth - 1 - count);
+        *lo = *hi << 1 << (typeWidth - 1 - count) | *lo >> count | sticky;
+        *hi = *hi >> count;
+    } else if (count < 2*typeWidth) {
+        const bool sticky = *hi << 1 << (2*typeWidth - 1 - count) | *lo;
+        *lo = *hi >> (count - typeWidth) | sticky;
+        *hi = 0;
+    } else {
+        *lo = (*hi | *lo) != 0;
+        *hi = 0;
+    }
+}
+
+#endif // FP_LIB_HEADER
diff --git a/lib/muldf3.c b/lib/muldf3.c
new file mode 100644
index 000000000..77e9ed19c
--- /dev/null
+++ b/lib/muldf3.c
@@ -0,0 +1,135 @@
+/*
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+// This file implements double-precision soft-float multiplication with the
+// IEEE-754 default rounding (to nearest, ties to even).
+
+#define loWord(a) ((a) & 0xffffffffU)  // low 32 bits of a
+#define hiWord(a) ((a) >> 32)          // bits 32 and up of a
+
+// 64x64 -> 128 wide multiply for platforms that don't have such an operation;
+// some 64-bit platforms have this operation, but they tend to have hardware
+// floating-point, so we don't bother with a special case for them here.
+static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
+    // Split both operands into 32-bit halves and form the four partial products.
+    const uint64_t aLo = loWord(a), aHi = hiWord(a);
+    const uint64_t bLo = loWord(b), bHi = hiWord(b);
+    const uint64_t ll = aLo * bLo, lh = aLo * bHi;
+    const uint64_t hl = aHi * bLo, hh = aHi * bHi;
+    // Bits 32..63 of the result.  Each term is below 2^32, so the sum cannot
+    // wrap, and whatever lands above bit 31 of mid is the carry into *hi.
+    const uint64_t mid = hiWord(ll) + loWord(lh) + loWord(hl);
+    *lo = loWord(ll) + (mid << 32);
+    // Upper 64 bits: high halves of the cross terms, the carry, and aHi*bHi.
+    *hi = hiWord(lh) + hiWord(hl) + hiWord(mid) + hh;
+}
+
+fp_t __muldf3(fp_t a, fp_t b) {
+    // Returns a*b, computed with integer operations on the operands' bits.
+    const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
+    const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
+    const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit;
+    
+    rep_t aSignificand = toRep(a) & significandMask;
+    rep_t bSignificand = toRep(b) & significandMask;
+    int scale = 0;
+    // Detect if a or b is zero, denormal, infinity, or NaN.  An exponent of 0
+    // wraps in the unsigned subtraction, so one test covers 0 and maxExponent.
+    if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
+        
+        const rep_t aAbs = toRep(a) & absMask;
+        const rep_t bAbs = toRep(b) & absMask;
+        
+        // NaN * anything = qNaN
+        if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
+        // anything * NaN = qNaN
+        if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
+        
+        if (aAbs == infRep) {
+            // infinity * non-zero = +/- infinity
+            if (bAbs) return fromRep(aAbs | productSign);
+            // infinity * zero = NaN
+            else return fromRep(qnanRep);
+        }
+        
+        if (bAbs == infRep) {
+            // non-zero * infinity = +/- infinity
+            if (aAbs) return fromRep(bAbs | productSign);
+            // zero * infinity = NaN
+            else return fromRep(qnanRep);
+        }
+        
+        // zero * anything = +/- zero
+        if (!aAbs) return fromRep(productSign);
+        // anything * zero = +/- zero
+        if (!bAbs) return fromRep(productSign);
+        
+        // one or both of a or b is denormal, the other (if applicable) is a
+        // normal number.  Renormalize one or both of a and b, and set scale to
+        // include the necessary exponent adjustment.
+        if (aAbs < implicitBit) scale += normalize(&aSignificand);
+        if (bAbs < implicitBit) scale += normalize(&bSignificand);
+    }
+    
+    // Or in the implicit significand bit.  (If we fell through from the
+    // denormal path it was already set by normalize( ), but setting it twice
+    // won't hurt anything.)
+    aSignificand |= implicitBit;
+    bSignificand |= implicitBit;
+    
+    // Get the significand of a*b.  Before multiplying the significands, shift
+    // one of them left to left-align it in the field.  Thus, the product will
+    // have (exponentBits + 2) integral digits, all but two of which must be
+    // zero.  Normalizing this result is just a conditional left-shift by one
+    // and bumping the exponent accordingly.
+    rep_t productHi, productLo;
+    wideMultiply(aSignificand, bSignificand << exponentBits,
+                 &productHi, &productLo);
+    
+    int productExponent = aExponent + bExponent - exponentBias + scale;
+    
+    // Normalize the significand, adjust exponent if needed.
+    if (productHi & implicitBit) productExponent++;
+    else wideLeftShift(&productHi, &productLo, 1);
+    
+    // If we have overflowed the type, return +/- infinity.
+    if (productExponent >= maxExponent) return fromRep(infRep | productSign);
+    
+    if (productExponent <= 0) {
+        // Result is denormal before rounding
+        //
+        // If the result is so small that it just underflows to zero, return
+        // a zero of the appropriate sign.  Mathematically there is no need to
+        // handle this case separately, but we make it a special case to
+        // simplify the shift logic.
+        const int shift = 1 - productExponent;
+        if (shift >= typeWidth) return fromRep(productSign);
+        
+        // Otherwise, shift the significand of the result so that the round
+        // bit is the high bit of productLo.
+        wideRightShiftWithSticky(&productHi, &productLo, shift);
+    }
+    
+    else {
+        // Result is normal before rounding; insert the exponent.
+        productHi &= significandMask;
+        productHi |= (rep_t)productExponent << significandBits;
+    }
+    
+    // Insert the sign of the result:
+    productHi |= productSign;
+    
+    // Final rounding.  The final result may overflow to infinity, or underflow
+    // to zero, but those are the correct results in those cases.  We use the
+    // default IEEE-754 round-to-nearest, ties-to-even rounding mode.
+    if (productLo > signBit) productHi++;                 // above half: round up
+    if (productLo == signBit) productHi += productHi & 1; // exact tie: round to even
+    return fromRep(productHi);
+}
diff --git a/lib/mulsf3.c b/lib/mulsf3.c
new file mode 100644
index 000000000..8c8b3144f
--- /dev/null
+++ b/lib/mulsf3.c
@@ -0,0 +1,112 @@
+/*
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+// This file implements single-precision soft-float multiplication with the
+// IEEE-754 default rounding (to nearest, ties to even).
+
+// 32x32 --> 64 bit multiply
+static inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
+    const uint64_t wide = (uint64_t)a * (uint64_t)b;  // widen before multiplying
+    *hi = (rep_t)(wide >> 32);  // upper half of the 64-bit product
+    *lo = (rep_t)wide;          // lower half (conversion drops the upper bits)
+}
+
+fp_t __mulsf3(fp_t a, fp_t b) {
+    // Returns a*b, computed with integer operations on the operands' bits.
+    const unsigned int aExponent = toRep(a) >> significandBits & maxExponent;
+    const unsigned int bExponent = toRep(b) >> significandBits & maxExponent;
+    const rep_t productSign = (toRep(a) ^ toRep(b)) & signBit;
+    
+    rep_t aSignificand = toRep(a) & significandMask;
+    rep_t bSignificand = toRep(b) & significandMask;
+    int scale = 0;
+    
+    // Detect if a or b is zero, denormal, infinity, or NaN.
+    if (aExponent-1U >= maxExponent-1U || bExponent-1U >= maxExponent-1U) {
+        const rep_t aAbs = toRep(a) & absMask;
+        const rep_t bAbs = toRep(b) & absMask;
+        
+        // NaN * anything = qNaN
+        if (aAbs > infRep) return fromRep(toRep(a) | quietBit);
+        // anything * NaN = qNaN
+        if (bAbs > infRep) return fromRep(toRep(b) | quietBit);
+        
+        if (aAbs == infRep) {
+            // infinity * non-zero = +/- infinity
+            if (bAbs) return fromRep(aAbs | productSign);
+            // infinity * zero = NaN
+            else return fromRep(qnanRep);
+        }
+        
+        if (bAbs == infRep) {
+            // non-zero * infinity = +/- infinity
+            if (aAbs) return fromRep(bAbs | productSign);
+            // zero * infinity = NaN
+            else return fromRep(qnanRep);
+        }
+        
+        // zero * anything = +/- zero
+        if (!aAbs) return fromRep(productSign);
+        // anything * zero = +/- zero
+        if (!bAbs) return fromRep(productSign);
+        
+        // one or both of a or b is denormal, the other (if applicable) is a
+        // normal number.  Renormalize one or both of a and b, and set scale to
+        // include the necessary exponent adjustment.
+        if (aAbs < implicitBit) scale += normalize(&aSignificand);
+        if (bAbs < implicitBit) scale += normalize(&bSignificand);
+    }
+    
+    // Or in the implicit significand bit.  (If we fell through from the
+    // denormal path it was already set by normalize( ), but setting it twice
+    // won't hurt anything.)
+    aSignificand |= implicitBit;
+    bSignificand |= implicitBit;
+    
+    // Get the significand of a*b.  Before multiplying the significands, shift
+    // one of them left to left-align it in the field.  Thus, the product will
+    // have (exponentBits + 2) integral digits, all but two of which must be
+    // zero.  Normalizing this result is just a conditional left-shift by one
+    // and bumping the exponent accordingly.
+    rep_t productHi, productLo;
+    wideMultiply(aSignificand, bSignificand << exponentBits,
+                 &productHi, &productLo);
+    
+    int productExponent = aExponent + bExponent - exponentBias + scale;
+    
+    // Normalize the significand, adjust exponent if needed.
+    if (productHi & implicitBit) productExponent++;
+    else wideLeftShift(&productHi, &productLo, 1);
+    
+    // If we have overflowed the type, return +/- infinity.
+    if (productExponent >= maxExponent) return fromRep(infRep | productSign);
+    
+    if (productExponent <= 0) {
+        // Result is denormal before rounding.  Shifting by typeWidth or more
+        // can only produce zero, so return it without an out-of-range shift.
+        const int shift = 1 - productExponent;
+        if (shift >= typeWidth) return fromRep(productSign);
+        wideRightShiftWithSticky(&productHi, &productLo, shift);
+    }
+    else {
+        // Result is normal before rounding; insert the exponent.
+        productHi &= significandMask;
+        productHi |= (rep_t)productExponent << significandBits;
+    }
+    
+    // Insert the sign of the result:
+    productHi |= productSign;
+    
+    // Final rounding.  The final result may overflow to infinity, or underflow
+    // to zero, but those are the correct results in those cases.
+    if (productLo > signBit) productHi++;                 // above half: round up
+    if (productLo == signBit) productHi += productHi & 1; // exact tie: round to even
+    return fromRep(productHi);
+}
diff --git a/lib/negdf2.c b/lib/negdf2.c
new file mode 100644
index 000000000..edc2a6c82
--- /dev/null
+++ b/lib/negdf2.c
@@ -0,0 +1,13 @@
+/*
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+#define DOUBLE_PRECISION
+#include "fp_lib.h"
+
+fp_t __negdf2(fp_t a) {
+    return fromRep(toRep(a) ^ signBit);  // -a: flip the sign bit, leave the rest
+}
diff --git a/lib/negsf2.c b/lib/negsf2.c
new file mode 100644
index 000000000..f96d19ccd
--- /dev/null
+++ b/lib/negsf2.c
@@ -0,0 +1,13 @@
+/*
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ */
+
+#define SINGLE_PRECISION
+#include "fp_lib.h"
+
+fp_t __negsf2(fp_t a) {
+    return fromRep(signBit ^ toRep(a));  // -a: flip the sign bit, leave the rest
+}
-- 
cgit v1.2.3