Bug 1376975 - Formally verified Curve25519 (64 bits) from HaCl* r=mt,franziskus
author Benjamin Beurdouche <benjamin.beurdouche@inria.fr>
Thu, 31 Aug 2017 14:47:12 +0200
changeset 13555 3df3d4a82736
parent 13554 07d0f4131459
child 13556 7fcf7848095c
push id 2344
push user franziskuskiefer@gmail.com
push date 2017-08-31 13:18 +0000
reviewers mt, franziskus
bugs 1376975
Bug 1376975 - Formally verified Curve25519 (64 bits) from HaCl* r=mt,franziskus Summary: This patch replaces the legacy curve25519_64.c code with the formally verified HaCl* code. The new code has been proven to provide functional correctness, memory safety, and a set of side-channel resistance properties. Note: All files in the new `verified` folder are formally verified in F*, except for 'kremlib.h', which remains in the trusted code base. Reviewers: franziskus, ekr, ttaubert, mt Differential Revision: https://nss-review.dev.mozaws.net/D395
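For background on the side-channel resistance claim: both the removed uint128.c and the new verified fstar_uint128.h compute the carry of a 64-bit addition without any secret-dependent branch, using the bitwise formula (a ^ ((a ^ b) | ((a - b) ^ b))) >> 63. Below is a minimal standalone C sketch of that technique; the struct and function names are illustrative only, not identifiers from the patch.

#include <stdint.h>
#include <stdio.h>

/* Two 64-bit halves standing in for a 128-bit value, as in the portable
 * fallback used on platforms without a native unsigned __int128. */
typedef struct {
    uint64_t low;
    uint64_t high;
} demo_uint128;

/* Branch-free carry: returns 1 when a < b, 0 otherwise. This is the same
 * formula used by CONSTANT_TIME_CARRY in the removed uint128.c and by
 * FStar_UInt128_constant_time_carry in the new verified header. */
static uint64_t
demo_carry(uint64_t a, uint64_t b)
{
    return (a ^ ((a ^ b) | ((a - b) ^ b))) >> 63;
}

/* 128-bit addition built from two 64-bit additions plus the branch-free
 * carry, so control flow never depends on the (possibly secret) operands. */
static demo_uint128
demo_add(demo_uint128 a, demo_uint128 b)
{
    demo_uint128 r;
    r.low = a.low + b.low;
    r.high = a.high + b.high + demo_carry(r.low, b.low);
    return r;
}

int
main(void)
{
    demo_uint128 a = { 0xffffffffffffffffULL, 0 };
    demo_uint128 b = { 1, 0 };
    demo_uint128 r = demo_add(a, b); /* low wraps to 0, carry moves into high */
    printf("low=%llu high=%llu\n", (unsigned long long)r.low,
           (unsigned long long)r.high);
    return 0;
}

Compiled and run, the sketch prints low=0 high=1, showing the carry out of the low word propagating into the high word with bitwise operations only.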
lib/freebl/Makefile
lib/freebl/ecl/curve25519_64.c
lib/freebl/ecl/uint128.c
lib/freebl/ecl/uint128.h
lib/freebl/freebl.gyp
lib/freebl/freebl_base.gypi
lib/freebl/manifest.mn
lib/freebl/verified/fstar_uint128.h
lib/freebl/verified/hacl_curve25519_64.c
lib/freebl/verified/hacl_curve25519_64.h
lib/freebl/verified/kremlib.h
--- a/lib/freebl/Makefile
+++ b/lib/freebl/Makefile
@@ -526,25 +526,22 @@ ifndef NSS_DISABLE_CHACHAPOLY
         EXTRA_SRCS += chacha20.c
     endif # x86_64
 endif # NSS_DISABLE_CHACHAPOLY
 
 ifeq (,$(filter-out i386 x386 x86 x86_64,$(CPU_ARCH)))
     # All intel architectures get the 64 bit version
     # With custom uint128 if necessary (faster than generic 32 bit version).
     ECL_SRCS += curve25519_64.c
+    VERIFIED_SRCS += hacl_curve25519_64.c
 else
     # All non intel architectures get the generic 32 bit implementation (slow!)
     ECL_SRCS += curve25519_32.c
 endif
 
-ifndef HAVE_INT128_SUPPORT
-    ECL_SRCS += uint128.c
-endif
-
 #######################################################################
 # (5) Execute "global" rules. (OPTIONAL)                              #
 #######################################################################
 
 include $(CORE_DEPTH)/coreconf/rules.mk
 
 #######################################################################
 # (6) Execute "component" rules. (OPTIONAL)                           #
@@ -558,22 +555,22 @@ include $(CORE_DEPTH)/coreconf/rules.mk
 
 export:: private_export
 
 rijndael_tables:
 	$(CC) -o $(OBJDIR)/make_rijndael_tab rijndael_tables.c \
 	         $(DEFINES) $(INCLUDES) $(OBJDIR)/libfreebl.a
 	$(OBJDIR)/make_rijndael_tab
 
-vpath %.h mpi ecl
-vpath %.c mpi ecl
+vpath %.h mpi ecl verified
+vpath %.c mpi ecl verified
 vpath %.S mpi ecl
 vpath %.s mpi ecl
 vpath %.asm mpi ecl
-INCLUDES += -Impi -Iecl
+INCLUDES += -Impi -Iecl -Iverified
 
 
 DEFINES += -DMP_API_COMPATIBLE
 
 MPI_USERS = dh.c pqg.c dsa.c rsa.c ec.c
 
 MPI_OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(MPI_SRCS:.c=$(OBJ_SUFFIX)))
 MPI_OBJS += $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(MPI_USERS:.c=$(OBJ_SUFFIX)))
@@ -582,16 +579,19 @@ MPI_OBJS += $(addprefix $(OBJDIR)/$(PROG
 
 ECL_USERS = ec.c
 
 ECL_OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(ECL_SRCS:.c=$(OBJ_SUFFIX)) $(ECL_ASM_SRCS:$(ASM_SUFFIX)=$(OBJ_SUFFIX)))
 ECL_OBJS += $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(ECL_USERS:.c=$(OBJ_SUFFIX)))
 
 $(ECL_OBJS): $(ECL_HDRS)
 
+VERIFIED_OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(VERIFIED_SRCS:.c=$(OBJ_SUFFIX)))
+
+$(VERIFIED_OBJS): $(VERIFIED_HDRS)
 
 
 $(OBJDIR)/sysrand$(OBJ_SUFFIX): sysrand.c unix_rand.c win_rand.c
 
 $(OBJDIR)/$(PROG_PREFIX)mpprime$(OBJ_SUFFIX): primes.c
 
 $(OBJDIR)/ldvector$(OBJ_SUFFIX) $(OBJDIR)/loader$(OBJ_SUFFIX) : loader.h
 
--- a/lib/freebl/ecl/curve25519_64.c
+++ b/lib/freebl/ecl/curve25519_64.c
@@ -1,514 +1,14 @@
 /* This Source Code Form is subject to the terms of the Mozilla Public
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-/*
- * Derived from public domain C code by Adam Langley and Daniel J. Bernstein
- */
-
-#include "uint128.h"
-
 #include "ecl-priv.h"
-#include "mpi.h"
-
-#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
-
-typedef uint8_t u8;
-typedef uint64_t felem;
-
-/* Sum two numbers: output += in */
-static void
-fsum(felem *output, const felem *in)
-{
-    unsigned i;
-    for (i = 0; i < 5; ++i) {
-        output[i] += in[i];
-    }
-}
-
-/* Find the difference of two numbers: output = in - output
- * (note the order of the arguments!)
- */
-static void
-fdifference_backwards(felem *ioutput, const felem *iin)
-{
-    static const int64_t twotothe51 = ((int64_t)1l << 51);
-    const int64_t *in = (const int64_t *)iin;
-    int64_t *out = (int64_t *)ioutput;
-
-    out[0] = in[0] - out[0];
-    out[1] = in[1] - out[1];
-    out[2] = in[2] - out[2];
-    out[3] = in[3] - out[3];
-    out[4] = in[4] - out[4];
-
-    // An arithmetic shift right of 63 places turns a positive number to 0 and a
-    // negative number to all 1's. This gives us a bitmask that lets us avoid
-    // side-channel prone branches.
-    int64_t t;
-
-#define NEGCHAIN(a, b)        \
-    t = out[a] >> 63;         \
-    out[a] += twotothe51 & t; \
-    out[b] -= 1 & t;
-
-#define NEGCHAIN19(a, b)      \
-    t = out[a] >> 63;         \
-    out[a] += twotothe51 & t; \
-    out[b] -= 19 & t;
-
-    NEGCHAIN(0, 1);
-    NEGCHAIN(1, 2);
-    NEGCHAIN(2, 3);
-    NEGCHAIN(3, 4);
-    NEGCHAIN19(4, 0);
-    NEGCHAIN(0, 1);
-    NEGCHAIN(1, 2);
-    NEGCHAIN(2, 3);
-    NEGCHAIN(3, 4);
-}
-
-/* Multiply a number by a scalar: output = in * scalar */
-static void
-fscalar_product(felem *output, const felem *in,
-                const felem scalar)
-{
-    uint128_t tmp, tmp2;
-
-    tmp = mul6464(in[0], scalar);
-    output[0] = mask51(tmp);
-
-    tmp2 = mul6464(in[1], scalar);
-    tmp = add128(tmp2, rshift128(tmp, 51));
-    output[1] = mask51(tmp);
-
-    tmp2 = mul6464(in[2], scalar);
-    tmp = add128(tmp2, rshift128(tmp, 51));
-    output[2] = mask51(tmp);
-
-    tmp2 = mul6464(in[3], scalar);
-    tmp = add128(tmp2, rshift128(tmp, 51));
-    output[3] = mask51(tmp);
-
-    tmp2 = mul6464(in[4], scalar);
-    tmp = add128(tmp2, rshift128(tmp, 51));
-    output[4] = mask51(tmp);
-
-    output[0] += mask_lower(rshift128(tmp, 51)) * 19;
-}
-
-/* Multiply two numbers: output = in2 * in
- *
- * output must be distinct to both inputs. The inputs are reduced coefficient
- * form, the output is not.
- */
-static void
-fmul(felem *output, const felem *in2, const felem *in)
-{
-    uint128_t t0, t1, t2, t3, t4, t5, t6, t7, t8;
-
-    t0 = mul6464(in[0], in2[0]);
-    t1 = add128(mul6464(in[1], in2[0]), mul6464(in[0], in2[1]));
-    t2 = add128(add128(mul6464(in[0], in2[2]),
-                       mul6464(in[2], in2[0])),
-                mul6464(in[1], in2[1]));
-    t3 = add128(add128(add128(mul6464(in[0], in2[3]),
-                              mul6464(in[3], in2[0])),
-                       mul6464(in[1], in2[2])),
-                mul6464(in[2], in2[1]));
-    t4 = add128(add128(add128(add128(mul6464(in[0], in2[4]),
-                                     mul6464(in[4], in2[0])),
-                              mul6464(in[3], in2[1])),
-                       mul6464(in[1], in2[3])),
-                mul6464(in[2], in2[2]));
-    t5 = add128(add128(add128(mul6464(in[4], in2[1]),
-                              mul6464(in[1], in2[4])),
-                       mul6464(in[2], in2[3])),
-                mul6464(in[3], in2[2]));
-    t6 = add128(add128(mul6464(in[4], in2[2]),
-                       mul6464(in[2], in2[4])),
-                mul6464(in[3], in2[3]));
-    t7 = add128(mul6464(in[3], in2[4]), mul6464(in[4], in2[3]));
-    t8 = mul6464(in[4], in2[4]);
-
-    t0 = add128(t0, mul12819(t5));
-    t1 = add128(t1, mul12819(t6));
-    t2 = add128(t2, mul12819(t7));
-    t3 = add128(t3, mul12819(t8));
-
-    t1 = add128(t1, rshift128(t0, 51));
-    t0 = mask51full(t0);
-    t2 = add128(t2, rshift128(t1, 51));
-    t1 = mask51full(t1);
-    t3 = add128(t3, rshift128(t2, 51));
-    t4 = add128(t4, rshift128(t3, 51));
-    t0 = add128(t0, mul12819(rshift128(t4, 51)));
-    t1 = add128(t1, rshift128(t0, 51));
-    t2 = mask51full(t2);
-    t2 = add128(t2, rshift128(t1, 51));
-
-    output[0] = mask51(t0);
-    output[1] = mask51(t1);
-    output[2] = mask_lower(t2);
-    output[3] = mask51(t3);
-    output[4] = mask51(t4);
-}
-
-static void
-fsquare(felem *output, const felem *in)
-{
-    uint128_t t0, t1, t2, t3, t4, t5, t6, t7, t8;
-
-    t0 = mul6464(in[0], in[0]);
-    t1 = lshift128(mul6464(in[0], in[1]), 1);
-    t2 = add128(lshift128(mul6464(in[0], in[2]), 1),
-                mul6464(in[1], in[1]));
-    t3 = add128(lshift128(mul6464(in[0], in[3]), 1),
-                lshift128(mul6464(in[1], in[2]), 1));
-    t4 = add128(add128(lshift128(mul6464(in[0], in[4]), 1),
-                       lshift128(mul6464(in[3], in[1]), 1)),
-                mul6464(in[2], in[2]));
-    t5 = add128(lshift128(mul6464(in[4], in[1]), 1),
-                lshift128(mul6464(in[2], in[3]), 1));
-    t6 = add128(lshift128(mul6464(in[4], in[2]), 1),
-                mul6464(in[3], in[3]));
-    t7 = lshift128(mul6464(in[3], in[4]), 1);
-    t8 = mul6464(in[4], in[4]);
-
-    t0 = add128(t0, mul12819(t5));
-    t1 = add128(t1, mul12819(t6));
-    t2 = add128(t2, mul12819(t7));
-    t3 = add128(t3, mul12819(t8));
-
-    t1 = add128(t1, rshift128(t0, 51));
-    t0 = mask51full(t0);
-    t2 = add128(t2, rshift128(t1, 51));
-    t1 = mask51full(t1);
-    t3 = add128(t3, rshift128(t2, 51));
-    t4 = add128(t4, rshift128(t3, 51));
-    t0 = add128(t0, mul12819(rshift128(t4, 51)));
-    t1 = add128(t1, rshift128(t0, 51));
-
-    output[0] = mask51(t0);
-    output[1] = mask_lower(t1);
-    output[2] = mask51(t2);
-    output[3] = mask51(t3);
-    output[4] = mask51(t4);
-}
-
-/* Take a 32-byte number and expand it into polynomial form */
-static void NO_SANITIZE_ALIGNMENT
-fexpand(felem *output, const u8 *in)
-{
-    output[0] = *((const uint64_t *)(in)) & MASK51;
-    output[1] = (*((const uint64_t *)(in + 6)) >> 3) & MASK51;
-    output[2] = (*((const uint64_t *)(in + 12)) >> 6) & MASK51;
-    output[3] = (*((const uint64_t *)(in + 19)) >> 1) & MASK51;
-    output[4] = (*((const uint64_t *)(in + 24)) >> 12) & MASK51;
-}
-
-/* Take a fully reduced polynomial form number and contract it into a
- * 32-byte array
- */
-static void
-fcontract(u8 *output, const felem *input)
-{
-    uint128_t t0 = init128x(input[0]);
-    uint128_t t1 = init128x(input[1]);
-    uint128_t t2 = init128x(input[2]);
-    uint128_t t3 = init128x(input[3]);
-    uint128_t t4 = init128x(input[4]);
-    uint128_t tmp = init128x(19);
-
-    t1 = add128(t1, rshift128(t0, 51));
-    t0 = mask51full(t0);
-    t2 = add128(t2, rshift128(t1, 51));
-    t1 = mask51full(t1);
-    t3 = add128(t3, rshift128(t2, 51));
-    t2 = mask51full(t2);
-    t4 = add128(t4, rshift128(t3, 51));
-    t3 = mask51full(t3);
-    t0 = add128(t0, mul12819(rshift128(t4, 51)));
-    t4 = mask51full(t4);
-
-    t1 = add128(t1, rshift128(t0, 51));
-    t0 = mask51full(t0);
-    t2 = add128(t2, rshift128(t1, 51));
-    t1 = mask51full(t1);
-    t3 = add128(t3, rshift128(t2, 51));
-    t2 = mask51full(t2);
-    t4 = add128(t4, rshift128(t3, 51));
-    t3 = mask51full(t3);
-    t0 = add128(t0, mul12819(rshift128(t4, 51)));
-    t4 = mask51full(t4);
-
-    /* now t is between 0 and 2^255-1, properly carried. */
-    /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
-
-    t0 = add128(t0, tmp);
-
-    t1 = add128(t1, rshift128(t0, 51));
-    t0 = mask51full(t0);
-    t2 = add128(t2, rshift128(t1, 51));
-    t1 = mask51full(t1);
-    t3 = add128(t3, rshift128(t2, 51));
-    t2 = mask51full(t2);
-    t4 = add128(t4, rshift128(t3, 51));
-    t3 = mask51full(t3);
-    t0 = add128(t0, mul12819(rshift128(t4, 51)));
-    t4 = mask51full(t4);
-
-    /* now between 19 and 2^255-1 in both cases, and offset by 19. */
-
-    t0 = add128(t0, init128x(0x8000000000000 - 19));
-    tmp = init128x(0x8000000000000 - 1);
-    t1 = add128(t1, tmp);
-    t2 = add128(t2, tmp);
-    t3 = add128(t3, tmp);
-    t4 = add128(t4, tmp);
-
-    /* now between 2^255 and 2^256-20, and offset by 2^255. */
-
-    t1 = add128(t1, rshift128(t0, 51));
-    t0 = mask51full(t0);
-    t2 = add128(t2, rshift128(t1, 51));
-    t1 = mask51full(t1);
-    t3 = add128(t3, rshift128(t2, 51));
-    t2 = mask51full(t2);
-    t4 = add128(t4, rshift128(t3, 51));
-    t3 = mask51full(t3);
-    t4 = mask51full(t4);
-
-    *((uint64_t *)(output)) = mask_lower(t0) | mask_lower(t1) << 51;
-    *((uint64_t *)(output + 8)) = (mask_lower(t1) >> 13) | (mask_lower(t2) << 38);
-    *((uint64_t *)(output + 16)) = (mask_lower(t2) >> 26) | (mask_lower(t3) << 25);
-    *((uint64_t *)(output + 24)) = (mask_lower(t3) >> 39) | (mask_lower(t4) << 12);
-}
-
-/* Input: Q, Q', Q-Q'
- * Output: 2Q, Q+Q'
- *
- *   x2 z3: long form
- *   x3 z3: long form
- *   x z: short form, destroyed
- *   xprime zprime: short form, destroyed
- *   qmqp: short form, preserved
- */
-static void
-fmonty(felem *x2, felem *z2,         /* output 2Q */
-       felem *x3, felem *z3,         /* output Q + Q' */
-       felem *x, felem *z,           /* input Q */
-       felem *xprime, felem *zprime, /* input Q' */
-       const felem *qmqp /* input Q - Q' */)
-{
-    felem origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5], zzprime[5],
-        zzzprime[5];
-
-    memcpy(origx, x, 5 * sizeof(felem));
-    fsum(x, z);
-    fdifference_backwards(z, origx); // does x - z
-
-    memcpy(origxprime, xprime, sizeof(felem) * 5);
-    fsum(xprime, zprime);
-    fdifference_backwards(zprime, origxprime);
-    fmul(xxprime, xprime, z);
-    fmul(zzprime, x, zprime);
-    memcpy(origxprime, xxprime, sizeof(felem) * 5);
-    fsum(xxprime, zzprime);
-    fdifference_backwards(zzprime, origxprime);
-    fsquare(x3, xxprime);
-    fsquare(zzzprime, zzprime);
-    fmul(z3, zzzprime, qmqp);
-
-    fsquare(xx, x);
-    fsquare(zz, z);
-    fmul(x2, xx, zz);
-    fdifference_backwards(zz, xx); // does zz = xx - zz
-    fscalar_product(zzz, zz, 121665);
-    fsum(zzz, xx);
-    fmul(z2, zz, zzz);
-}
-
-// -----------------------------------------------------------------------------
-// Maybe swap the contents of two felem arrays (@a and @b), each @len elements
-// long. Perform the swap iff @swap is non-zero.
-//
-// This function performs the swap without leaking any side-channel
-// information.
-// -----------------------------------------------------------------------------
-static void
-swap_conditional(felem *a, felem *b, unsigned len, felem iswap)
-{
-    unsigned i;
-    const felem swap = 1 + ~iswap;
-
-    for (i = 0; i < len; ++i) {
-        const felem x = swap & (a[i] ^ b[i]);
-        a[i] ^= x;
-        b[i] ^= x;
-    }
-}
-
-/* Calculates nQ where Q is the x-coordinate of a point on the curve
- *
- *   resultx/resultz: the x coordinate of the resulting curve point (short form)
- *   n: a 32-byte number
- *   q: a point of the curve (short form)
- */
-static void
-cmult(felem *resultx, felem *resultz, const u8 *n, const felem *q)
-{
-    felem a[5] = { 0 }, b[5] = { 1 }, c[5] = { 1 }, d[5] = { 0 };
-    felem *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
-    felem e[5] = { 0 }, f[5] = { 1 }, g[5] = { 0 }, h[5] = { 1 };
-    felem *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
-
-    unsigned i, j;
-
-    memcpy(nqpqx, q, sizeof(felem) * 5);
-
-    for (i = 0; i < 32; ++i) {
-        u8 byte = n[31 - i];
-        for (j = 0; j < 8; ++j) {
-            const felem bit = byte >> 7;
-
-            swap_conditional(nqx, nqpqx, 5, bit);
-            swap_conditional(nqz, nqpqz, 5, bit);
-            fmonty(nqx2, nqz2, nqpqx2, nqpqz2, nqx, nqz, nqpqx, nqpqz, q);
-            swap_conditional(nqx2, nqpqx2, 5, bit);
-            swap_conditional(nqz2, nqpqz2, 5, bit);
-
-            t = nqx;
-            nqx = nqx2;
-            nqx2 = t;
-            t = nqz;
-            nqz = nqz2;
-            nqz2 = t;
-            t = nqpqx;
-            nqpqx = nqpqx2;
-            nqpqx2 = t;
-            t = nqpqz;
-            nqpqz = nqpqz2;
-            nqpqz2 = t;
-
-            byte <<= 1;
-        }
-    }
-
-    memcpy(resultx, nqx, sizeof(felem) * 5);
-    memcpy(resultz, nqz, sizeof(felem) * 5);
-}
-
-// -----------------------------------------------------------------------------
-// Shamelessly copied from djb's code
-// -----------------------------------------------------------------------------
-static void
-crecip(felem *out, const felem *z)
-{
-    felem z2[5];
-    felem z9[5];
-    felem z11[5];
-    felem z2_5_0[5];
-    felem z2_10_0[5];
-    felem z2_20_0[5];
-    felem z2_50_0[5];
-    felem z2_100_0[5];
-    felem t0[5];
-    felem t1[5];
-    int i;
-
-    /* 2 */ fsquare(z2, z);
-    /* 4 */ fsquare(t1, z2);
-    /* 8 */ fsquare(t0, t1);
-    /* 9 */ fmul(z9, t0, z);
-    /* 11 */ fmul(z11, z9, z2);
-    /* 22 */ fsquare(t0, z11);
-    /* 2^5 - 2^0 = 31 */ fmul(z2_5_0, t0, z9);
-
-    /* 2^6 - 2^1 */ fsquare(t0, z2_5_0);
-    /* 2^7 - 2^2 */ fsquare(t1, t0);
-    /* 2^8 - 2^3 */ fsquare(t0, t1);
-    /* 2^9 - 2^4 */ fsquare(t1, t0);
-    /* 2^10 - 2^5 */ fsquare(t0, t1);
-    /* 2^10 - 2^0 */ fmul(z2_10_0, t0, z2_5_0);
-
-    /* 2^11 - 2^1 */ fsquare(t0, z2_10_0);
-    /* 2^12 - 2^2 */ fsquare(t1, t0);
-    /* 2^20 - 2^10 */ for (i = 2; i < 10; i += 2) {
-        fsquare(t0, t1);
-        fsquare(t1, t0);
-    }
-    /* 2^20 - 2^0 */ fmul(z2_20_0, t1, z2_10_0);
-
-    /* 2^21 - 2^1 */ fsquare(t0, z2_20_0);
-    /* 2^22 - 2^2 */ fsquare(t1, t0);
-    /* 2^40 - 2^20 */ for (i = 2; i < 20; i += 2) {
-        fsquare(t0, t1);
-        fsquare(t1, t0);
-    }
-    /* 2^40 - 2^0 */ fmul(t0, t1, z2_20_0);
-
-    /* 2^41 - 2^1 */ fsquare(t1, t0);
-    /* 2^42 - 2^2 */ fsquare(t0, t1);
-    /* 2^50 - 2^10 */ for (i = 2; i < 10; i += 2) {
-        fsquare(t1, t0);
-        fsquare(t0, t1);
-    }
-    /* 2^50 - 2^0 */ fmul(z2_50_0, t0, z2_10_0);
-
-    /* 2^51 - 2^1 */ fsquare(t0, z2_50_0);
-    /* 2^52 - 2^2 */ fsquare(t1, t0);
-    /* 2^100 - 2^50 */ for (i = 2; i < 50; i += 2) {
-        fsquare(t0, t1);
-        fsquare(t1, t0);
-    }
-    /* 2^100 - 2^0 */ fmul(z2_100_0, t1, z2_50_0);
-
-    /* 2^101 - 2^1 */ fsquare(t1, z2_100_0);
-    /* 2^102 - 2^2 */ fsquare(t0, t1);
-    /* 2^200 - 2^100 */ for (i = 2; i < 100; i += 2) {
-        fsquare(t1, t0);
-        fsquare(t0, t1);
-    }
-    /* 2^200 - 2^0 */ fmul(t1, t0, z2_100_0);
-
-    /* 2^201 - 2^1 */ fsquare(t0, t1);
-    /* 2^202 - 2^2 */ fsquare(t1, t0);
-    /* 2^250 - 2^50 */ for (i = 2; i < 50; i += 2) {
-        fsquare(t0, t1);
-        fsquare(t1, t0);
-    }
-    /* 2^250 - 2^0 */ fmul(t0, t1, z2_50_0);
-
-    /* 2^251 - 2^1 */ fsquare(t1, t0);
-    /* 2^252 - 2^2 */ fsquare(t0, t1);
-    /* 2^253 - 2^3 */ fsquare(t1, t0);
-    /* 2^254 - 2^4 */ fsquare(t0, t1);
-    /* 2^255 - 2^5 */ fsquare(t1, t0);
-    /* 2^255 - 21 */ fmul(out, t1, z11);
-}
+#include "../verified/hacl_curve25519_64.h"
 
 SECStatus
-ec_Curve25519_mul(uint8_t *mypublic, const uint8_t *secret,
-                  const uint8_t *basepoint)
+ec_Curve25519_mul(uint8_t *mypublic, const uint8_t *secret, const uint8_t *basepoint)
 {
-    felem bp[5], x[5], z[5], zmone[5];
-    uint8_t e[32];
-    int i;
-
-    for (i = 0; i < 32; ++i) {
-        e[i] = secret[i];
-    }
-    e[0] &= 248;
-    e[31] &= 127;
-    e[31] |= 64;
-    fexpand(bp, basepoint);
-    cmult(x, z, e, bp);
-    crecip(zmone, z);
-    fmul(z, x, zmone);
-    fcontract(mypublic, z);
-
+    // Note: this cast is safe because the verified HaCl* code has a post-condition guaranteeing that only "mypublic" is modified.
+    Curve25519_crypto_scalarmult(mypublic, (uint8_t *)secret, (uint8_t *)basepoint);
     return 0;
 }
deleted file mode 100644
--- a/lib/freebl/ecl/uint128.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include "uint128.h"
-
-/* helper functions */
-uint64_t
-mask51(uint128_t x)
-{
-    return x.lo & MASK51;
-}
-
-uint64_t
-mask_lower(uint128_t x)
-{
-    return x.lo;
-}
-
-uint128_t
-mask51full(uint128_t x)
-{
-    uint128_t ret = { x.lo & MASK51, 0 };
-    return ret;
-}
-
-uint128_t
-init128x(uint64_t x)
-{
-    uint128_t ret = { x, 0 };
-    return ret;
-}
-
-#define CONSTANT_TIME_CARRY(a, b) \
-    ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1))
-
-/* arithmetic */
-
-uint128_t
-add128(uint128_t a, uint128_t b)
-{
-    uint128_t ret;
-    ret.lo = a.lo + b.lo;
-    ret.hi = a.hi + b.hi + CONSTANT_TIME_CARRY(ret.lo, b.lo);
-    return ret;
-}
-
-/* out = 19 * a */
-uint128_t
-mul12819(uint128_t a)
-{
-    uint128_t ret = lshift128(a, 4);
-    ret = add128(ret, a);
-    ret = add128(ret, a);
-    ret = add128(ret, a);
-    return ret;
-}
-
-uint128_t
-mul6464(uint64_t a, uint64_t b)
-{
-    uint128_t ret;
-    uint64_t t0 = ((uint64_t)(uint32_t)a) * ((uint64_t)(uint32_t)b);
-    uint64_t t1 = (a >> 32) * ((uint64_t)(uint32_t)b) + (t0 >> 32);
-    uint64_t t2 = (b >> 32) * ((uint64_t)(uint32_t)a) + ((uint32_t)t1);
-    ret.lo = (((uint64_t)((uint32_t)t2)) << 32) + ((uint32_t)t0);
-    ret.hi = (a >> 32) * (b >> 32);
-    ret.hi += (t2 >> 32) + (t1 >> 32);
-    return ret;
-}
-
-/* only defined for n < 64 */
-uint128_t
-rshift128(uint128_t x, uint8_t n)
-{
-    uint128_t ret;
-    ret.lo = (x.lo >> n) + (x.hi << (64 - n));
-    ret.hi = x.hi >> n;
-    return ret;
-}
-
-/* only defined for n < 64 */
-uint128_t
-lshift128(uint128_t x, uint8_t n)
-{
-    uint128_t ret;
-    ret.hi = (x.hi << n) + (x.lo >> (64 - n));
-    ret.lo = x.lo << n;
-    return ret;
-}
deleted file mode 100644
--- a/lib/freebl/ecl/uint128.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#include <stdint.h>
-
-#define MASK51 0x7ffffffffffffULL
-
-#ifdef HAVE_INT128_SUPPORT
-typedef unsigned __int128 uint128_t;
-#define add128(a, b) (a) + (b)
-#define mul6464(a, b) (uint128_t)(a) * (uint128_t)(b)
-#define mul12819(a) (uint128_t)(a) * 19
-#define rshift128(x, n) (x) >> (n)
-#define lshift128(x, n) (x) << (n)
-#define mask51(x) (x) & 0x7ffffffffffff
-#define mask_lower(x) (uint64_t)(x)
-#define mask51full(x) (x) & 0x7ffffffffffff
-#define init128x(x) (x)
-#else /* uint128_t for Windows and 32 bit intel systems */
-struct uint128_t_str {
-    uint64_t lo;
-    uint64_t hi;
-};
-typedef struct uint128_t_str uint128_t;
-uint128_t add128(uint128_t a, uint128_t b);
-uint128_t mul6464(uint64_t a, uint64_t b);
-uint128_t mul12819(uint128_t a);
-uint128_t rshift128(uint128_t x, uint8_t n);
-uint128_t lshift128(uint128_t x, uint8_t n);
-uint64_t mask51(uint128_t x);
-uint64_t mask_lower(uint128_t x);
-uint128_t mask51full(uint128_t x);
-uint128_t init128x(uint64_t x);
-#endif
--- a/lib/freebl/freebl.gyp
+++ b/lib/freebl/freebl.gyp
@@ -137,17 +137,18 @@
           }
         },
       ],
     }],
   ],
   'target_defaults': {
     'include_dirs': [
       'mpi',
-      'ecl'
+      'ecl',
+      'verified',
     ],
     'defines': [
       'SHLIB_SUFFIX=\"<(dll_suffix)\"',
       'SHLIB_PREFIX=\"<(dll_prefix)\"',
       'SHLIB_VERSION=\"3\"',
       'SOFTOKEN_SHLIB_VERSION=\"3\"',
       'RIJNDAEL_INCLUDE_TABLES',
       'MP_API_COMPATIBLE'
@@ -224,20 +225,16 @@
       }],
       [ 'OS!="win"', {
         'conditions': [
           [ 'target_arch=="x64" or target_arch=="arm64" or target_arch=="aarch64"', {
             'defines': [
               # The Makefile does version-tests on GCC, but we're not doing that here.
               'HAVE_INT128_SUPPORT',
             ],
-          }, {
-            'sources': [
-              'ecl/uint128.c',
-            ],
           }],
         ],
       }],
       [ 'OS=="linux"', {
         'defines': [
           'FREEBL_LOWHASH',
           'FREEBL_NO_DEPEND',
         ],
--- a/lib/freebl/freebl_base.gypi
+++ b/lib/freebl/freebl_base.gypi
@@ -93,20 +93,16 @@
         [ 'target_arch=="arm"', {
           'sources': [
             'mpi/mpi_arm.c',
           ],
         }],
       ],
     }],
     [ 'OS=="win"', {
-      'sources': [
-        #TODO: building with mingw should not need this.
-        'ecl/uint128.c',
-      ],
       'libraries': [
         'advapi32.lib',
       ],
       'conditions': [
         [ 'cc_use_gnu_ld!=1 and target_arch=="x64"', {
           'sources': [
             'arcfour-amd64-masm.asm',
             'mpi/mpi_amd64.c',
@@ -131,24 +127,24 @@
           ],
         }],
       ],
     }],
     ['target_arch=="ia32" or target_arch=="x64"', {
       'sources': [
         # All intel architectures get the 64 bit version
         'ecl/curve25519_64.c',
+        'verified/hacl_curve25519_64.c',
       ],
     }, {
       'sources': [
         # All non intel architectures get the generic 32 bit implementation (slow!)
         'ecl/curve25519_32.c',
       ],
     }],
-    #TODO uint128.c
     [ 'disable_chachapoly==0', {
       'conditions': [
         [ 'OS!="win" and target_arch=="x64"', {
           'sources': [
             'chacha20_vec.c',
             'poly1305-donna-x64-sse2-incremental-source.c',
           ],
         }, {
--- a/lib/freebl/manifest.mn
+++ b/lib/freebl/manifest.mn
@@ -1,15 +1,15 @@
-# 
+#
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
 
 # NOTE: any ifdefs in this file must be defined on the gmake command line
-# (if anywhere).  They cannot come from Makefile or config.mk 
+# (if anywhere).  They cannot come from Makefile or config.mk
 
 CORE_DEPTH = ../..
 
 MODULE = nss
 
 # copied from Linux.mk. We have a chicken and egg issue here. We need to set
 # Library name before we call the platform code in coreconf, but we need to
 # Pick up the automatic setting of FREEBL_LOWHASH before we  can set the
@@ -70,17 +70,17 @@ endif
 MAPFILE = $(OBJDIR)/$(LIBRARY_NAME).def
 
 SOFTOKEN_LIBRARY_VERSION = 3
 
 DEFINES += -DSHLIB_SUFFIX=\"$(DLL_SUFFIX)\" -DSHLIB_PREFIX=\"$(DLL_PREFIX)\" \
 	-DSHLIB_VERSION=\"$(LIBRARY_VERSION)\" \
 	-DSOFTOKEN_SHLIB_VERSION=\"$(SOFTOKEN_LIBRARY_VERSION)\"
 
-REQUIRES = 
+REQUIRES =
 
 EXPORTS = \
 	blapit.h \
 	shsign.h \
 	ecl-exp.h \
 	$(LOWHASH_EXPORTS) \
 	$(NULL)
 
@@ -107,16 +107,17 @@ ECL_SRCS = ecl.c ecl_mult.c ecl_gf.c \
 	ecp_aff.c ecp_jac.c ecp_mont.c \
 	ec_naf.c ecp_jm.c ecp_256.c ecp_384.c ecp_521.c \
 	ecp_256_32.c ecp_25519.c
 else
 ECL_SRCS = $(NULL)
 endif
 SHA_SRCS = sha_fast.c
 MPCPU_SRCS = mpcpucache.c
+VERIFIED_SRCS = $(NULL)
 
 CSRCS = \
 	freeblver.c \
 	ldvector.c \
 	sysrand.c \
 	$(SHA_SRCS) \
 	md2.c \
 	md5.c \
@@ -148,16 +149,17 @@ CSRCS = \
 	rsapkcs.c \
 	shvfy.c \
 	tlsprfalg.c \
 	seed.c \
 	jpake.c \
 	$(MPI_SRCS) \
 	$(MPCPU_SRCS) \
 	$(ECL_SRCS) \
+	$(VERIFIED_SRCS) \
 	$(STUBS_SRCS) \
 	$(LOWHASH_SRCS) \
 	$(EXTRA_SRCS) \
 	$(NULL)
 
 ALL_CSRCS := $(CSRCS)
 
 ALL_HDRS =  \
@@ -175,17 +177,17 @@ ALL_HDRS =  \
 	shsign.h \
 	vis_proto.h \
 	seed.h \
 	$(NULL)
 
 
 ifdef AES_GEN_TBL
 DEFINES += -DRIJNDAEL_GENERATE_TABLES
-else 
+else
 ifdef AES_GEN_TBL_M
 DEFINES += -DRIJNDAEL_GENERATE_TABLES_MACRO
 else
 ifdef AES_GEN_VAL
 DEFINES += -DRIJNDAEL_GENERATE_VALUES
 else
 ifdef AES_GEN_VAL_M
 DEFINES += -DRIJNDAEL_GENERATE_VALUES_MACRO
new file mode 100644
--- /dev/null
+++ b/lib/freebl/verified/fstar_uint128.h
@@ -0,0 +1,291 @@
+// Copyright 2016-2017 Microsoft Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/* This file was auto-generated by KreMLin! */
+
+#ifndef __FStar_UInt128_H
+#define __FStar_UInt128_H
+
+typedef struct
+{
+    uint64_t low;
+    uint64_t high;
+} FStar_UInt128_uint128;
+
+typedef FStar_UInt128_uint128 FStar_UInt128_t;
+
+typedef struct
+{
+    uint64_t fst;
+    uint64_t snd;
+    uint64_t thd;
+    uint64_t f3;
+} K___uint64_t_uint64_t_uint64_t_uint64_t;
+
+static inline uint64_t
+FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b)
+{
+    return (a ^ ((a ^ b) | ((a - b) ^ b))) >> (uint32_t)63;
+}
+
+static inline uint64_t
+FStar_UInt128_carry(uint64_t a, uint64_t b)
+{
+    return FStar_UInt128_constant_time_carry(a, b);
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    return (
+        (FStar_UInt128_uint128){
+            .low = a.low + b.low,
+            .high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low) });
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    return (
+        (FStar_UInt128_uint128){
+            .low = a.low + b.low,
+            .high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low) });
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    return (
+        (FStar_UInt128_uint128){
+            .low = a.low - b.low,
+            .high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low) });
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    return (
+        (FStar_UInt128_uint128){
+            .low = a.low - b.low,
+            .high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low) });
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    return FStar_UInt128_sub_mod_impl(a, b);
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    return ((FStar_UInt128_uint128){.low = a.low & b.low, .high = a.high & b.high });
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    return ((FStar_UInt128_uint128){.low = a.low ^ b.low, .high = a.high ^ b.high });
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    return ((FStar_UInt128_uint128){.low = a.low | b.low, .high = a.high | b.high });
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_lognot(FStar_UInt128_uint128 a)
+{
+    return ((FStar_UInt128_uint128){.low = ~a.low, .high = ~a.high });
+}
+
+static uint32_t FStar_UInt128_u32_64 = (uint32_t)64;
+
+static inline uint64_t
+FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    return (hi << s) + (lo >> (FStar_UInt128_u32_64 - s));
+}
+
+static inline uint64_t
+FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    return FStar_UInt128_add_u64_shift_left(hi, lo, s);
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s)
+{
+    if (s == (uint32_t)0)
+        return a;
+    else
+        return (
+            (FStar_UInt128_uint128){
+                .low = a.low << s,
+                .high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s) });
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s)
+{
+    return ((FStar_UInt128_uint128){.low = (uint64_t)0, .high = a.low << (s - FStar_UInt128_u32_64) });
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s)
+{
+    if (s < FStar_UInt128_u32_64)
+        return FStar_UInt128_shift_left_small(a, s);
+    else
+        return FStar_UInt128_shift_left_large(a, s);
+}
+
+static inline uint64_t
+FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    return (lo >> s) + (hi << (FStar_UInt128_u32_64 - s));
+}
+
+static inline uint64_t
+FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s)
+{
+    return FStar_UInt128_add_u64_shift_right(hi, lo, s);
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s)
+{
+    if (s == (uint32_t)0)
+        return a;
+    else
+        return (
+            (FStar_UInt128_uint128){
+                .low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s),
+                .high = a.high >> s });
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s)
+{
+    return ((FStar_UInt128_uint128){.low = a.high >> (s - FStar_UInt128_u32_64), .high = (uint64_t)0 });
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s)
+{
+    if (s < FStar_UInt128_u32_64)
+        return FStar_UInt128_shift_right_small(a, s);
+    else
+        return FStar_UInt128_shift_right_large(a, s);
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    return (
+        (FStar_UInt128_uint128){
+            .low = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high),
+            .high = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high) });
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b)
+{
+    return (
+        (FStar_UInt128_uint128){
+            .low = (FStar_UInt64_gte_mask(a.high,
+                                          b.high) &
+                    ~FStar_UInt64_eq_mask(a.high, b.high)) |
+                   (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)),
+            .high = (FStar_UInt64_gte_mask(a.high,
+                                           b.high) &
+                     ~FStar_UInt64_eq_mask(a.high, b.high)) |
+                    (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)) });
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_uint64_to_uint128(uint64_t a)
+{
+    return ((FStar_UInt128_uint128){.low = a, .high = (uint64_t)0 });
+}
+
+static inline uint64_t
+FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a)
+{
+    return a.low;
+}
+
+static uint64_t FStar_UInt128_u64_l32_mask = (uint64_t)0xffffffff;
+
+static inline uint64_t
+FStar_UInt128_u64_mod_32(uint64_t a)
+{
+    return a & FStar_UInt128_u64_l32_mask;
+}
+
+static uint32_t FStar_UInt128_u32_32 = (uint32_t)32;
+
+static inline K___uint64_t_uint64_t_uint64_t_uint64_t
+FStar_UInt128_mul_wide_impl_t_(uint64_t x, uint64_t y)
+{
+    return (
+        (K___uint64_t_uint64_t_uint64_t_uint64_t){
+            .fst = FStar_UInt128_u64_mod_32(x),
+            .snd = FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y)),
+            .thd = x >> FStar_UInt128_u32_32,
+            .f3 = (x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32) });
+}
+
+static inline uint64_t
+FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo)
+{
+    return lo + (hi << FStar_UInt128_u32_32);
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_mul_wide_impl(uint64_t x, uint64_t y)
+{
+    K___uint64_t_uint64_t_uint64_t_uint64_t scrut = FStar_UInt128_mul_wide_impl_t_(x, y);
+    uint64_t u1 = scrut.fst;
+    uint64_t w3 = scrut.snd;
+    uint64_t x_ = scrut.thd;
+    uint64_t t_ = scrut.f3;
+    return (
+        (FStar_UInt128_uint128){
+            .low = FStar_UInt128_u32_combine_(u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_),
+                                              w3),
+            .high = x_ * (y >> FStar_UInt128_u32_32) + (t_ >> FStar_UInt128_u32_32) +
+                    ((u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_)) >> FStar_UInt128_u32_32) });
+}
+
+static inline FStar_UInt128_uint128
+FStar_UInt128_mul_wide(uint64_t x, uint64_t y)
+{
+    return FStar_UInt128_mul_wide_impl(x, y);
+}
+
+static inline FStar_UInt128_uint128
+FStar_Int_Cast_Full_uint64_to_uint128(uint64_t a)
+{
+    return FStar_UInt128_uint64_to_uint128(a);
+}
+
+static inline uint64_t
+FStar_Int_Cast_Full_uint128_to_uint64(FStar_UInt128_uint128 a)
+{
+    return FStar_UInt128_uint128_to_uint64(a);
+}
+
+#endif
new file mode 100644
--- /dev/null
+++ b/lib/freebl/verified/hacl_curve25519_64.c
@@ -0,0 +1,1044 @@
+// Copyright 2016-2017 INRIA and Microsoft Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "hacl_curve25519_64.h"
+
+static void
+Hacl_Bignum_Modulo_carry_top(uint64_t *b)
+{
+    uint64_t b4 = b[4];
+    uint64_t b0 = b[0];
+    uint64_t mask = ((uint64_t)1 << (uint32_t)51) - (uint64_t)1;
+    uint64_t b4_ = b4 & mask;
+    uint64_t b0_ = b0 + (uint64_t)19 * (b4 >> (uint32_t)51);
+    b[4] = b4_;
+    b[0] = b0_;
+}
+
+inline static void
+Hacl_Bignum_Fproduct_copy_from_wide_(uint64_t *output, FStar_UInt128_t *input)
+{
+    {
+        FStar_UInt128_t uu____429 = input[0];
+        uint64_t uu____428 = FStar_Int_Cast_Full_uint128_to_uint64(uu____429);
+        output[0] = uu____428;
+    }
+    {
+        FStar_UInt128_t uu____429 = input[1];
+        uint64_t uu____428 = FStar_Int_Cast_Full_uint128_to_uint64(uu____429);
+        output[1] = uu____428;
+    }
+    {
+        FStar_UInt128_t uu____429 = input[2];
+        uint64_t uu____428 = FStar_Int_Cast_Full_uint128_to_uint64(uu____429);
+        output[2] = uu____428;
+    }
+    {
+        FStar_UInt128_t uu____429 = input[3];
+        uint64_t uu____428 = FStar_Int_Cast_Full_uint128_to_uint64(uu____429);
+        output[3] = uu____428;
+    }
+    {
+        FStar_UInt128_t uu____429 = input[4];
+        uint64_t uu____428 = FStar_Int_Cast_Full_uint128_to_uint64(uu____429);
+        output[4] = uu____428;
+    }
+}
+
+inline static void
+Hacl_Bignum_Fproduct_shift(uint64_t *output)
+{
+    uint64_t tmp = output[4];
+    {
+        uint32_t ctr = (uint32_t)5 - (uint32_t)0 - (uint32_t)1;
+        uint64_t z = output[ctr - (uint32_t)1];
+        output[ctr] = z;
+    }
+    {
+        uint32_t ctr = (uint32_t)5 - (uint32_t)1 - (uint32_t)1;
+        uint64_t z = output[ctr - (uint32_t)1];
+        output[ctr] = z;
+    }
+    {
+        uint32_t ctr = (uint32_t)5 - (uint32_t)2 - (uint32_t)1;
+        uint64_t z = output[ctr - (uint32_t)1];
+        output[ctr] = z;
+    }
+    {
+        uint32_t ctr = (uint32_t)5 - (uint32_t)3 - (uint32_t)1;
+        uint64_t z = output[ctr - (uint32_t)1];
+        output[ctr] = z;
+    }
+    output[0] = tmp;
+}
+
+inline static void
+Hacl_Bignum_Fproduct_sum_scalar_multiplication_(
+    FStar_UInt128_t *output,
+    uint64_t *input,
+    uint64_t s)
+{
+    {
+        FStar_UInt128_t uu____871 = output[0];
+        uint64_t uu____874 = input[0];
+        FStar_UInt128_t
+            uu____870 = FStar_UInt128_add_mod(uu____871, FStar_UInt128_mul_wide(uu____874, s));
+        output[0] = uu____870;
+    }
+    {
+        FStar_UInt128_t uu____871 = output[1];
+        uint64_t uu____874 = input[1];
+        FStar_UInt128_t
+            uu____870 = FStar_UInt128_add_mod(uu____871, FStar_UInt128_mul_wide(uu____874, s));
+        output[1] = uu____870;
+    }
+    {
+        FStar_UInt128_t uu____871 = output[2];
+        uint64_t uu____874 = input[2];
+        FStar_UInt128_t
+            uu____870 = FStar_UInt128_add_mod(uu____871, FStar_UInt128_mul_wide(uu____874, s));
+        output[2] = uu____870;
+    }
+    {
+        FStar_UInt128_t uu____871 = output[3];
+        uint64_t uu____874 = input[3];
+        FStar_UInt128_t
+            uu____870 = FStar_UInt128_add_mod(uu____871, FStar_UInt128_mul_wide(uu____874, s));
+        output[3] = uu____870;
+    }
+    {
+        FStar_UInt128_t uu____871 = output[4];
+        uint64_t uu____874 = input[4];
+        FStar_UInt128_t
+            uu____870 = FStar_UInt128_add_mod(uu____871, FStar_UInt128_mul_wide(uu____874, s));
+        output[4] = uu____870;
+    }
+}
+
+inline static void
+Hacl_Bignum_Fproduct_carry_wide_(FStar_UInt128_t *tmp)
+{
+    {
+        uint32_t ctr = (uint32_t)0;
+        FStar_UInt128_t tctr = tmp[ctr];
+        FStar_UInt128_t tctrp1 = tmp[ctr + (uint32_t)1];
+        uint64_t
+            r0 =
+                FStar_Int_Cast_Full_uint128_to_uint64(tctr) & (((uint64_t)1 << (uint32_t)51) - (uint64_t)1);
+        FStar_UInt128_t c = FStar_UInt128_shift_right(tctr, (uint32_t)51);
+        tmp[ctr] = FStar_Int_Cast_Full_uint64_to_uint128(r0);
+        tmp[ctr + (uint32_t)1] = FStar_UInt128_add(tctrp1, c);
+    }
+    {
+        uint32_t ctr = (uint32_t)1;
+        FStar_UInt128_t tctr = tmp[ctr];
+        FStar_UInt128_t tctrp1 = tmp[ctr + (uint32_t)1];
+        uint64_t
+            r0 =
+                FStar_Int_Cast_Full_uint128_to_uint64(tctr) & (((uint64_t)1 << (uint32_t)51) - (uint64_t)1);
+        FStar_UInt128_t c = FStar_UInt128_shift_right(tctr, (uint32_t)51);
+        tmp[ctr] = FStar_Int_Cast_Full_uint64_to_uint128(r0);
+        tmp[ctr + (uint32_t)1] = FStar_UInt128_add(tctrp1, c);
+    }
+    {
+        uint32_t ctr = (uint32_t)2;
+        FStar_UInt128_t tctr = tmp[ctr];
+        FStar_UInt128_t tctrp1 = tmp[ctr + (uint32_t)1];
+        uint64_t
+            r0 =
+                FStar_Int_Cast_Full_uint128_to_uint64(tctr) & (((uint64_t)1 << (uint32_t)51) - (uint64_t)1);
+        FStar_UInt128_t c = FStar_UInt128_shift_right(tctr, (uint32_t)51);
+        tmp[ctr] = FStar_Int_Cast_Full_uint64_to_uint128(r0);
+        tmp[ctr + (uint32_t)1] = FStar_UInt128_add(tctrp1, c);
+    }
+    {
+        uint32_t ctr = (uint32_t)3;
+        FStar_UInt128_t tctr = tmp[ctr];
+        FStar_UInt128_t tctrp1 = tmp[ctr + (uint32_t)1];
+        uint64_t
+            r0 =
+                FStar_Int_Cast_Full_uint128_to_uint64(tctr) & (((uint64_t)1 << (uint32_t)51) - (uint64_t)1);
+        FStar_UInt128_t c = FStar_UInt128_shift_right(tctr, (uint32_t)51);
+        tmp[ctr] = FStar_Int_Cast_Full_uint64_to_uint128(r0);
+        tmp[ctr + (uint32_t)1] = FStar_UInt128_add(tctrp1, c);
+    }
+}
+
+inline static void
+Hacl_Bignum_Fmul_shift_reduce(uint64_t *output)
+{
+    Hacl_Bignum_Fproduct_shift(output);
+    uint64_t b0 = output[0];
+    output[0] = (uint64_t)19 * b0;
+}
+
+static void
+Hacl_Bignum_Fmul_mul_shift_reduce_(FStar_UInt128_t *output, uint64_t *input, uint64_t *input21)
+{
+    {
+        uint32_t ctr = (uint32_t)5 - (uint32_t)0 - (uint32_t)1;
+        uint32_t i1 = ctr;
+        uint32_t j = (uint32_t)4 - i1;
+        uint64_t input2i = input21[j];
+        Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i);
+        if (ctr > (uint32_t)0)
+            Hacl_Bignum_Fmul_shift_reduce(input);
+    }
+    {
+        uint32_t ctr = (uint32_t)5 - (uint32_t)1 - (uint32_t)1;
+        uint32_t i1 = ctr;
+        uint32_t j = (uint32_t)4 - i1;
+        uint64_t input2i = input21[j];
+        Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i);
+        if (ctr > (uint32_t)0)
+            Hacl_Bignum_Fmul_shift_reduce(input);
+    }
+    {
+        uint32_t ctr = (uint32_t)5 - (uint32_t)2 - (uint32_t)1;
+        uint32_t i1 = ctr;
+        uint32_t j = (uint32_t)4 - i1;
+        uint64_t input2i = input21[j];
+        Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i);
+        if (ctr > (uint32_t)0)
+            Hacl_Bignum_Fmul_shift_reduce(input);
+    }
+    {
+        uint32_t ctr = (uint32_t)5 - (uint32_t)3 - (uint32_t)1;
+        uint32_t i1 = ctr;
+        uint32_t j = (uint32_t)4 - i1;
+        uint64_t input2i = input21[j];
+        Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i);
+        if (ctr > (uint32_t)0)
+            Hacl_Bignum_Fmul_shift_reduce(input);
+    }
+    {
+        uint32_t ctr = (uint32_t)5 - (uint32_t)4 - (uint32_t)1;
+        uint32_t i1 = ctr;
+        uint32_t j = (uint32_t)4 - i1;
+        uint64_t input2i = input21[j];
+        Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i);
+        if (ctr > (uint32_t)0)
+            Hacl_Bignum_Fmul_shift_reduce(input);
+    }
+}
+
+inline static void
+Hacl_Bignum_Fmul_fmul_(uint64_t *output, uint64_t *input, uint64_t *input21)
+{
+    KRML_CHECK_SIZE(FStar_Int_Cast_Full_uint64_to_uint128((uint64_t)0), (uint32_t)5);
+    FStar_UInt128_t t[5];
+    for (uintmax_t _i = 0; _i < (uint32_t)5; ++_i)
+        t[_i] = FStar_Int_Cast_Full_uint64_to_uint128((uint64_t)0);
+    Hacl_Bignum_Fmul_mul_shift_reduce_(t, input, input21);
+    Hacl_Bignum_Fproduct_carry_wide_(t);
+    FStar_UInt128_t b4 = t[4];
+    FStar_UInt128_t b0 = t[0];
+    FStar_UInt128_t
+        mask =
+            FStar_UInt128_sub(FStar_UInt128_shift_left(FStar_Int_Cast_Full_uint64_to_uint128((uint64_t)1),
+                                                       (uint32_t)51),
+                              FStar_Int_Cast_Full_uint64_to_uint128((uint64_t)1));
+    FStar_UInt128_t b4_ = FStar_UInt128_logand(b4, mask);
+    FStar_UInt128_t
+        b0_ =
+            FStar_UInt128_add(b0,
+                              FStar_UInt128_mul_wide((uint64_t)19,
+                                                     FStar_Int_Cast_Full_uint128_to_uint64(FStar_UInt128_shift_right(b4, (uint32_t)51))));
+    t[4] = b4_;
+    t[0] = b0_;
+    Hacl_Bignum_Fproduct_copy_from_wide_(output, t);
+    uint64_t i0 = output[0];
+    uint64_t i1 = output[1];
+    uint64_t i0_ = i0 & (((uint64_t)1 << (uint32_t)51) - (uint64_t)1);
+    uint64_t i1_ = i1 + (i0 >> (uint32_t)51);
+    output[0] = i0_;
+    output[1] = i1_;
+}
+
+inline static void
+Hacl_Bignum_Fmul_fmul(uint64_t *output, uint64_t *input, uint64_t *input21)
+{
+    uint64_t tmp[5] = { 0 };
+    memcpy(tmp, input, (uint32_t)5 * sizeof input[0]);
+    Hacl_Bignum_Fmul_fmul_(output, tmp, input21);
+}
+
+inline static void
+Hacl_Bignum_Fsquare_upd_5(
+    FStar_UInt128_t *tmp,
+    FStar_UInt128_t s0,
+    FStar_UInt128_t s1,
+    FStar_UInt128_t s2,
+    FStar_UInt128_t s3,
+    FStar_UInt128_t s4)
+{
+    tmp[0] = s0;
+    tmp[1] = s1;
+    tmp[2] = s2;
+    tmp[3] = s3;
+    tmp[4] = s4;
+}
+
+inline static void
+Hacl_Bignum_Fsquare_fsquare__(FStar_UInt128_t *tmp, uint64_t *output)
+{
+    uint64_t r0 = output[0];
+    uint64_t r1 = output[1];
+    uint64_t r2 = output[2];
+    uint64_t r3 = output[3];
+    uint64_t r4 = output[4];
+    uint64_t d0 = r0 * (uint64_t)2;
+    uint64_t d1 = r1 * (uint64_t)2;
+    uint64_t d2 = r2 * (uint64_t)2 * (uint64_t)19;
+    uint64_t d419 = r4 * (uint64_t)19;
+    uint64_t d4 = d419 * (uint64_t)2;
+    FStar_UInt128_t
+        s0 =
+            FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(r0, r0),
+                                                FStar_UInt128_mul_wide(d4, r1)),
+                              FStar_UInt128_mul_wide(d2, r3));
+    FStar_UInt128_t
+        s1 =
+            FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, r1),
+                                                FStar_UInt128_mul_wide(d4, r2)),
+                              FStar_UInt128_mul_wide(r3 * (uint64_t)19, r3));
+    FStar_UInt128_t
+        s2 =
+            FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, r2),
+                                                FStar_UInt128_mul_wide(r1, r1)),
+                              FStar_UInt128_mul_wide(d4, r3));
+    FStar_UInt128_t
+        s3 =
+            FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, r3),
+                                                FStar_UInt128_mul_wide(d1, r2)),
+                              FStar_UInt128_mul_wide(r4, d419));
+    FStar_UInt128_t
+        s4 =
+            FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, r4),
+                                                FStar_UInt128_mul_wide(d1, r3)),
+                              FStar_UInt128_mul_wide(r2, r2));
+    Hacl_Bignum_Fsquare_upd_5(tmp, s0, s1, s2, s3, s4);
+}
+
+inline static void
+Hacl_Bignum_Fsquare_fsquare_(FStar_UInt128_t *tmp, uint64_t *output)
+{
+    Hacl_Bignum_Fsquare_fsquare__(tmp, output);
+    Hacl_Bignum_Fproduct_carry_wide_(tmp);
+    FStar_UInt128_t b4 = tmp[4];
+    FStar_UInt128_t b0 = tmp[0];
+    FStar_UInt128_t
+        mask =
+            FStar_UInt128_sub(FStar_UInt128_shift_left(FStar_Int_Cast_Full_uint64_to_uint128((uint64_t)1),
+                                                       (uint32_t)51),
+                              FStar_Int_Cast_Full_uint64_to_uint128((uint64_t)1));
+    FStar_UInt128_t b4_ = FStar_UInt128_logand(b4, mask);
+    FStar_UInt128_t
+        b0_ =
+            FStar_UInt128_add(b0,
+                              FStar_UInt128_mul_wide((uint64_t)19,
+                                                     FStar_Int_Cast_Full_uint128_to_uint64(FStar_UInt128_shift_right(b4, (uint32_t)51))));
+    tmp[4] = b4_;
+    tmp[0] = b0_;
+    Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp);
+    uint64_t i0 = output[0];
+    uint64_t i1 = output[1];
+    uint64_t i0_ = i0 & (((uint64_t)1 << (uint32_t)51) - (uint64_t)1);
+    uint64_t i1_ = i1 + (i0 >> (uint32_t)51);
+    output[0] = i0_;
+    output[1] = i1_;
+}
+
+inline static void
+Hacl_Bignum_Fsquare_fsquare_times_(uint64_t *output, FStar_UInt128_t *tmp, uint32_t count1)
+{
+    if (count1 == (uint32_t)1)
+        Hacl_Bignum_Fsquare_fsquare_(tmp, output);
+    else {
+        uint32_t i = count1 - (uint32_t)1;
+        Hacl_Bignum_Fsquare_fsquare_(tmp, output);
+        Hacl_Bignum_Fsquare_fsquare_times_(output, tmp, i);
+    }
+}
+
+inline static void
+Hacl_Bignum_Fsquare_fsquare_times(uint64_t *output, uint64_t *input, uint32_t count1)
+{
+    KRML_CHECK_SIZE(FStar_Int_Cast_Full_uint64_to_uint128((uint64_t)0), (uint32_t)5);
+    FStar_UInt128_t t[5];
+    for (uintmax_t _i = 0; _i < (uint32_t)5; ++_i)
+        t[_i] = FStar_Int_Cast_Full_uint64_to_uint128((uint64_t)0);
+    memcpy(output, input, (uint32_t)5 * sizeof input[0]);
+    Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1);
+}
+
+inline static void
+Hacl_Bignum_Fsquare_fsquare_times_inplace(uint64_t *output, uint32_t count1)
+{
+    KRML_CHECK_SIZE(FStar_Int_Cast_Full_uint64_to_uint128((uint64_t)0), (uint32_t)5);
+    FStar_UInt128_t t[5];
+    for (uintmax_t _i = 0; _i < (uint32_t)5; ++_i)
+        t[_i] = FStar_Int_Cast_Full_uint64_to_uint128((uint64_t)0);
+    Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1);
+}
+
+inline static void
+Hacl_Bignum_Crecip_crecip(uint64_t *out, uint64_t *z)
+{
+    uint64_t buf[20] = { 0 };
+    uint64_t *a = buf;
+    uint64_t *t00 = buf + (uint32_t)5;
+    uint64_t *b0 = buf + (uint32_t)10;
+    (void)(buf + (uint32_t)15);
+    Hacl_Bignum_Fsquare_fsquare_times(a, z, (uint32_t)1);
+    Hacl_Bignum_Fsquare_fsquare_times(t00, a, (uint32_t)2);
+    Hacl_Bignum_Fmul_fmul(b0, t00, z);
+    Hacl_Bignum_Fmul_fmul(a, b0, a);
+    Hacl_Bignum_Fsquare_fsquare_times(t00, a, (uint32_t)1);
+    Hacl_Bignum_Fmul_fmul(b0, t00, b0);
+    Hacl_Bignum_Fsquare_fsquare_times(t00, b0, (uint32_t)5);
+    uint64_t *t01 = buf + (uint32_t)5;
+    uint64_t *b1 = buf + (uint32_t)10;
+    uint64_t *c0 = buf + (uint32_t)15;
+    Hacl_Bignum_Fmul_fmul(b1, t01, b1);
+    Hacl_Bignum_Fsquare_fsquare_times(t01, b1, (uint32_t)10);
+    Hacl_Bignum_Fmul_fmul(c0, t01, b1);
+    Hacl_Bignum_Fsquare_fsquare_times(t01, c0, (uint32_t)20);
+    Hacl_Bignum_Fmul_fmul(t01, t01, c0);
+    Hacl_Bignum_Fsquare_fsquare_times_inplace(t01, (uint32_t)10);
+    Hacl_Bignum_Fmul_fmul(b1, t01, b1);
+    Hacl_Bignum_Fsquare_fsquare_times(t01, b1, (uint32_t)50);
+    uint64_t *a0 = buf;
+    uint64_t *t0 = buf + (uint32_t)5;
+    uint64_t *b = buf + (uint32_t)10;
+    uint64_t *c = buf + (uint32_t)15;
+    Hacl_Bignum_Fmul_fmul(c, t0, b);
+    Hacl_Bignum_Fsquare_fsquare_times(t0, c, (uint32_t)100);
+    Hacl_Bignum_Fmul_fmul(t0, t0, c);
+    Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, (uint32_t)50);
+    Hacl_Bignum_Fmul_fmul(t0, t0, b);
+    Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, (uint32_t)5);
+    Hacl_Bignum_Fmul_fmul(out, t0, a0);
+}
+
+inline static void
+Hacl_Bignum_fsum(uint64_t *a, uint64_t *b)
+{
+    {
+        uint64_t uu____871 = a[0];
+        uint64_t uu____874 = b[0];
+        uint64_t uu____870 = uu____871 + uu____874;
+        a[0] = uu____870;
+    }
+    {
+        uint64_t uu____871 = a[1];
+        uint64_t uu____874 = b[1];
+        uint64_t uu____870 = uu____871 + uu____874;
+        a[1] = uu____870;
+    }
+    {
+        uint64_t uu____871 = a[2];
+        uint64_t uu____874 = b[2];
+        uint64_t uu____870 = uu____871 + uu____874;
+        a[2] = uu____870;
+    }
+    {
+        uint64_t uu____871 = a[3];
+        uint64_t uu____874 = b[3];
+        uint64_t uu____870 = uu____871 + uu____874;
+        a[3] = uu____870;
+    }
+    {
+        uint64_t uu____871 = a[4];
+        uint64_t uu____874 = b[4];
+        uint64_t uu____870 = uu____871 + uu____874;
+        a[4] = uu____870;
+    }
+}
+
+inline static void
+Hacl_Bignum_fdifference(uint64_t *a, uint64_t *b)
+{
+    uint64_t tmp[5] = { 0 };
+    memcpy(tmp, b, (uint32_t)5 * sizeof b[0]);
+    uint64_t b0 = tmp[0];
+    uint64_t b1 = tmp[1];
+    uint64_t b2 = tmp[2];
+    uint64_t b3 = tmp[3];
+    uint64_t b4 = tmp[4];
+    tmp[0] = b0 + (uint64_t)0x3fffffffffff68;
+    tmp[1] = b1 + (uint64_t)0x3ffffffffffff8;
+    tmp[2] = b2 + (uint64_t)0x3ffffffffffff8;
+    tmp[3] = b3 + (uint64_t)0x3ffffffffffff8;
+    tmp[4] = b4 + (uint64_t)0x3ffffffffffff8;
+    {
+        uint64_t uu____871 = a[0];
+        uint64_t uu____874 = tmp[0];
+        uint64_t uu____870 = uu____874 - uu____871;
+        a[0] = uu____870;
+    }
+    {
+        uint64_t uu____871 = a[1];
+        uint64_t uu____874 = tmp[1];
+        uint64_t uu____870 = uu____874 - uu____871;
+        a[1] = uu____870;
+    }
+    {
+        uint64_t uu____871 = a[2];
+        uint64_t uu____874 = tmp[2];
+        uint64_t uu____870 = uu____874 - uu____871;
+        a[2] = uu____870;
+    }
+    {
+        uint64_t uu____871 = a[3];
+        uint64_t uu____874 = tmp[3];
+        uint64_t uu____870 = uu____874 - uu____871;
+        a[3] = uu____870;
+    }
+    {
+        uint64_t uu____871 = a[4];
+        uint64_t uu____874 = tmp[4];
+        uint64_t uu____870 = uu____874 - uu____871;
+        a[4] = uu____870;
+    }
+}
+
+inline static void
+Hacl_Bignum_fscalar(uint64_t *output, uint64_t *b, uint64_t s)
+{
+    KRML_CHECK_SIZE(FStar_Int_Cast_Full_uint64_to_uint128((uint64_t)0), (uint32_t)5);
+    FStar_UInt128_t tmp[5];
+    for (uintmax_t _i = 0; _i < (uint32_t)5; ++_i)
+        tmp[_i] = FStar_Int_Cast_Full_uint64_to_uint128((uint64_t)0);
+    {
+        uint64_t uu____429 = b[0];
+        FStar_UInt128_t uu____428 = FStar_UInt128_mul_wide(uu____429, s);
+        tmp[0] = uu____428;
+    }
+    {
+        uint64_t uu____429 = b[1];
+        FStar_UInt128_t uu____428 = FStar_UInt128_mul_wide(uu____429, s);
+        tmp[1] = uu____428;
+    }
+    {
+        uint64_t uu____429 = b[2];
+        FStar_UInt128_t uu____428 = FStar_UInt128_mul_wide(uu____429, s);
+        tmp[2] = uu____428;
+    }
+    {
+        uint64_t uu____429 = b[3];
+        FStar_UInt128_t uu____428 = FStar_UInt128_mul_wide(uu____429, s);
+        tmp[3] = uu____428;
+    }
+    {
+        uint64_t uu____429 = b[4];
+        FStar_UInt128_t uu____428 = FStar_UInt128_mul_wide(uu____429, s);
+        tmp[4] = uu____428;
+    }
+    Hacl_Bignum_Fproduct_carry_wide_(tmp);
+    FStar_UInt128_t b4 = tmp[4];
+    FStar_UInt128_t b0 = tmp[0];
+    FStar_UInt128_t
+        mask =
+            FStar_UInt128_sub(FStar_UInt128_shift_left(FStar_Int_Cast_Full_uint64_to_uint128((uint64_t)1),
+                                                       (uint32_t)51),
+                              FStar_Int_Cast_Full_uint64_to_uint128((uint64_t)1));
+    FStar_UInt128_t b4_ = FStar_UInt128_logand(b4, mask);
+    FStar_UInt128_t
+        b0_ =
+            FStar_UInt128_add(b0,
+                              FStar_UInt128_mul_wide((uint64_t)19,
+                                                     FStar_Int_Cast_Full_uint128_to_uint64(FStar_UInt128_shift_right(b4, (uint32_t)51))));
+    tmp[4] = b4_;
+    tmp[0] = b0_;
+    Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp);
+}
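+
+/* Hacl_Bignum_fscalar above multiplies each 51-bit limb of b by the 64-bit
+ * scalar s into a 128-bit wide limb, runs one carry pass, then folds the
+ * overflow of limb 4 back into limb 0 scaled by 19 (since 2^255 == 19 mod p)
+ * before narrowing the result back to 64-bit limbs. */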
+
+inline static void
+Hacl_Bignum_fmul(uint64_t *output, uint64_t *a, uint64_t *b)
+{
+    Hacl_Bignum_Fmul_fmul(output, a, b);
+}
+
+inline static void
+Hacl_Bignum_crecip(uint64_t *output, uint64_t *input)
+{
+    Hacl_Bignum_Crecip_crecip(output, input);
+}
+
+static void
+Hacl_EC_Point_swap_conditional_step(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr)
+{
+    uint32_t i = ctr - (uint32_t)1;
+    uint64_t ai = a[i];
+    uint64_t bi = b[i];
+    uint64_t x = swap1 & (ai ^ bi);
+    uint64_t ai1 = ai ^ x;
+    uint64_t bi1 = bi ^ x;
+    a[i] = ai1;
+    b[i] = bi1;
+}
+
+static void
+Hacl_EC_Point_swap_conditional_(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr)
+{
+    if (ctr == (uint32_t)0) {
+
+    } else {
+        Hacl_EC_Point_swap_conditional_step(a, b, swap1, ctr);
+        uint32_t i = ctr - (uint32_t)1;
+        Hacl_EC_Point_swap_conditional_(a, b, swap1, i);
+    }
+}
+
+static void
+Hacl_EC_Point_swap_conditional(uint64_t *a, uint64_t *b, uint64_t iswap)
+{
+    uint64_t swap1 = (uint64_t)0 - iswap;
+    Hacl_EC_Point_swap_conditional_(a, b, swap1, (uint32_t)5);
+    Hacl_EC_Point_swap_conditional_(a + (uint32_t)5, b + (uint32_t)5, swap1, (uint32_t)5);
+}
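+
+/* Constant-time conditional swap: iswap is 0 or 1, so swap1 = 0 - iswap is
+ * either all-zero or all-one. XOR-masking each limb with swap1 & (ai ^ bi)
+ * exchanges the x and z coordinates of the two points exactly when iswap is
+ * 1, with no data-dependent branch or memory access. */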
+
+static void
+Hacl_EC_Point_copy(uint64_t *output, uint64_t *input)
+{
+    memcpy(output, input, (uint32_t)5 * sizeof input[0]);
+    memcpy(output + (uint32_t)5,
+           input + (uint32_t)5,
+           (uint32_t)5 * sizeof(input + (uint32_t)5)[0]);
+}
+
+static void
+Hacl_EC_AddAndDouble_fmonty(
+    uint64_t *pp,
+    uint64_t *ppq,
+    uint64_t *p,
+    uint64_t *pq,
+    uint64_t *qmqp)
+{
+    uint64_t *qx = qmqp;
+    uint64_t *x2 = pp;
+    uint64_t *z2 = pp + (uint32_t)5;
+    uint64_t *x3 = ppq;
+    uint64_t *z3 = ppq + (uint32_t)5;
+    uint64_t *x = p;
+    uint64_t *z = p + (uint32_t)5;
+    uint64_t *xprime = pq;
+    uint64_t *zprime = pq + (uint32_t)5;
+    uint64_t buf[40] = { 0 };
+    (void)(buf + (uint32_t)5);
+    (void)(buf + (uint32_t)10);
+    (void)(buf + (uint32_t)15);
+    (void)(buf + (uint32_t)20);
+    (void)(buf + (uint32_t)25);
+    (void)(buf + (uint32_t)30);
+    (void)(buf + (uint32_t)35);
+    uint64_t *origx = buf;
+    uint64_t *origxprime = buf + (uint32_t)5;
+    (void)(buf + (uint32_t)10);
+    (void)(buf + (uint32_t)15);
+    (void)(buf + (uint32_t)20);
+    uint64_t *xxprime0 = buf + (uint32_t)25;
+    uint64_t *zzprime0 = buf + (uint32_t)30;
+    (void)(buf + (uint32_t)35);
+    memcpy(origx, x, (uint32_t)5 * sizeof x[0]);
+    Hacl_Bignum_fsum(x, z);
+    Hacl_Bignum_fdifference(z, origx);
+    memcpy(origxprime, xprime, (uint32_t)5 * sizeof xprime[0]);
+    Hacl_Bignum_fsum(xprime, zprime);
+    Hacl_Bignum_fdifference(zprime, origxprime);
+    Hacl_Bignum_fmul(xxprime0, xprime, z);
+    Hacl_Bignum_fmul(zzprime0, x, zprime);
+    uint64_t *origxprime0 = buf + (uint32_t)5;
+    (void)(buf + (uint32_t)10);
+    uint64_t *xx0 = buf + (uint32_t)15;
+    uint64_t *zz0 = buf + (uint32_t)20;
+    uint64_t *xxprime = buf + (uint32_t)25;
+    uint64_t *zzprime = buf + (uint32_t)30;
+    uint64_t *zzzprime = buf + (uint32_t)35;
+    memcpy(origxprime0, xxprime, (uint32_t)5 * sizeof xxprime[0]);
+    Hacl_Bignum_fsum(xxprime, zzprime);
+    Hacl_Bignum_fdifference(zzprime, origxprime0);
+    Hacl_Bignum_Fsquare_fsquare_times(x3, xxprime, (uint32_t)1);
+    Hacl_Bignum_Fsquare_fsquare_times(zzzprime, zzprime, (uint32_t)1);
+    Hacl_Bignum_fmul(z3, zzzprime, qx);
+    Hacl_Bignum_Fsquare_fsquare_times(xx0, x, (uint32_t)1);
+    Hacl_Bignum_Fsquare_fsquare_times(zz0, z, (uint32_t)1);
+    (void)(buf + (uint32_t)5);
+    uint64_t *zzz = buf + (uint32_t)10;
+    uint64_t *xx = buf + (uint32_t)15;
+    uint64_t *zz = buf + (uint32_t)20;
+    (void)(buf + (uint32_t)25);
+    (void)(buf + (uint32_t)30);
+    (void)(buf + (uint32_t)35);
+    Hacl_Bignum_fmul(x2, xx, zz);
+    Hacl_Bignum_fdifference(zz, xx);
+    uint64_t scalar = (uint64_t)121665;
+    Hacl_Bignum_fscalar(zzz, zz, scalar);
+    Hacl_Bignum_fsum(zzz, xx);
+    Hacl_Bignum_fmul(z2, zzz, zz);
+}
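+
+/* Hacl_EC_AddAndDouble_fmonty above is one combined doubling and differential
+ * addition step of the x-only Montgomery ladder: given P (p), P+Q (pq) and
+ * the x-coordinate of their fixed difference Q (qmqp), it writes [2]P to pp
+ * and [2]P+Q to ppq. The scalar 121665 is a24 = (A - 2) / 4 for the curve
+ * parameter A = 486662. */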
+
+static void
+Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step_1(
+    uint64_t *nq,
+    uint64_t *nqpq,
+    uint64_t *nq2,
+    uint64_t *nqpq2,
+    uint64_t *q,
+    uint8_t byt)
+{
+    uint64_t bit = (uint64_t)(byt >> (uint32_t)7);
+    Hacl_EC_Point_swap_conditional(nq, nqpq, bit);
+}
+
+static void
+Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step_2(
+    uint64_t *nq,
+    uint64_t *nqpq,
+    uint64_t *nq2,
+    uint64_t *nqpq2,
+    uint64_t *q,
+    uint8_t byt)
+{
+    Hacl_EC_AddAndDouble_fmonty(nq2, nqpq2, nq, nqpq, q);
+}
+
+static void
+Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(
+    uint64_t *nq,
+    uint64_t *nqpq,
+    uint64_t *nq2,
+    uint64_t *nqpq2,
+    uint64_t *q,
+    uint8_t byt)
+{
+    Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step_1(nq, nqpq, nq2, nqpq2, q, byt);
+    Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step_2(nq, nqpq, nq2, nqpq2, q, byt);
+    Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step_1(nq2, nqpq2, nq, nqpq, q, byt);
+}
+
+static void
+Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(
+    uint64_t *nq,
+    uint64_t *nqpq,
+    uint64_t *nq2,
+    uint64_t *nqpq2,
+    uint64_t *q,
+    uint8_t byt)
+{
+    Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt);
+    uint8_t byt1 = byt << (uint32_t)1;
+    Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt1);
+}
+
+static void
+Hacl_EC_Ladder_SmallLoop_cmult_small_loop(
+    uint64_t *nq,
+    uint64_t *nqpq,
+    uint64_t *nq2,
+    uint64_t *nqpq2,
+    uint64_t *q,
+    uint8_t byt,
+    uint32_t i)
+{
+    if (i == (uint32_t)0) {
+
+    } else {
+        uint32_t i_ = i - (uint32_t)1;
+        Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(nq, nqpq, nq2, nqpq2, q, byt);
+        uint8_t byt_ = byt << (uint32_t)2;
+        Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byt_, i_);
+    }
+}
+
+static void
+Hacl_EC_Ladder_BigLoop_cmult_big_loop(
+    uint8_t *n1,
+    uint64_t *nq,
+    uint64_t *nqpq,
+    uint64_t *nq2,
+    uint64_t *nqpq2,
+    uint64_t *q,
+    uint32_t i)
+{
+    if (i == (uint32_t)0) {
+
+    } else {
+        uint32_t i1 = i - (uint32_t)1;
+        uint8_t byte = n1[i1];
+        Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byte, (uint32_t)4);
+        Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, i1);
+    }
+}
+
+static void
+Hacl_EC_Ladder_cmult_(uint64_t *result, uint64_t *point_buf, uint8_t *n1, uint64_t *q)
+{
+    uint64_t *nq = point_buf;
+    uint64_t *nqpq = point_buf + (uint32_t)10;
+    uint64_t *nq2 = point_buf + (uint32_t)20;
+    uint64_t *nqpq2 = point_buf + (uint32_t)30;
+    Hacl_EC_Point_copy(nqpq, q);
+    nq[0] = (uint64_t)1;
+    Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, (uint32_t)32);
+    Hacl_EC_Point_copy(result, nq);
+}
+
+static void
+Hacl_EC_Ladder_cmult(uint64_t *result, uint8_t *n1, uint64_t *q)
+{
+    uint64_t point_buf[40] = { 0 };
+    Hacl_EC_Ladder_cmult_(result, point_buf, n1, q);
+}
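+
+/* The ladder above walks the 32-byte little-endian scalar from its most
+ * significant byte (n1[31]) down to n1[0]; within each byte, four double-steps
+ * consume the bits from bit 7 downwards (byt >> 7, then byt << 1 / byt << 2).
+ * The sequence of field operations is therefore independent of the scalar
+ * value; only the conditional swaps depend on it, and those are branch-free. */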
+
+static void
+Hacl_EC_Format_upd_5(
+    uint64_t *output,
+    uint64_t output0,
+    uint64_t output1,
+    uint64_t output2,
+    uint64_t output3,
+    uint64_t output4)
+{
+    output[0] = output0;
+    output[1] = output1;
+    output[2] = output2;
+    output[3] = output3;
+    output[4] = output4;
+}
+
+static void
+Hacl_EC_Format_upd_5_(
+    uint64_t *output,
+    uint64_t output0,
+    uint64_t output1,
+    uint64_t output2,
+    uint64_t output3,
+    uint64_t output4)
+{
+    output[0] = output0;
+    output[1] = output1;
+    output[2] = output2;
+    output[3] = output3;
+    output[4] = output4;
+}
+
+static void
+Hacl_EC_Format_fexpand(uint64_t *output, uint8_t *input)
+{
+    uint64_t mask_511 = (uint64_t)0x7ffffffffffff;
+    uint64_t i0 = load64_le(input);
+    uint8_t *x00 = input + (uint32_t)6;
+    uint64_t i1 = load64_le(x00);
+    uint8_t *x01 = input + (uint32_t)12;
+    uint64_t i2 = load64_le(x01);
+    uint8_t *x02 = input + (uint32_t)19;
+    uint64_t i3 = load64_le(x02);
+    uint8_t *x0 = input + (uint32_t)24;
+    uint64_t i4 = load64_le(x0);
+    uint64_t output0 = i0 & mask_511;
+    uint64_t output1 = i1 >> (uint32_t)3 & mask_511;
+    uint64_t output2 = i2 >> (uint32_t)6 & mask_511;
+    uint64_t output3 = i3 >> (uint32_t)1 & mask_511;
+    uint64_t output4 = i4 >> (uint32_t)12 & mask_511;
+    Hacl_EC_Format_upd_5(output, output0, output1, output2, output3, output4);
+}
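+
+/* Hacl_EC_Format_fexpand above unpacks the 32-byte little-endian u-coordinate
+ * into five 51-bit limbs. The 8-byte loads at offsets 0, 6, 12, 19 and 24,
+ * combined with the shifts 0, 3, 6, 1 and 12, align each load on a 51-bit
+ * boundary; mask_511 is the 51-bit mask 2^51 - 1, and the final mask also
+ * drops bit 255 of the input as required for X25519. */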
+
+static void
+Hacl_EC_Format_store_4(uint8_t *output, uint64_t v0, uint64_t v1, uint64_t v2, uint64_t v3)
+{
+    uint8_t *b0 = output;
+    uint8_t *b1 = output + (uint32_t)8;
+    uint8_t *b2 = output + (uint32_t)16;
+    uint8_t *b3 = output + (uint32_t)24;
+    store64_le(b0, v0);
+    store64_le(b1, v1);
+    store64_le(b2, v2);
+    store64_le(b3, v3);
+}
+
+static void
+Hacl_EC_Format_fcontract_first_carry_pass(uint64_t *input)
+{
+    uint64_t t0 = input[0];
+    uint64_t t1 = input[1];
+    uint64_t t2 = input[2];
+    uint64_t t3 = input[3];
+    uint64_t t4 = input[4];
+    uint64_t t1_ = t1 + (t0 >> (uint32_t)51);
+    uint64_t t0_ = t0 & (uint64_t)0x7ffffffffffff;
+    uint64_t t2_ = t2 + (t1_ >> (uint32_t)51);
+    uint64_t t1__ = t1_ & (uint64_t)0x7ffffffffffff;
+    uint64_t t3_ = t3 + (t2_ >> (uint32_t)51);
+    uint64_t t2__ = t2_ & (uint64_t)0x7ffffffffffff;
+    uint64_t t4_ = t4 + (t3_ >> (uint32_t)51);
+    uint64_t t3__ = t3_ & (uint64_t)0x7ffffffffffff;
+    Hacl_EC_Format_upd_5_(input, t0_, t1__, t2__, t3__, t4_);
+}
+
+static void
+Hacl_EC_Format_fcontract_first_carry_full(uint64_t *input)
+{
+    Hacl_EC_Format_fcontract_first_carry_pass(input);
+    Hacl_Bignum_Modulo_carry_top(input);
+}
+
+static void
+Hacl_EC_Format_fcontract_second_carry_pass(uint64_t *input)
+{
+    uint64_t t0 = input[0];
+    uint64_t t1 = input[1];
+    uint64_t t2 = input[2];
+    uint64_t t3 = input[3];
+    uint64_t t4 = input[4];
+    uint64_t t1_ = t1 + (t0 >> (uint32_t)51);
+    uint64_t t0_ = t0 & (uint64_t)0x7ffffffffffff;
+    uint64_t t2_ = t2 + (t1_ >> (uint32_t)51);
+    uint64_t t1__ = t1_ & (uint64_t)0x7ffffffffffff;
+    uint64_t t3_ = t3 + (t2_ >> (uint32_t)51);
+    uint64_t t2__ = t2_ & (uint64_t)0x7ffffffffffff;
+    uint64_t t4_ = t4 + (t3_ >> (uint32_t)51);
+    uint64_t t3__ = t3_ & (uint64_t)0x7ffffffffffff;
+    Hacl_EC_Format_upd_5_(input, t0_, t1__, t2__, t3__, t4_);
+}
+
+static void
+Hacl_EC_Format_fcontract_second_carry_full(uint64_t *input)
+{
+    Hacl_EC_Format_fcontract_second_carry_pass(input);
+    Hacl_Bignum_Modulo_carry_top(input);
+    uint64_t i0 = input[0];
+    uint64_t i1 = input[1];
+    uint64_t i0_ = i0 & (((uint64_t)1 << (uint32_t)51) - (uint64_t)1);
+    uint64_t i1_ = i1 + (i0 >> (uint32_t)51);
+    input[0] = i0_;
+    input[1] = i1_;
+}
+
+static void
+Hacl_EC_Format_fcontract_trim(uint64_t *input)
+{
+    uint64_t a0 = input[0];
+    uint64_t a1 = input[1];
+    uint64_t a2 = input[2];
+    uint64_t a3 = input[3];
+    uint64_t a4 = input[4];
+    uint64_t mask0 = FStar_UInt64_gte_mask(a0, (uint64_t)0x7ffffffffffed);
+    uint64_t mask1 = FStar_UInt64_eq_mask(a1, (uint64_t)0x7ffffffffffff);
+    uint64_t mask2 = FStar_UInt64_eq_mask(a2, (uint64_t)0x7ffffffffffff);
+    uint64_t mask3 = FStar_UInt64_eq_mask(a3, (uint64_t)0x7ffffffffffff);
+    uint64_t mask4 = FStar_UInt64_eq_mask(a4, (uint64_t)0x7ffffffffffff);
+    uint64_t mask = mask0 & mask1 & mask2 & mask3 & mask4;
+    uint64_t a0_ = a0 - ((uint64_t)0x7ffffffffffed & mask);
+    uint64_t a1_ = a1 - ((uint64_t)0x7ffffffffffff & mask);
+    uint64_t a2_ = a2 - ((uint64_t)0x7ffffffffffff & mask);
+    uint64_t a3_ = a3 - ((uint64_t)0x7ffffffffffff & mask);
+    uint64_t a4_ = a4 - ((uint64_t)0x7ffffffffffff & mask);
+    Hacl_EC_Format_upd_5_(input, a0_, a1_, a2_, a3_, a4_);
+}
+
+static void
+Hacl_EC_Format_fcontract_store(uint8_t *output, uint64_t *input)
+{
+    uint64_t t0 = input[0];
+    uint64_t t1 = input[1];
+    uint64_t t2 = input[2];
+    uint64_t t3 = input[3];
+    uint64_t t4 = input[4];
+    uint64_t o0 = t1 << (uint32_t)51 | t0;
+    uint64_t o1 = t2 << (uint32_t)38 | t1 >> (uint32_t)13;
+    uint64_t o2 = t3 << (uint32_t)25 | t2 >> (uint32_t)26;
+    uint64_t o3 = t4 << (uint32_t)12 | t3 >> (uint32_t)39;
+    Hacl_EC_Format_store_4(output, o0, o1, o2, o3);
+}
+
+static void
+Hacl_EC_Format_fcontract(uint8_t *output, uint64_t *input)
+{
+    Hacl_EC_Format_fcontract_first_carry_full(input);
+    Hacl_EC_Format_fcontract_second_carry_full(input);
+    Hacl_EC_Format_fcontract_trim(input);
+    Hacl_EC_Format_fcontract_store(output, input);
+}
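+
+/* Hacl_EC_Format_fcontract serializes a field element to 32 bytes: the two
+ * carry passes (each followed by Hacl_Bignum_Modulo_carry_top) normalize the
+ * limbs, fcontract_trim subtracts p exactly once when the value is still
+ * >= p, using the eq/gte masks so the comparison is constant time, and
+ * fcontract_store repacks the five 51-bit limbs into four little-endian
+ * 64-bit words. */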
+
+static void
+Hacl_EC_Format_scalar_of_point(uint8_t *scalar, uint64_t *point)
+{
+    uint64_t *x = point;
+    uint64_t *z = point + (uint32_t)5;
+    uint64_t buf[10] = { 0 };
+    uint64_t *zmone = buf;
+    uint64_t *sc = buf + (uint32_t)5;
+    Hacl_Bignum_crecip(zmone, z);
+    Hacl_Bignum_fmul(sc, x, zmone);
+    Hacl_EC_Format_fcontract(scalar, sc);
+}
+
+static void
+Hacl_EC_crypto_scalarmult__(
+    uint8_t *mypublic,
+    uint8_t *scalar,
+    uint8_t *basepoint,
+    uint64_t *q)
+{
+    uint64_t buf[15] = { 0 };
+    uint64_t *nq = buf;
+    uint64_t *x = nq;
+    (void)(nq + (uint32_t)5);
+    (void)(buf + (uint32_t)5);
+    x[0] = (uint64_t)1;
+    Hacl_EC_Ladder_cmult(nq, scalar, q);
+    Hacl_EC_Format_scalar_of_point(mypublic, nq);
+}
+
+static void
+Hacl_EC_crypto_scalarmult_(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint, uint64_t *q)
+{
+    uint8_t e[32] = { 0 };
+    memcpy(e, secret, (uint32_t)32 * sizeof secret[0]);
+    uint8_t e0 = e[0];
+    uint8_t e31 = e[31];
+    uint8_t e01 = e0 & (uint8_t)248;
+    uint8_t e311 = e31 & (uint8_t)127;
+    uint8_t e312 = e311 | (uint8_t)64;
+    e[0] = e01;
+    e[31] = e312;
+    uint8_t *scalar = e;
+    Hacl_EC_crypto_scalarmult__(mypublic, scalar, basepoint, q);
+}
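+
+/* Scalar clamping as specified for X25519 (RFC 7748): the three low bits of
+ * byte 0 are cleared, bit 255 is cleared and bit 254 is set. The secret is
+ * first copied into a local buffer, so the caller's buffer is never
+ * modified. */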
+
+void
+Hacl_EC_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint)
+{
+    uint64_t buf[10] = { 0 };
+    uint64_t *x = buf;
+    uint64_t *z = buf + (uint32_t)5;
+    Hacl_EC_Format_fexpand(x, basepoint);
+    z[0] = (uint64_t)1;
+    uint64_t *q = buf;
+    Hacl_EC_crypto_scalarmult_(mypublic, secret, basepoint, q);
+}
+
+void *
+Curve25519_op_String_Access(FStar_Monotonic_HyperStack_mem h, uint8_t *b)
+{
+    return (void *)(uint8_t)0;
+}
+
+void
+Curve25519_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint)
+{
+    Hacl_EC_crypto_scalarmult(mypublic, secret, basepoint);
+}
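+
+/* Usage sketch (illustrative only, not part of the generated code): a caller
+ * uses the public entry point directly, e.g.
+ *
+ *     uint8_t out[32];
+ *     Curve25519_crypto_scalarmult(out, secret_key, peer_public);
+ *
+ * where secret_key and peer_public are 32-byte little-endian buffers supplied
+ * by the caller; the scalar clamping above is applied internally. */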
new file mode 100644
--- /dev/null
+++ b/lib/freebl/verified/hacl_curve25519_64.h
@@ -0,0 +1,60 @@
+// Copyright 2016-2017 INRIA and Microsoft Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/* This file was auto-generated by KreMLin! */
+
+#include "kremlib.h"
+#ifndef __Hacl_Curve25519_64_H
+#define __Hacl_Curve25519_64_H
+
+typedef uint64_t Hacl_Bignum_Constants_limb;
+
+typedef FStar_UInt128_t Hacl_Bignum_Constants_wide;
+
+typedef uint64_t Hacl_Bignum_Parameters_limb;
+
+typedef FStar_UInt128_t Hacl_Bignum_Parameters_wide;
+
+typedef uint32_t Hacl_Bignum_Parameters_ctr;
+
+typedef uint64_t *Hacl_Bignum_Parameters_felem;
+
+typedef FStar_UInt128_t *Hacl_Bignum_Parameters_felem_wide;
+
+typedef void *Hacl_Bignum_Parameters_seqelem;
+
+typedef void *Hacl_Bignum_Parameters_seqelem_wide;
+
+typedef FStar_UInt128_t Hacl_Bignum_Wide_t;
+
+typedef uint64_t Hacl_Bignum_Limb_t;
+
+extern void Hacl_Bignum_lemma_diff(Prims_int x0, Prims_int x1, Prims_pos x2);
+
+typedef uint64_t *Hacl_EC_Point_point;
+
+typedef uint8_t *Hacl_EC_Ladder_SmallLoop_uint8_p;
+
+typedef uint8_t *Hacl_EC_Ladder_uint8_p;
+
+typedef uint8_t *Hacl_EC_Format_uint8_p;
+
+void Hacl_EC_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint);
+
+typedef uint8_t *Curve25519_uint8_p;
+
+void *Curve25519_op_String_Access(FStar_Monotonic_HyperStack_mem h, uint8_t *b);
+
+void Curve25519_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint);
+#endif
new file mode 100644
--- /dev/null
+++ b/lib/freebl/verified/kremlib.h
@@ -0,0 +1,412 @@
+// Copyright 2016-2017 Microsoft Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef __KREMLIB_H
+#define __KREMLIB_H
+
+#include <inttypes.h>
+#include <stdlib.h>
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+
+// Define __cdecl and friends when using GCC, so that we can safely compile code
+// that contains __cdecl on all platforms.
+#ifndef _MSC_VER
+// Use the gcc predefined macros if on a platform/architecture that sets them.
+// Otherwise define them to be empty.
+#ifndef __cdecl
+#define __cdecl
+#endif
+#ifndef __stdcall
+#define __stdcall
+#endif
+#ifndef __fastcall
+#define __fastcall
+#endif
+#endif
+
+// GCC-specific attribute syntax; everyone else gets the standard C inline
+// attribute.
+#ifdef __GNUC__
+#ifndef __clang__
+#define force_inline inline __attribute__((always_inline))
+#else
+#define force_inline inline
+#endif
+#else
+#define force_inline inline
+#endif
+
+// Uppercase issue; we have to define lowercase versions of the C macros (as we
+// have no way to refer to an uppercase *variable* in F*).
+extern int exit_success;
+extern int exit_failure;
+
+// Some types that KreMLin has no special knowledge of; many of them appear in
+// signatures of ghost functions, meaning that it suffices to give them (any)
+// definition.
+typedef void *Prims_pos, *Prims_nat, *Prims_nonzero, *FStar_Seq_Base_seq,
+    *Prims_int, *Prims_prop, *FStar_HyperStack_mem, *FStar_Set_set,
+    *Prims_st_pre_h, *FStar_Heap_heap, *Prims_all_pre_h, *FStar_TSet_set,
+    *Prims_string, *Prims_list, *FStar_Map_t, *FStar_UInt63_t_, *FStar_Int63_t_,
+    *FStar_UInt63_t, *FStar_Int63_t, *FStar_UInt_uint_t, *FStar_Int_int_t,
+    *FStar_HyperStack_stackref, *FStar_Bytes_bytes, *FStar_HyperHeap_rid,
+    *FStar_Heap_aref, *FStar_Monotonic_Heap_heap,
+    *FStar_Monotonic_HyperHeap_rid, *FStar_Monotonic_HyperStack_mem;
+
+#define KRML_CHECK_SIZE(elt, size)                                               \
+    if (((size_t)size) > SIZE_MAX / sizeof(elt)) {                               \
+        printf("Maximum allocatable size exceeded, aborting before overflow at " \
+               "%s:%d\n",                                                        \
+               __FILE__, __LINE__);                                              \
+        exit(253);                                                               \
+    }
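+// KRML_CHECK_SIZE aborts before an allocation whose total byte size
+// (size * sizeof(elt)) would overflow size_t; for the small fixed-size
+// temporaries used in the generated code the check cannot fire, but KreMLin
+// emits it uniformly.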
+
+// Endian-ness
+
+// ... for Linux
+#if defined(__linux__) || defined(__CYGWIN__)
+#include <endian.h>
+
+// ... for OSX
+#elif defined(__APPLE__)
+#include <libkern/OSByteOrder.h>
+#define htole64(x) OSSwapHostToLittleInt64(x)
+#define le64toh(x) OSSwapLittleToHostInt64(x)
+#define htobe64(x) OSSwapHostToBigInt64(x)
+#define be64toh(x) OSSwapBigToHostInt64(x)
+
+#define htole16(x) OSSwapHostToLittleInt16(x)
+#define le16toh(x) OSSwapLittleToHostInt16(x)
+#define htobe16(x) OSSwapHostToBigInt16(x)
+#define be16toh(x) OSSwapBigToHostInt16(x)
+
+#define htole32(x) OSSwapHostToLittleInt32(x)
+#define le32toh(x) OSSwapLittleToHostInt32(x)
+#define htobe32(x) OSSwapHostToBigInt32(x)
+#define be32toh(x) OSSwapBigToHostInt32(x)
+
+// ... for Windows
+#elif (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) && \
+    !defined(__WINDOWS__)
+#include <windows.h>
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+
+#if defined(_MSC_VER)
+#include <stdlib.h>
+#define htobe16(x) _byteswap_ushort(x)
+#define htole16(x) (x)
+#define be16toh(x) _byteswap_ushort(x)
+#define le16toh(x) (x)
+
+#define htobe32(x) _byteswap_ulong(x)
+#define htole32(x) (x)
+#define be32toh(x) _byteswap_ulong(x)
+#define le32toh(x) (x)
+
+#define htobe64(x) _byteswap_uint64(x)
+#define htole64(x) (x)
+#define be64toh(x) _byteswap_uint64(x)
+#define le64toh(x) (x)
+
+#elif defined(__GNUC__) || defined(__clang__)
+
+#define htobe16(x) __builtin_bswap16(x)
+#define htole16(x) (x)
+#define be16toh(x) __builtin_bswap16(x)
+#define le16toh(x) (x)
+
+#define htobe32(x) __builtin_bswap32(x)
+#define htole32(x) (x)
+#define be32toh(x) __builtin_bswap32(x)
+#define le32toh(x) (x)
+
+#define htobe64(x) __builtin_bswap64(x)
+#define htole64(x) (x)
+#define be64toh(x) __builtin_bswap64(x)
+#define le64toh(x) (x)
+#endif
+
+#endif
+
+#endif
+
+// Loads and stores. These avoid undefined behavior due to unaligned memory
+// accesses, via memcpy.
+
+inline static uint16_t
+load16(uint8_t *b)
+{
+    uint16_t x;
+    memcpy(&x, b, 2);
+    return x;
+}
+
+inline static uint32_t
+load32(uint8_t *b)
+{
+    uint32_t x;
+    memcpy(&x, b, 4);
+    return x;
+}
+
+inline static uint64_t
+load64(uint8_t *b)
+{
+    uint64_t x;
+    memcpy(&x, b, 8);
+    return x;
+}
+
+inline static void
+store16(uint8_t *b, uint16_t i)
+{
+    memcpy(b, &i, 2);
+}
+
+inline static void
+store32(uint8_t *b, uint32_t i)
+{
+    memcpy(b, &i, 4);
+}
+
+inline static void
+store64(uint8_t *b, uint64_t i)
+{
+    memcpy(b, &i, 8);
+}
+
+#define load16_le(b) (le16toh(load16(b)))
+#define store16_le(b, i) (store16(b, htole16(i)))
+#define load16_be(b) (be16toh(load16(b)))
+#define store16_be(b, i) (store16(b, htobe16(i)))
+
+#define load32_le(b) (le32toh(load32(b)))
+#define store32_le(b, i) (store32(b, htole32(i)))
+#define load32_be(b) (be32toh(load32(b)))
+#define store32_be(b, i) (store32(b, htobe32(i)))
+
+#define load64_le(b) (le64toh(load64(b)))
+#define store64_le(b, i) (store64(b, htole64(i)))
+#define load64_be(b) (be64toh(load64(b)))
+#define store64_be(b, i) (store64(b, htobe64(i)))
+
+// Integer types
+typedef uint64_t FStar_UInt64_t, FStar_UInt64_t_;
+typedef int64_t FStar_Int64_t, FStar_Int64_t_;
+typedef uint32_t FStar_UInt32_t, FStar_UInt32_t_;
+typedef int32_t FStar_Int32_t, FStar_Int32_t_;
+typedef uint16_t FStar_UInt16_t, FStar_UInt16_t_;
+typedef int16_t FStar_Int16_t, FStar_Int16_t_;
+typedef uint8_t FStar_UInt8_t, FStar_UInt8_t_;
+typedef int8_t FStar_Int8_t, FStar_Int8_t_;
+
+// Constant time comparisons
+static inline uint8_t
+FStar_UInt8_eq_mask(uint8_t x, uint8_t y)
+{
+    x = ~(x ^ y);
+    x &= x << 4;
+    x &= x << 2;
+    x &= x << 1;
+    return (int8_t)x >> 7;
+}
+
+static inline uint8_t
+FStar_UInt8_gte_mask(uint8_t x, uint8_t y)
+{
+    return ~(uint8_t)(((int32_t)x - y) >> 31);
+}
+
+static inline uint16_t
+FStar_UInt16_eq_mask(uint16_t x, uint16_t y)
+{
+    x = ~(x ^ y);
+    x &= x << 8;
+    x &= x << 4;
+    x &= x << 2;
+    x &= x << 1;
+    return (int16_t)x >> 15;
+}
+
+static inline uint16_t
+FStar_UInt16_gte_mask(uint16_t x, uint16_t y)
+{
+    return ~(uint16_t)(((int32_t)x - y) >> 31);
+}
+
+static inline uint32_t
+FStar_UInt32_eq_mask(uint32_t x, uint32_t y)
+{
+    x = ~(x ^ y);
+    x &= x << 16;
+    x &= x << 8;
+    x &= x << 4;
+    x &= x << 2;
+    x &= x << 1;
+    return ((int32_t)x) >> 31;
+}
+
+static inline uint32_t
+FStar_UInt32_gte_mask(uint32_t x, uint32_t y)
+{
+    return ~((uint32_t)(((int64_t)x - y) >> 63));
+}
+
+static inline uint64_t
+FStar_UInt64_eq_mask(uint64_t x, uint64_t y)
+{
+    x = ~(x ^ y);
+    x &= x << 32;
+    x &= x << 16;
+    x &= x << 8;
+    x &= x << 4;
+    x &= x << 2;
+    x &= x << 1;
+    return ((int64_t)x) >> 63;
+}
+
+static inline uint64_t
+FStar_UInt64_gte_mask(uint64_t x, uint64_t y)
+{
+    uint64_t low63 =
+        ~((uint64_t)((int64_t)((int64_t)(x & UINT64_C(0x7fffffffffffffff)) -
+                               (int64_t)(y & UINT64_C(0x7fffffffffffffff))) >>
+                     63));
+    uint64_t high_bit =
+        ~((uint64_t)((int64_t)((int64_t)(x & UINT64_C(0x8000000000000000)) -
+                               (int64_t)(y & UINT64_C(0x8000000000000000))) >>
+                     63));
+    return low63 & high_bit;
+}
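+
+// These comparisons are branchless: ~(x ^ y) is all-ones exactly when x == y,
+// the shift-and-AND cascade folds every bit into the top bit, and the
+// arithmetic right shift of the value reinterpreted as signed smears that bit
+// across the word, yielding 0 or all-ones. FStar_UInt64_gte_mask handles the
+// low 63 bits and the top bit separately so the intermediate signed
+// subtraction cannot overflow.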
+
+// Platform-specific 128-bit arithmetic. These are static functions in a header,
+// so that each translation unit gets its own copy and the C compiler can
+// optimize.
+#ifdef HAVE_INT128_SUPPORT
+typedef unsigned __int128 FStar_UInt128_t, FStar_UInt128_t_, uint128_t;
+
+static inline uint128_t
+load128_le(uint8_t *b)
+{
+    uint128_t l = (uint128_t)load64_le(b);
+    uint128_t h = (uint128_t)load64_le(b + 8);
+    return (h << 64 | l);
+}
+
+static inline void
+store128_le(uint8_t *b, uint128_t n)
+{
+    store64_le(b, (uint64_t)n);
+    store64_le(b + 8, (uint64_t)(n >> 64));
+}
+
+static inline uint128_t
+load128_be(uint8_t *b)
+{
+    uint128_t h = (uint128_t)load64_be(b);
+    uint128_t l = (uint128_t)load64_be(b + 8);
+    return (h << 64 | l);
+}
+
+static inline void
+store128_be(uint8_t *b, uint128_t n)
+{
+    store64_be(b, (uint64_t)(n >> 64));
+    store64_be(b + 8, (uint64_t)n);
+}
+
+#define FStar_UInt128_add(x, y) ((x) + (y))
+#define FStar_UInt128_mul(x, y) ((x) * (y))
+#define FStar_UInt128_add_mod(x, y) ((x) + (y))
+#define FStar_UInt128_sub(x, y) ((x) - (y))
+#define FStar_UInt128_sub_mod(x, y) ((x) - (y))
+#define FStar_UInt128_logand(x, y) ((x) & (y))
+#define FStar_UInt128_logor(x, y) ((x) | (y))
+#define FStar_UInt128_logxor(x, y) ((x) ^ (y))
+#define FStar_UInt128_lognot(x) (~(x))
+#define FStar_UInt128_shift_left(x, y) ((x) << (y))
+#define FStar_UInt128_shift_right(x, y) ((x) >> (y))
+#define FStar_UInt128_uint64_to_uint128(x) ((uint128_t)(x))
+#define FStar_UInt128_uint128_to_uint64(x) ((uint64_t)(x))
+#define FStar_Int_Cast_Full_uint64_to_uint128(x) ((uint128_t)(x))
+#define FStar_Int_Cast_Full_uint128_to_uint64(x) ((uint64_t)(x))
+#define FStar_UInt128_mul_wide(x, y) ((uint128_t)(x) * (y))
+#define FStar_UInt128_op_Hat_Hat(x, y) ((x) ^ (y))
+
+static inline uint128_t
+FStar_UInt128_eq_mask(uint128_t x, uint128_t y)
+{
+    uint64_t mask =
+        FStar_UInt64_eq_mask((uint64_t)(x >> 64), (uint64_t)(y >> 64)) &
+        FStar_UInt64_eq_mask(x, y);
+    return ((uint128_t)mask) << 64 | mask;
+}
+
+static inline uint128_t
+FStar_UInt128_gte_mask(uint128_t x, uint128_t y)
+{
+    uint64_t mask =
+        (FStar_UInt64_gte_mask(x >> 64, y >> 64) &
+         ~(FStar_UInt64_eq_mask(x >> 64, y >> 64))) |
+        (FStar_UInt64_eq_mask(x >> 64, y >> 64) & FStar_UInt64_gte_mask(x, y));
+    return ((uint128_t)mask) << 64 | mask;
+}
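+
+// The 128-bit masks are composed from the 64-bit ones: equality requires both
+// halves to be equal, and x >= y holds when the high half is strictly greater,
+// or the high halves are equal and the low half is >=; the resulting 64-bit
+// mask is then replicated into both halves of the 128-bit result.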
+
+#else // defined(HAVE_INT128_SUPPORT)
+
+#include "fstar_uint128.h"
+
+typedef FStar_UInt128_uint128 FStar_UInt128_t_, uint128_t;
+
+static inline void
+load128_le_(uint8_t *b, uint128_t *r)
+{
+    r->low = load64_le(b);
+    r->high = load64_le(b + 8);
+}
+
+static inline void
+store128_le_(uint8_t *b, uint128_t *n)
+{
+    store64_le(b, n->low);
+    store64_le(b + 8, n->high);
+}
+
+static inline void
+load128_be_(uint8_t *b, uint128_t *r)
+{
+    r->high = load64_be(b);
+    r->low = load64_be(b + 8);
+}
+
+static inline void
+store128_be_(uint8_t *b, uint128_t *n)
+{
+    store64_be(b, n->high);
+    store64_be(b + 8, n->low);
+}
+
+/* #define print128 print128_ */
+#define load128_le load128_le_
+#define store128_le store128_le_
+#define load128_be load128_be_
+#define store128_be store128_be_
+
+#endif // HAVE_INT128_SUPPORT
+#endif // __KREMLIB_H