283 files changed, 58637 insertions, 0 deletions
diff --git a/third_party/bearssl/src/aes_big_cbcdec.c b/third_party/bearssl/src/aes_big_cbcdec.c
new file mode 100644
index 0000000..d969a3b
--- /dev/null
+++ b/third_party/bearssl/src/aes_big_cbcdec.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h; prepares ctx for AES CBC decryption with this key */
+void
+br_aes_big_cbcdec_init(br_aes_big_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_big_keysched_inv(ctx->skey, key, len);
+	ctx->vtable = &br_aes_big_cbcdec_vtable;	/* bind dispatch table */
+}
+
+/* see bearssl_block.h; CBC-decrypts whole 16-byte blocks in place, updates iv */
+void
+br_aes_big_cbcdec_run(const br_aes_big_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf, *ivbuf;
+
+	ivbuf = iv;
+	buf = data;
+	while (len >= 16) {	/* a partial tail is skipped; len cannot wrap */
+		unsigned char tmp[16];
+		int i;
+
+		memcpy(tmp, buf, 16);	/* keep ciphertext: it is the next IV */
+		br_aes_big_decrypt(ctx->num_rounds, ctx->skey, buf);
+		for (i = 0; i < 16; i ++) {
+			buf[i] ^= ivbuf[i];
+		}
+		memcpy(ivbuf, tmp, 16);
+		buf += 16;
+		len -= 16;
+	}
+}
+
+/* see bearssl_block.h; dispatch table for the "big" AES CBC-decryption class */
+const br_block_cbcdec_class br_aes_big_cbcdec_vtable = {
+	sizeof(br_aes_big_cbcdec_keys),	/* size of the concrete context */
+	16,	/* presumably block size in bytes -- confirm in bearssl_block.h */
+	4,	/* presumably log2(block size) -- confirm in bearssl_block.h */
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_big_cbcdec_init,	/* cast: concrete ctx -> generic class */
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_big_cbcdec_run
+};
diff --git a/third_party/bearssl/src/aes_big_cbcenc.c b/third_party/bearssl/src/aes_big_cbcenc.c
new file mode 100644
index 0000000..265e53b
--- /dev/null
+++ b/third_party/bearssl/src/aes_big_cbcenc.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h; prepares ctx for AES CBC encryption with this key */
+void
+br_aes_big_cbcenc_init(br_aes_big_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_big_cbcenc_vtable;	/* bind dispatch table */
+}
+
+/* see bearssl_block.h; CBC-encrypts whole 16-byte blocks in place, updates iv */
+void
+br_aes_big_cbcenc_run(const br_aes_big_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf, *ivbuf;
+
+	ivbuf = iv;
+	buf = data;
+	while (len >= 16) {	/* a partial tail is skipped; len cannot wrap */
+		int i;
+
+		for (i = 0; i < 16; i ++) {
+			buf[i] ^= ivbuf[i];	/* chain with previous ciphertext/IV */
+		}
+		br_aes_big_encrypt(ctx->num_rounds, ctx->skey, buf);
+		memcpy(ivbuf, buf, 16);	/* new ciphertext is the next IV */
+		buf += 16;
+		len -= 16;
+	}
+}
+
+/* see bearssl_block.h; dispatch table for the "big" AES CBC-encryption class */
+const br_block_cbcenc_class br_aes_big_cbcenc_vtable = {
+	sizeof(br_aes_big_cbcenc_keys),	/* size of the concrete context */
+	16,	/* presumably block size in bytes -- confirm in bearssl_block.h */
+	4,	/* presumably log2(block size) -- confirm in bearssl_block.h */
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_aes_big_cbcenc_init,	/* cast: concrete ctx -> generic class */
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_big_cbcenc_run
+};
diff --git a/third_party/bearssl/src/aes_big_ctr.c b/third_party/bearssl/src/aes_big_ctr.c
new file mode 100644
index 0000000..18fbb84
--- /dev/null
+++ b/third_party/bearssl/src/aes_big_ctr.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h; prepares ctx for AES CTR mode with this key */
+void
+br_aes_big_ctr_init(br_aes_big_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_big_ctr_vtable;	/* bind dispatch table */
+}
+
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	/* XOR the first len bytes of src into dst, one byte at a time. */
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t k;
+
+	for (k = 0; k < len; k ++) {
+		out[k] ^= in[k];
+	}
+}
+
+/* see bearssl_block.h; XORs data with AES(iv[0..11] || counter) keystream */
+uint32_t
+br_aes_big_ctr_run(const br_aes_big_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *out = data;
+
+	while (len > 0) {
+		unsigned char ks[16];
+		size_t chunk = len < 16 ? len : 16;
+
+		/* Counter block: 12 IV bytes, then cc written by br_enc32be. */
+		memcpy(ks, iv, 12);
+		br_enc32be(ks + 12, cc);
+		cc ++;
+		br_aes_big_encrypt(ctx->num_rounds, ctx->skey, ks);
+
+		/* The last block may be partial: only chunk bytes are XORed. */
+		xorbuf(out, ks, chunk);
+		out += chunk;
+		len -= chunk;
+	}
+	return cc;	/* counter value after the last block used */
+}
+
+/* see bearssl_block.h; dispatch table for the "big" AES CTR class */
+const br_block_ctr_class br_aes_big_ctr_vtable = {
+	sizeof(br_aes_big_ctr_keys),	/* size of the concrete context */
+	16,	/* presumably block size in bytes -- confirm in bearssl_block.h */
+	4,	/* presumably log2(block size) -- confirm in bearssl_block.h */
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_big_ctr_init,	/* cast: concrete ctx -> generic class */
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_big_ctr_run
+};
diff --git a/third_party/bearssl/src/aes_big_ctrcbc.c b/third_party/bearssl/src/aes_big_ctrcbc.c
new file mode 100644
index 0000000..d45ca76
--- /dev/null
+++ b/third_party/bearssl/src/aes_big_ctrcbc.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h; prepares ctx for AES CTR + CBC-MAC with this key */
+void
+br_aes_big_ctrcbc_init(br_aes_big_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_big_ctrcbc_vtable;	/* bind dispatch table */
+}
+
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	/* XOR the first len bytes of src into dst, one byte at a time. */
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t k;
+
+	for (k = 0; k < len; k ++) {
+		out[k] ^= in[k];
+	}
+}
+
+/* see bearssl_block.h; CTR over whole 16-byte blocks, 128-bit counter in ctr */
+void
+br_aes_big_ctrcbc_ctr(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf, *bctr;
+	uint32_t cc0, cc1, cc2, cc3;
+
+	buf = data;
+	bctr = ctr;
+	cc3 = br_dec32be(bctr +  0);	/* cc3 is the most significant word */
+	cc2 = br_dec32be(bctr +  4);
+	cc1 = br_dec32be(bctr +  8);
+	cc0 = br_dec32be(bctr + 12);	/* cc0 is incremented once per block */
+	while (len >= 16) {	/* a partial tail is skipped; len cannot wrap */
+		unsigned char tmp[16];
+		uint32_t carry;
+
+		br_enc32be(tmp +  0, cc3);
+		br_enc32be(tmp +  4, cc2);
+		br_enc32be(tmp +  8, cc1);
+		br_enc32be(tmp + 12, cc0);
+		br_aes_big_encrypt(ctx->num_rounds, ctx->skey, tmp);
+		xorbuf(buf, tmp, 16);
+		buf += 16;
+		len -= 16;
+		cc0 ++;
+		carry = (~(cc0 | -cc0)) >> 31;	/* 1 iff cc0 wrapped to 0, no branch */
+		cc1 += carry;
+		carry &= (~(cc1 | -cc1)) >> 31;	/* propagate only if cc1 is now 0 */
+		cc2 += carry;
+		carry &= (~(cc2 | -cc2)) >> 31;
+		cc3 += carry;
+	}
+	br_enc32be(bctr +  0, cc3);	/* write the advanced counter back */
+	br_enc32be(bctr +  4, cc2);
+	br_enc32be(bctr +  8, cc1);
+	br_enc32be(bctr + 12, cc0);
+}
+
+/* see bearssl_block.h; CBC-MAC over whole 16-byte blocks, state in cbcmac */
+void
+br_aes_big_ctrcbc_mac(const br_aes_big_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *buf;
+
+	buf = data;
+	while (len >= 16) {	/* never read a partial tail; len cannot wrap */
+		xorbuf(cbcmac, buf, 16);	/* fold the block into the MAC state */
+		br_aes_big_encrypt(ctx->num_rounds, ctx->skey, cbcmac);
+		buf += 16;
+		len -= 16;
+	}
+}
+
+/* see bearssl_block.h; CTR-encrypt data in place, then MAC the ciphertext */
+void
+br_aes_big_ctrcbc_encrypt(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	br_aes_big_ctrcbc_ctr(ctx, ctr, data, len);	/* data becomes ciphertext */
+	br_aes_big_ctrcbc_mac(ctx, cbcmac, data, len);	/* MAC runs on that output */
+}
+
+/* see bearssl_block.h; MAC the ciphertext first, then CTR-decrypt in place */
+void
+br_aes_big_ctrcbc_decrypt(const br_aes_big_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	br_aes_big_ctrcbc_mac(ctx, cbcmac, data, len);	/* data still ciphertext */
+	br_aes_big_ctrcbc_ctr(ctx, ctr, data, len);	/* data becomes plaintext */
+}
+
+/* see bearssl_block.h; dispatch table for the "big" AES CTR + CBC-MAC class */
+const br_block_ctrcbc_class br_aes_big_ctrcbc_vtable = {
+	sizeof(br_aes_big_ctrcbc_keys),	/* size of the concrete context */
+	16,	/* presumably block size in bytes -- confirm in bearssl_block.h */
+	4,	/* presumably log2(block size) -- confirm in bearssl_block.h */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_big_ctrcbc_init,	/* cast: concrete ctx -> generic class */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_big_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_big_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_big_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_big_ctrcbc_mac
+};
diff --git a/third_party/bearssl/src/aes_big_dec.c b/third_party/bearssl/src/aes_big_dec.c
new file mode 100644
index 0000000..a5d0e3c
--- /dev/null
+++ b/third_party/bearssl/src/aes_big_dec.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Inverse S-box (applied byte-wise in the last round of decryption).
+ */
+static const unsigned char iS[] = {
+	0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E,
+	0x81, 0xF3, 0xD7, 0xFB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
+	0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, 0x54, 0x7B, 0x94, 0x32,
+	0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
+	0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49,
+	0x6D, 0x8B, 0xD1, 0x25, 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
+	0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, 0x6C, 0x70, 0x48, 0x50,
+	0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
+	0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05,
+	0xB8, 0xB3, 0x45, 0x06, 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
+	0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, 0x3A, 0x91, 0x11, 0x41,
+	0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
+	0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8,
+	0x1C, 0x75, 0xDF, 0x6E, 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
+	0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, 0xFC, 0x56, 0x3E, 0x4B,
+	0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
+	0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59,
+	0x27, 0x80, 0xEC, 0x5F, 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
+	0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, 0xA0, 0xE0, 0x3B, 0x4D,
+	0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
+	0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63,
+	0x55, 0x21, 0x0C, 0x7D
+};
+
+static const uint32_t iSsm0[] = {	/* entry x: 0E,09,0D,0B times iS[x] in GF(2^8), high byte first */
+	0x51F4A750, 0x7E416553, 0x1A17A4C3, 0x3A275E96, 0x3BAB6BCB, 0x1F9D45F1,
+	0xACFA58AB, 0x4BE30393, 0x2030FA55, 0xAD766DF6, 0x88CC7691, 0xF5024C25,
+	0x4FE5D7FC, 0xC52ACBD7, 0x26354480, 0xB562A38F, 0xDEB15A49, 0x25BA1B67,
+	0x45EA0E98, 0x5DFEC0E1, 0xC32F7502, 0x814CF012, 0x8D4697A3, 0x6BD3F9C6,
+	0x038F5FE7, 0x15929C95, 0xBF6D7AEB, 0x955259DA, 0xD4BE832D, 0x587421D3,
+	0x49E06929, 0x8EC9C844, 0x75C2896A, 0xF48E7978, 0x99583E6B, 0x27B971DD,
+	0xBEE14FB6, 0xF088AD17, 0xC920AC66, 0x7DCE3AB4, 0x63DF4A18, 0xE51A3182,
+	0x97513360, 0x62537F45, 0xB16477E0, 0xBB6BAE84, 0xFE81A01C, 0xF9082B94,
+	0x70486858, 0x8F45FD19, 0x94DE6C87, 0x527BF8B7, 0xAB73D323, 0x724B02E2,
+	0xE31F8F57, 0x6655AB2A, 0xB2EB2807, 0x2FB5C203, 0x86C57B9A, 0xD33708A5,
+	0x302887F2, 0x23BFA5B2, 0x02036ABA, 0xED16825C, 0x8ACF1C2B, 0xA779B492,
+	0xF307F2F0, 0x4E69E2A1, 0x65DAF4CD, 0x0605BED5, 0xD134621F, 0xC4A6FE8A,
+	0x342E539D, 0xA2F355A0, 0x058AE132, 0xA4F6EB75, 0x0B83EC39, 0x4060EFAA,
+	0x5E719F06, 0xBD6E1051, 0x3E218AF9, 0x96DD063D, 0xDD3E05AE, 0x4DE6BD46,
+	0x91548DB5, 0x71C45D05, 0x0406D46F, 0x605015FF, 0x1998FB24, 0xD6BDE997,
+	0x894043CC, 0x67D99E77, 0xB0E842BD, 0x07898B88, 0xE7195B38, 0x79C8EEDB,
+	0xA17C0A47, 0x7C420FE9, 0xF8841EC9, 0x00000000, 0x09808683, 0x322BED48,
+	0x1E1170AC, 0x6C5A724E, 0xFD0EFFFB, 0x0F853856, 0x3DAED51E, 0x362D3927,
+	0x0A0FD964, 0x685CA621, 0x9B5B54D1, 0x24362E3A, 0x0C0A67B1, 0x9357E70F,
+	0xB4EE96D2, 0x1B9B919E, 0x80C0C54F, 0x61DC20A2, 0x5A774B69, 0x1C121A16,
+	0xE293BA0A, 0xC0A02AE5, 0x3C22E043, 0x121B171D, 0x0E090D0B, 0xF28BC7AD,
+	0x2DB6A8B9, 0x141EA9C8, 0x57F11985, 0xAF75074C, 0xEE99DDBB, 0xA37F60FD,
+	0xF701269F, 0x5C72F5BC, 0x44663BC5, 0x5BFB7E34, 0x8B432976, 0xCB23C6DC,
+	0xB6EDFC68, 0xB8E4F163, 0xD731DCCA, 0x42638510, 0x13972240, 0x84C61120,
+	0x854A247D, 0xD2BB3DF8, 0xAEF93211, 0xC729A16D, 0x1D9E2F4B, 0xDCB230F3,
+	0x0D8652EC, 0x77C1E3D0, 0x2BB3166C, 0xA970B999, 0x119448FA, 0x47E96422,
+	0xA8FC8CC4, 0xA0F03F1A, 0x567D2CD8, 0x223390EF, 0x87494EC7, 0xD938D1C1,
+	0x8CCAA2FE, 0x98D40B36, 0xA6F581CF, 0xA57ADE28, 0xDAB78E26, 0x3FADBFA4,
+	0x2C3A9DE4, 0x5078920D, 0x6A5FCC9B, 0x547E4662, 0xF68D13C2, 0x90D8B8E8,
+	0x2E39F75E, 0x82C3AFF5, 0x9F5D80BE, 0x69D0937C, 0x6FD52DA9, 0xCF2512B3,
+	0xC8AC993B, 0x10187DA7, 0xE89C636E, 0xDB3BBB7B, 0xCD267809, 0x6E5918F4,
+	0xEC9AB701, 0x834F9AA8, 0xE6956E65, 0xAAFFE67E, 0x21BCCF08, 0xEF15E8E6,
+	0xBAE79BD9, 0x4A6F36CE, 0xEA9F09D4, 0x29B07CD6, 0x31A4B2AF, 0x2A3F2331,
+	0xC6A59430, 0x35A266C0, 0x744EBC37, 0xFC82CAA6, 0xE090D0B0, 0x33A7D815,
+	0xF104984A, 0x41ECDAF7, 0x7FCD500E, 0x1791F62F, 0x764DD68D, 0x43EFB04D,
+	0xCCAA4D54, 0xE49604DF, 0x9ED1B5E3, 0x4C6A881B, 0xC12C1FB8, 0x4665517F,
+	0x9D5EEA04, 0x018C355D, 0xFA877473, 0xFB0B412E, 0xB3671D5A, 0x92DBD252,
+	0xE9105633, 0x6DD64713, 0x9AD7618C, 0x37A10C7A, 0x59F8148E, 0xEB133C89,
+	0xCEA927EE, 0xB761C935, 0xE11CE5ED, 0x7A47B13C, 0x9CD2DF59, 0x55F2733F,
+	0x1814CE79, 0x73C737BF, 0x53F7CDEA, 0x5FFDAA5B, 0xDF3D6F14, 0x7844DB86,
+	0xCAAFF381, 0xB968C43E, 0x3824342C, 0xC2A3405F, 0x161DC372, 0xBCE2250C,
+	0x283C498B, 0xFF0D9541, 0x39A80171, 0x080CB3DE, 0xD8B4E49C, 0x6456C190,
+	0x7BCB8461, 0xD532B670, 0x486C5C74, 0xD0B85742
+};
+
+static unsigned
+mul2(unsigned x)
+{
+	unsigned dbl = x << 1;	/* x times 2 in GF(2^8), before reduction */
+	return dbl ^ (0x11B & (unsigned)(-(int)(dbl >> 8)));	/* reduce if bit 8 set */
+}
+
+static unsigned
+mul9(unsigned x)
+{
+	return mul2(mul2(mul2(x))) ^ x;	/* 0x09 = 8 + 1: x*8 ^ x in GF(2^8) */
+}
+
+static unsigned
+mulb(unsigned x)
+{
+	/* 0x0B = 8 + 2 + 1, so x*0x0B = x*8 ^ x*2 ^ x in GF(2^8). */
+	unsigned twice = mul2(x);
+	unsigned eightfold = mul2(mul2(twice));
+	return eightfold ^ twice ^ x;
+}
+
+static unsigned
+muld(unsigned x)
+{
+	/* 0x0D = 8 + 4 + 1, so x*0x0D = x*8 ^ x*4 ^ x in GF(2^8). */
+	unsigned fourfold = mul2(mul2(x));
+	unsigned eightfold = mul2(fourfold);
+	return eightfold ^ fourfold ^ x;
+}
+
+static unsigned
+mule(unsigned x)
+{
+	/* 0x0E = 8 + 4 + 2, so x*0x0E = x*8 ^ x*4 ^ x*2 in GF(2^8). */
+	unsigned twice = mul2(x);
+	unsigned fourfold = mul2(twice);
+	unsigned eightfold = mul2(fourfold);
+	return eightfold ^ fourfold ^ twice;
+}
+
+/* see inner.h; builds the decryption key schedule, returns the round count */
+unsigned
+br_aes_big_keysched_inv(uint32_t *skey, const void *key, size_t key_len)
+{
+	unsigned num_rounds;
+	int i, m;
+
+	/*
+	 * Sub-keys for decryption are distinct from encryption sub-keys
+	 * in that InvMixColumns() is already applied for the inner
+	 * rounds.
+	 */
+	num_rounds = br_aes_keysched(skey, key, key_len);	/* 0 on bad key_len */
+	m = (int)(num_rounds << 2);	/* index of the first word of the last sub-key */
+	for (i = 4; i < m; i ++) {	/* skips the first and last sub-keys */
+		uint32_t p;
+		unsigned p0, p1, p2, p3;
+		uint32_t q0, q1, q2, q3;
+
+		p = skey[i];
+		p0 = p >> 24;	/* split the word into bytes, high byte first */
+		p1 = (p >> 16) & 0xFF;
+		p2 = (p >> 8) & 0xFF;
+		p3 = p & 0xFF;
+		q0 = mule(p0) ^ mulb(p1) ^ muld(p2) ^ mul9(p3);	/* row 0E 0B 0D 09 */
+		q1 = mul9(p0) ^ mule(p1) ^ mulb(p2) ^ muld(p3);	/* row 09 0E 0B 0D */
+		q2 = muld(p0) ^ mul9(p1) ^ mule(p2) ^ mulb(p3);	/* row 0D 09 0E 0B */
+		q3 = mulb(p0) ^ muld(p1) ^ mul9(p2) ^ mule(p3);	/* row 0B 0D 09 0E */
+		skey[i] = (q0 << 24) | (q1 << 16) | (q2 << 8) | q3;
+	}
+	return num_rounds;
+}
+
+static inline uint32_t
+rotr(uint32_t x, int n)
+{
+	return (x >> n) | (x << (32 - n));	/* rotate right; n must be 1..31 */
+}
+
+#define iSboxExt0(x)   (iSsm0[x])	/* table entry as stored */
+#define iSboxExt1(x)   (rotr(iSsm0[x], 8))	/* same entry rotated right by 8 bits */
+#define iSboxExt2(x)   (rotr(iSsm0[x], 16))	/* same entry rotated right by 16 bits */
+#define iSboxExt3(x)   (rotr(iSsm0[x], 24))	/* same entry rotated right by 24 bits */
+
+/* see bearssl.h; decrypts one 16-byte block in place with an inverse schedule */
+void
+br_aes_big_decrypt(unsigned num_rounds, const uint32_t *skey, void *data)
+{
+	unsigned char *buf;
+	uint32_t s0, s1, s2, s3;
+	uint32_t t0, t1, t2, t3;
+	unsigned u;
+
+	buf = data;
+	s0 = br_dec32be(buf);	/* load the block as four 32-bit words */
+	s1 = br_dec32be(buf + 4);
+	s2 = br_dec32be(buf + 8);
+	s3 = br_dec32be(buf + 12);
+	s0 ^= skey[(num_rounds << 2) + 0];	/* start with the last sub-key */
+	s1 ^= skey[(num_rounds << 2) + 1];
+	s2 ^= skey[(num_rounds << 2) + 2];
+	s3 ^= skey[(num_rounds << 2) + 3];
+	for (u = num_rounds - 1; u > 0; u --) {	/* NOTE(review): wraps if num_rounds == 0 */
+		uint32_t v0 = iSboxExt0(s0 >> 24)
+			^ iSboxExt1((s3 >> 16) & 0xFF)
+			^ iSboxExt2((s2 >> 8) & 0xFF)
+			^ iSboxExt3(s1 & 0xFF);
+		uint32_t v1 = iSboxExt0(s1 >> 24)
+			^ iSboxExt1((s0 >> 16) & 0xFF)
+			^ iSboxExt2((s3 >> 8) & 0xFF)
+			^ iSboxExt3(s2 & 0xFF);
+		uint32_t v2 = iSboxExt0(s2 >> 24)
+			^ iSboxExt1((s1 >> 16) & 0xFF)
+			^ iSboxExt2((s0 >> 8) & 0xFF)
+			^ iSboxExt3(s3 & 0xFF);
+		uint32_t v3 = iSboxExt0(s3 >> 24)
+			^ iSboxExt1((s2 >> 16) & 0xFF)
+			^ iSboxExt2((s1 >> 8) & 0xFF)
+			^ iSboxExt3(s0 & 0xFF);
+		s0 = v0;
+		s1 = v1;
+		s2 = v2;
+		s3 = v3;
+		s0 ^= skey[u << 2];	/* sub-key u occupies words 4u .. 4u+3 */
+		s1 ^= skey[(u << 2) + 1];
+		s2 ^= skey[(u << 2) + 2];
+		s3 ^= skey[(u << 2) + 3];
+	}
+	t0 = ((uint32_t)iS[s0 >> 24] << 24)	/* last round: plain iS lookups only */
+		| ((uint32_t)iS[(s3 >> 16) & 0xFF] << 16)
+		| ((uint32_t)iS[(s2 >> 8) & 0xFF] << 8)
+		| (uint32_t)iS[s1 & 0xFF];
+	t1 = ((uint32_t)iS[s1 >> 24] << 24)
+		| ((uint32_t)iS[(s0 >> 16) & 0xFF] << 16)
+		| ((uint32_t)iS[(s3 >> 8) & 0xFF] << 8)
+		| (uint32_t)iS[s2 & 0xFF];
+	t2 = ((uint32_t)iS[s2 >> 24] << 24)
+		| ((uint32_t)iS[(s1 >> 16) & 0xFF] << 16)
+		| ((uint32_t)iS[(s0 >> 8) & 0xFF] << 8)
+		| (uint32_t)iS[s3 & 0xFF];
+	t3 = ((uint32_t)iS[s3 >> 24] << 24)
+		| ((uint32_t)iS[(s2 >> 16) & 0xFF] << 16)
+		| ((uint32_t)iS[(s1 >> 8) & 0xFF] << 8)
+		| (uint32_t)iS[s0 & 0xFF];
+	s0 = t0 ^ skey[0];	/* finish with the first sub-key */
+	s1 = t1 ^ skey[1];
+	s2 = t2 ^ skey[2];
+	s3 = t3 ^ skey[3];
+	br_enc32be(buf, s0);	/* store the result over the input block */
+	br_enc32be(buf + 4, s1);
+	br_enc32be(buf + 8, s2);
+	br_enc32be(buf + 12, s3);
+}
diff --git a/third_party/bearssl/src/aes_big_enc.c b/third_party/bearssl/src/aes_big_enc.c
new file mode 100644
index 0000000..bbabb9a
--- /dev/null
+++ b/third_party/bearssl/src/aes_big_enc.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define S   br_aes_S	/* short local alias for the shared S-box table */
+
+static const uint32_t Ssm0[] = {	/* entry x: 02,01,01,03 times S[x] in GF(2^8), high byte first */
+	0xC66363A5, 0xF87C7C84, 0xEE777799, 0xF67B7B8D, 0xFFF2F20D, 0xD66B6BBD,
+	0xDE6F6FB1, 0x91C5C554, 0x60303050, 0x02010103, 0xCE6767A9, 0x562B2B7D,
+	0xE7FEFE19, 0xB5D7D762, 0x4DABABE6, 0xEC76769A, 0x8FCACA45, 0x1F82829D,
+	0x89C9C940, 0xFA7D7D87, 0xEFFAFA15, 0xB25959EB, 0x8E4747C9, 0xFBF0F00B,
+	0x41ADADEC, 0xB3D4D467, 0x5FA2A2FD, 0x45AFAFEA, 0x239C9CBF, 0x53A4A4F7,
+	0xE4727296, 0x9BC0C05B, 0x75B7B7C2, 0xE1FDFD1C, 0x3D9393AE, 0x4C26266A,
+	0x6C36365A, 0x7E3F3F41, 0xF5F7F702, 0x83CCCC4F, 0x6834345C, 0x51A5A5F4,
+	0xD1E5E534, 0xF9F1F108, 0xE2717193, 0xABD8D873, 0x62313153, 0x2A15153F,
+	0x0804040C, 0x95C7C752, 0x46232365, 0x9DC3C35E, 0x30181828, 0x379696A1,
+	0x0A05050F, 0x2F9A9AB5, 0x0E070709, 0x24121236, 0x1B80809B, 0xDFE2E23D,
+	0xCDEBEB26, 0x4E272769, 0x7FB2B2CD, 0xEA75759F, 0x1209091B, 0x1D83839E,
+	0x582C2C74, 0x341A1A2E, 0x361B1B2D, 0xDC6E6EB2, 0xB45A5AEE, 0x5BA0A0FB,
+	0xA45252F6, 0x763B3B4D, 0xB7D6D661, 0x7DB3B3CE, 0x5229297B, 0xDDE3E33E,
+	0x5E2F2F71, 0x13848497, 0xA65353F5, 0xB9D1D168, 0x00000000, 0xC1EDED2C,
+	0x40202060, 0xE3FCFC1F, 0x79B1B1C8, 0xB65B5BED, 0xD46A6ABE, 0x8DCBCB46,
+	0x67BEBED9, 0x7239394B, 0x944A4ADE, 0x984C4CD4, 0xB05858E8, 0x85CFCF4A,
+	0xBBD0D06B, 0xC5EFEF2A, 0x4FAAAAE5, 0xEDFBFB16, 0x864343C5, 0x9A4D4DD7,
+	0x66333355, 0x11858594, 0x8A4545CF, 0xE9F9F910, 0x04020206, 0xFE7F7F81,
+	0xA05050F0, 0x783C3C44, 0x259F9FBA, 0x4BA8A8E3, 0xA25151F3, 0x5DA3A3FE,
+	0x804040C0, 0x058F8F8A, 0x3F9292AD, 0x219D9DBC, 0x70383848, 0xF1F5F504,
+	0x63BCBCDF, 0x77B6B6C1, 0xAFDADA75, 0x42212163, 0x20101030, 0xE5FFFF1A,
+	0xFDF3F30E, 0xBFD2D26D, 0x81CDCD4C, 0x180C0C14, 0x26131335, 0xC3ECEC2F,
+	0xBE5F5FE1, 0x359797A2, 0x884444CC, 0x2E171739, 0x93C4C457, 0x55A7A7F2,
+	0xFC7E7E82, 0x7A3D3D47, 0xC86464AC, 0xBA5D5DE7, 0x3219192B, 0xE6737395,
+	0xC06060A0, 0x19818198, 0x9E4F4FD1, 0xA3DCDC7F, 0x44222266, 0x542A2A7E,
+	0x3B9090AB, 0x0B888883, 0x8C4646CA, 0xC7EEEE29, 0x6BB8B8D3, 0x2814143C,
+	0xA7DEDE79, 0xBC5E5EE2, 0x160B0B1D, 0xADDBDB76, 0xDBE0E03B, 0x64323256,
+	0x743A3A4E, 0x140A0A1E, 0x924949DB, 0x0C06060A, 0x4824246C, 0xB85C5CE4,
+	0x9FC2C25D, 0xBDD3D36E, 0x43ACACEF, 0xC46262A6, 0x399191A8, 0x319595A4,
+	0xD3E4E437, 0xF279798B, 0xD5E7E732, 0x8BC8C843, 0x6E373759, 0xDA6D6DB7,
+	0x018D8D8C, 0xB1D5D564, 0x9C4E4ED2, 0x49A9A9E0, 0xD86C6CB4, 0xAC5656FA,
+	0xF3F4F407, 0xCFEAEA25, 0xCA6565AF, 0xF47A7A8E, 0x47AEAEE9, 0x10080818,
+	0x6FBABAD5, 0xF0787888, 0x4A25256F, 0x5C2E2E72, 0x381C1C24, 0x57A6A6F1,
+	0x73B4B4C7, 0x97C6C651, 0xCBE8E823, 0xA1DDDD7C, 0xE874749C, 0x3E1F1F21,
+	0x964B4BDD, 0x61BDBDDC, 0x0D8B8B86, 0x0F8A8A85, 0xE0707090, 0x7C3E3E42,
+	0x71B5B5C4, 0xCC6666AA, 0x904848D8, 0x06030305, 0xF7F6F601, 0x1C0E0E12,
+	0xC26161A3, 0x6A35355F, 0xAE5757F9, 0x69B9B9D0, 0x17868691, 0x99C1C158,
+	0x3A1D1D27, 0x279E9EB9, 0xD9E1E138, 0xEBF8F813, 0x2B9898B3, 0x22111133,
+	0xD26969BB, 0xA9D9D970, 0x078E8E89, 0x339494A7, 0x2D9B9BB6, 0x3C1E1E22,
+	0x15878792, 0xC9E9E920, 0x87CECE49, 0xAA5555FF, 0x50282878, 0xA5DFDF7A,
+	0x038C8C8F, 0x59A1A1F8, 0x09898980, 0x1A0D0D17, 0x65BFBFDA, 0xD7E6E631,
+	0x844242C6, 0xD06868B8, 0x824141C3, 0x299999B0, 0x5A2D2D77, 0x1E0F0F11,
+	0x7BB0B0CB, 0xA85454FC, 0x6DBBBBD6, 0x2C16163A
+};
+
+static inline uint32_t
+rotr(uint32_t x, int n)
+{
+	return (x >> n) | (x << (32 - n));	/* rotate right; n must be 1..31 */
+}
+
+#define SboxExt0(x)   (Ssm0[x])	/* table entry as stored */
+#define SboxExt1(x)   (rotr(Ssm0[x], 8))	/* same entry rotated right by 8 bits */
+#define SboxExt2(x)   (rotr(Ssm0[x], 16))	/* same entry rotated right by 16 bits */
+#define SboxExt3(x)   (rotr(Ssm0[x], 24))	/* same entry rotated right by 24 bits */
+
+
+/* see bearssl.h; encrypts one 16-byte block in place with the given schedule */
+void
+br_aes_big_encrypt(unsigned num_rounds, const uint32_t *skey, void *data)
+{
+	unsigned char *buf;
+	uint32_t s0, s1, s2, s3;
+	uint32_t t0, t1, t2, t3;
+	unsigned u;
+
+	buf = data;
+	s0 = br_dec32be(buf);	/* load the block as four 32-bit words */
+	s1 = br_dec32be(buf + 4);
+	s2 = br_dec32be(buf + 8);
+	s3 = br_dec32be(buf + 12);
+	s0 ^= skey[0];	/* start with the first sub-key */
+	s1 ^= skey[1];
+	s2 ^= skey[2];
+	s3 ^= skey[3];
+	for (u = 1; u < num_rounds; u ++) {	/* inner rounds use the combined table */
+		uint32_t v0, v1, v2, v3;
+
+		v0 = SboxExt0(s0 >> 24)
+			^ SboxExt1((s1 >> 16) & 0xFF)
+			^ SboxExt2((s2 >> 8) & 0xFF)
+			^ SboxExt3(s3 & 0xFF);
+		v1 = SboxExt0(s1 >> 24)
+			^ SboxExt1((s2 >> 16) & 0xFF)
+			^ SboxExt2((s3 >> 8) & 0xFF)
+			^ SboxExt3(s0 & 0xFF);
+		v2 = SboxExt0(s2 >> 24)
+			^ SboxExt1((s3 >> 16) & 0xFF)
+			^ SboxExt2((s0 >> 8) & 0xFF)
+			^ SboxExt3(s1 & 0xFF);
+		v3 = SboxExt0(s3 >> 24)
+			^ SboxExt1((s0 >> 16) & 0xFF)
+			^ SboxExt2((s1 >> 8) & 0xFF)
+			^ SboxExt3(s2 & 0xFF);
+		s0 = v0;
+		s1 = v1;
+		s2 = v2;
+		s3 = v3;
+		s0 ^= skey[u << 2];	/* sub-key u occupies words 4u .. 4u+3 */
+		s1 ^= skey[(u << 2) + 1];
+		s2 ^= skey[(u << 2) + 2];
+		s3 ^= skey[(u << 2) + 3];
+	}
+	t0 = ((uint32_t)S[s0 >> 24] << 24)	/* last round: plain S lookups only */
+		| ((uint32_t)S[(s1 >> 16) & 0xFF] << 16)
+		| ((uint32_t)S[(s2 >> 8) & 0xFF] << 8)
+		| (uint32_t)S[s3 & 0xFF];
+	t1 = ((uint32_t)S[s1 >> 24] << 24)
+		| ((uint32_t)S[(s2 >> 16) & 0xFF] << 16)
+		| ((uint32_t)S[(s3 >> 8) & 0xFF] << 8)
+		| (uint32_t)S[s0 & 0xFF];
+	t2 = ((uint32_t)S[s2 >> 24] << 24)
+		| ((uint32_t)S[(s3 >> 16) & 0xFF] << 16)
+		| ((uint32_t)S[(s0 >> 8) & 0xFF] << 8)
+		| (uint32_t)S[s1 & 0xFF];
+	t3 = ((uint32_t)S[s3 >> 24] << 24)
+		| ((uint32_t)S[(s0 >> 16) & 0xFF] << 16)
+		| ((uint32_t)S[(s1 >> 8) & 0xFF] << 8)
+		| (uint32_t)S[s2 & 0xFF];
+	s0 = t0 ^ skey[num_rounds << 2];	/* finish with the last sub-key */
+	s1 = t1 ^ skey[(num_rounds << 2) + 1];
+	s2 = t2 ^ skey[(num_rounds << 2) + 2];
+	s3 = t3 ^ skey[(num_rounds << 2) + 3];
+	br_enc32be(buf, s0);	/* store the result over the input block */
+	br_enc32be(buf + 4, s1);
+	br_enc32be(buf + 8, s2);
+	br_enc32be(buf + 12, s3);
+}
diff --git a/third_party/bearssl/src/aes_common.c b/third_party/bearssl/src/aes_common.c
new file mode 100644
index 0000000..72c64fb
--- /dev/null
+++ b/third_party/bearssl/src/aes_common.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const uint32_t Rcon[] = {	/* round constants XORed in by br_aes_keysched() */
+	0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000,
+	0x40000000, 0x80000000, 0x1B000000, 0x36000000
+};
+
+#define S   br_aes_S	/* short local alias used by SubWord() */
+
+/* see inner.h; AES S-box, indexed by a byte value */
+const unsigned char br_aes_S[] = {
+	0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B,
+	0xFE, 0xD7, 0xAB, 0x76, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
+	0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, 0xB7, 0xFD, 0x93, 0x26,
+	0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+	0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2,
+	0xEB, 0x27, 0xB2, 0x75, 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
+	0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, 0x53, 0xD1, 0x00, 0xED,
+	0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+	0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F,
+	0x50, 0x3C, 0x9F, 0xA8, 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
+	0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, 0xCD, 0x0C, 0x13, 0xEC,
+	0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+	0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14,
+	0xDE, 0x5E, 0x0B, 0xDB, 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
+	0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, 0xE7, 0xC8, 0x37, 0x6D,
+	0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+	0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F,
+	0x4B, 0xBD, 0x8B, 0x8A, 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
+	0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, 0xE1, 0xF8, 0x98, 0x11,
+	0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+	0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F,
+	0xB0, 0x54, 0xBB, 0x16
+};
+
+static uint32_t
+SubWord(uint32_t x)
+{
+	/* Replace each byte of x by its S-box image, keeping byte positions. */
+	uint32_t b0 = S[x >> 24], b1 = S[(x >> 16) & 0xFF];
+	uint32_t b2 = S[(x >> 8) & 0xFF], b3 = S[x & 0xFF];
+	return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
+}
+
+/* see inner.h; expands key into skey[], returns round count (0 if bad key_len) */
+unsigned
+br_aes_keysched(uint32_t *skey, const void *key, size_t key_len)
+{
+	unsigned num_rounds;
+	int i, j, k, nk, nkf;
+
+	switch (key_len) {	/* key_len is in bytes: only 16, 24 or 32 accepted */
+	case 16:
+		num_rounds = 10;
+		break;
+	case 24:
+		num_rounds = 12;
+		break;
+	case 32:
+		num_rounds = 14;
+		break;
+	default:
+		/* abort(); */
+		return 0;	/* skey is left unwritten; callers here do not check */
+	}
+	nk = (int)(key_len >> 2);	/* number of 32-bit words in the key */
+	nkf = (int)((num_rounds + 1) << 2);	/* total words: 4 per sub-key */
+	for (i = 0; i < nk; i ++) {	/* first nk words are the key itself */
+		skey[i] = br_dec32be((const unsigned char *)key + (i << 2));
+	}
+	for (i = nk, j = 0, k = 0; i < nkf; i ++) {	/* j = i mod nk, k = Rcon index */
+		uint32_t tmp;
+
+		tmp = skey[i - 1];
+		if (j == 0) {
+			tmp = (tmp << 8) | (tmp >> 24);	/* rotate left by one byte */
+			tmp = SubWord(tmp) ^ Rcon[k];
+		} else if (nk > 6 && j == 4) {	/* extra SubWord, 32-byte keys only */
+			tmp = SubWord(tmp);
+		}
+		skey[i] = skey[i - nk] ^ tmp;
+		if (++ j == nk) {
+			j = 0;
+			k ++;
+		}
+	}
+	return num_rounds;
+}
diff --git a/third_party/bearssl/src/aes_ct.c b/third_party/bearssl/src/aes_ct.c
new file mode 100644
index 0000000..66776d9
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct.c
@@ -0,0 +1,328 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- evaluates the S-box circuit in place on the eight 32-bit words q[0..7] */
+void
+br_aes_ct_bitslice_Sbox(uint32_t *q)
+{
+	/*
+	 * This S-box implementation is a straightforward translation of
+	 * the circuit described by Boyar and Peralta in "A new
+	 * combinational logic minimization technique with applications
+	 * to cryptology" (https://eprint.iacr.org/2009/191.pdf).
+	 * Only XOR, AND and NOT on whole words are used; no table lookup.
+	 * Note that variables x* (input) and s* (output) are numbered
+	 * in "reverse" order (x0 is the high bit, x7 is the low bit).
+	 */
+
+	uint32_t x0, x1, x2, x3, x4, x5, x6, x7;
+	uint32_t y1, y2, y3, y4, y5, y6, y7, y8, y9;
+	uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
+	uint32_t y20, y21;
+	uint32_t z0, z1, z2, z3, z4, z5, z6, z7, z8, z9;
+	uint32_t z10, z11, z12, z13, z14, z15, z16, z17;
+	uint32_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
+	uint32_t t10, t11, t12, t13, t14, t15, t16, t17, t18, t19;
+	uint32_t t20, t21, t22, t23, t24, t25, t26, t27, t28, t29;
+	uint32_t t30, t31, t32, t33, t34, t35, t36, t37, t38, t39;
+	uint32_t t40, t41, t42, t43, t44, t45, t46, t47, t48, t49;
+	uint32_t t50, t51, t52, t53, t54, t55, t56, t57, t58, t59;
+	uint32_t t60, t61, t62, t63, t64, t65, t66, t67;
+	uint32_t s0, s1, s2, s3, s4, s5, s6, s7;
+
+	x0 = q[7]; /* inputs are loaded in reverse index order: q[7] is x0 (high bit) */
+	x1 = q[6];
+	x2 = q[5];
+	x3 = q[4];
+	x4 = q[3];
+	x5 = q[2];
+	x6 = q[1];
+	x7 = q[0]; /* q[0] is x7 (low bit) */
+
+	/*
+	 * Top linear transformation: y1..y21 are XOR combinations of x0..x7.
+	 */
+	y14 = x3 ^ x5;
+	y13 = x0 ^ x6;
+	y9 = x0 ^ x3;
+	y8 = x0 ^ x5;
+	t0 = x1 ^ x2;
+	y1 = t0 ^ x7;
+	y4 = y1 ^ x3;
+	y12 = y13 ^ y14;
+	y2 = y1 ^ x0;
+	y5 = y1 ^ x6;
+	y3 = y5 ^ y8;
+	t1 = x4 ^ y12;
+	y15 = t1 ^ x5;
+	y20 = t1 ^ x1;
+	y6 = y15 ^ x7;
+	y10 = y15 ^ t0;
+	y11 = y20 ^ y9;
+	y7 = x7 ^ y11;
+	y17 = y10 ^ y11;
+	y19 = y10 ^ y8;
+	y16 = t0 ^ y11;
+	y21 = y13 ^ y16;
+	y18 = x0 ^ y16;
+
+	/*
+	 * Non-linear section: the only part of the circuit that uses AND.
+	 */
+	t2 = y12 & y15;
+	t3 = y3 & y6;
+	t4 = t3 ^ t2;
+	t5 = y4 & x7;
+	t6 = t5 ^ t2;
+	t7 = y13 & y16;
+	t8 = y5 & y1;
+	t9 = t8 ^ t7;
+	t10 = y2 & y7;
+	t11 = t10 ^ t7;
+	t12 = y9 & y11;
+	t13 = y14 & y17;
+	t14 = t13 ^ t12;
+	t15 = y8 & y10;
+	t16 = t15 ^ t12;
+	t17 = t4 ^ t14;
+	t18 = t6 ^ t16;
+	t19 = t9 ^ t14;
+	t20 = t11 ^ t16;
+	t21 = t17 ^ y20;
+	t22 = t18 ^ y19;
+	t23 = t19 ^ y21;
+	t24 = t20 ^ y18;
+
+	t25 = t21 ^ t22;
+	t26 = t21 & t23;
+	t27 = t24 ^ t26;
+	t28 = t25 & t27;
+	t29 = t28 ^ t22;
+	t30 = t23 ^ t24;
+	t31 = t22 ^ t26;
+	t32 = t31 & t30;
+	t33 = t32 ^ t24;
+	t34 = t23 ^ t33;
+	t35 = t27 ^ t33;
+	t36 = t24 & t35;
+	t37 = t36 ^ t34;
+	t38 = t27 ^ t36;
+	t39 = t29 & t38;
+	t40 = t25 ^ t39;
+
+	t41 = t40 ^ t37;
+	t42 = t29 ^ t33;
+	t43 = t29 ^ t40;
+	t44 = t33 ^ t37;
+	t45 = t42 ^ t41;
+	z0 = t44 & y15;
+	z1 = t37 & y6;
+	z2 = t33 & x7;
+	z3 = t43 & y16;
+	z4 = t40 & y1;
+	z5 = t29 & y7;
+	z6 = t42 & y11;
+	z7 = t45 & y17;
+	z8 = t41 & y10;
+	z9 = t44 & y12;
+	z10 = t37 & y3;
+	z11 = t33 & y4;
+	z12 = t43 & y13;
+	z13 = t40 & y5;
+	z14 = t29 & y2;
+	z15 = t42 & y9;
+	z16 = t45 & y14;
+	z17 = t41 & y8;
+
+	/*
+	 * Bottom linear transformation: XORs of z0..z17; s1, s2, s6, s7 also include a complement.
+	 */
+	t46 = z15 ^ z16;
+	t47 = z10 ^ z11;
+	t48 = z5 ^ z13;
+	t49 = z9 ^ z10;
+	t50 = z2 ^ z12;
+	t51 = z2 ^ z5;
+	t52 = z7 ^ z8;
+	t53 = z0 ^ z3;
+	t54 = z6 ^ z7;
+	t55 = z16 ^ z17;
+	t56 = z12 ^ t48;
+	t57 = t50 ^ t53;
+	t58 = z4 ^ t46;
+	t59 = z3 ^ t54;
+	t60 = t46 ^ t57;
+	t61 = z14 ^ t57;
+	t62 = t52 ^ t58;
+	t63 = t49 ^ t58;
+	t64 = z4 ^ t59;
+	t65 = t61 ^ t62;
+	t66 = z1 ^ t63;
+	s0 = t59 ^ t63;
+	s6 = t56 ^ ~t62;
+	s7 = t48 ^ ~t60;
+	t67 = t64 ^ t65;
+	s3 = t53 ^ t66;
+	s4 = t51 ^ t66;
+	s5 = t47 ^ t65;
+	s1 = t64 ^ ~s3;
+	s2 = t55 ^ ~t67;
+
+	q[7] = s0; /* outputs are stored with the same reversed numbering as the inputs */
+	q[6] = s1;
+	q[5] = s2;
+	q[4] = s3;
+	q[3] = s4;
+	q[2] = s5;
+	q[1] = s6;
+	q[0] = s7;
+}
+
+/* see inner.h -- in-place bit permutation of q[0..7]: within every byte, bit j of q[i] and bit i of q[j] trade places */
+void
+br_aes_ct_ortho(uint32_t *q)
+{
+#define SWAPN(cl, ch, s, x, y)   do { \
+		uint32_t a, b; \
+		a = (x); \
+		b = (y); \
+		(x) = (a & (uint32_t)cl) | ((b & (uint32_t)cl) << (s)); \
+		(y) = ((a & (uint32_t)ch) >> (s)) | (b & (uint32_t)ch); \
+	} while (0)
+
+#define SWAP2(x, y)   SWAPN(0x55555555, 0xAAAAAAAA, 1, x, y)
+#define SWAP4(x, y)   SWAPN(0x33333333, 0xCCCCCCCC, 2, x, y)
+#define SWAP8(x, y)   SWAPN(0x0F0F0F0F, 0xF0F0F0F0, 4, x, y)
+
+	SWAP2(q[0], q[1]); /* pass 1: words one apart exchange single bits */
+	SWAP2(q[2], q[3]);
+	SWAP2(q[4], q[5]);
+	SWAP2(q[6], q[7]);
+
+	SWAP4(q[0], q[2]); /* pass 2: words two apart exchange 2-bit groups */
+	SWAP4(q[1], q[3]);
+	SWAP4(q[4], q[6]);
+	SWAP4(q[5], q[7]);
+
+	SWAP8(q[0], q[4]); /* pass 3: words four apart exchange 4-bit groups */
+	SWAP8(q[1], q[5]);
+	SWAP8(q[2], q[6]);
+	SWAP8(q[3], q[7]);
+}
+
+static const unsigned char Rcon[] = { /* constants XORed in by br_aes_ct_keysched(), one per nk-word group */
+	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
+};
+
+static uint32_t /* runs x through the bitsliced S-box circuit and returns the resulting word */
+sub_word(uint32_t x)
+{
+	uint32_t q[8];
+	int i;
+
+	for (i = 0; i < 8; i ++) { /* the same word is placed in all eight slots */
+		q[i] = x;
+	}
+	br_aes_ct_ortho(q); /* to the bitsliced layout expected by the S-box */
+	br_aes_ct_bitslice_Sbox(q);
+	br_aes_ct_ortho(q); /* back to the normal layout */
+	return q[0]; /* only slot 0 is used as the result */
+}
+
+/* see inner.h -- writes 4 * (num_rounds + 1) compressed bitsliced subkey words to comp_skey[]; returns num_rounds, or 0 if key_len is not 16, 24 or 32 */
+unsigned
+br_aes_ct_keysched(uint32_t *comp_skey, const void *key, size_t key_len)
+{
+	unsigned num_rounds;
+	int i, j, k, nk, nkf; /* nk: key length in words; nkf: subkey words to produce; j: position inside the current group; k: Rcon index */
+	uint32_t tmp; /* most recently produced subkey word, carried across iterations */
+	uint32_t skey[120]; /* every subkey word stored twice (indices 2*i and 2*i + 1); nkf is at most 60 */
+
+	switch (key_len) { /* key length in bytes selects the round count */
+	case 16:
+		num_rounds = 10;
+		break;
+	case 24:
+		num_rounds = 12;
+		break;
+	case 32:
+		num_rounds = 14;
+		break;
+	default:
+		/* abort(); */
+		return 0; /* unsupported length: comp_skey[] is left untouched */
+	}
+	nk = (int)(key_len >> 2);
+	nkf = (int)((num_rounds + 1) << 2);
+	tmp = 0;
+	for (i = 0; i < nk; i ++) { /* the first nk words are the key bytes, decoded with br_dec32le() */
+		tmp = br_dec32le((const unsigned char *)key + (i << 2));
+		skey[(i << 1) + 0] = tmp;
+		skey[(i << 1) + 1] = tmp;
+	}
+	for (i = nk, j = 0, k = 0; i < nkf; i ++) { /* tmp enters each iteration holding word i - 1 */
+		if (j == 0) { /* first word of a group: rotate, substitute, add round constant */
+			tmp = (tmp << 24) | (tmp >> 8); /* rotate right by 8 bits */
+			tmp = sub_word(tmp) ^ Rcon[k];
+		} else if (nk > 6 && j == 4) { /* extra substitution, reached only when nk is 8 (32-byte key) */
+			tmp = sub_word(tmp);
+		}
+		tmp ^= skey[(i - nk) << 1]; /* combine with the word nk positions back */
+		skey[(i << 1) + 0] = tmp;
+		skey[(i << 1) + 1] = tmp;
+		if (++ j == nk) { /* group complete: restart j and move to the next round constant */
+			j = 0;
+			k ++;
+		}
+	}
+	for (i = 0; i < nkf; i += 4) { /* each group of four subkey words (eight slots) is converted in place */
+		br_aes_ct_ortho(skey + (i << 1));
+	}
+	for (i = 0, j = 0; i < nkf; i ++, j += 2) { /* pack two slots into one word: even bits from the first, odd bits from the second */
+		comp_skey[i] = (skey[j + 0] & 0x55555555)
+			| (skey[j + 1] & 0xAAAAAAAA);
+	}
+	return num_rounds;
+}
+
+/* see inner.h -- turns each of the 4 * (num_rounds + 1) words of comp_skey[] into two words of skey[] */
+void
+br_aes_ct_skey_expand(uint32_t *skey,
+	unsigned num_rounds, const uint32_t *comp_skey)
+{
+	unsigned u, v, n; /* u: index into comp_skey[]; v: index into skey[]; n: number of compressed words */
+
+	n = (num_rounds + 1) << 2;
+	for (u = 0, v = 0; u < n; u ++, v += 2) {
+		uint32_t x, y;
+
+		x = y = comp_skey[u];
+		x &= 0x55555555; /* keep the even-numbered bits */
+		skey[v + 0] = x | (x << 1); /* each even bit is copied into the odd bit above it */
+		y &= 0xAAAAAAAA; /* keep the odd-numbered bits */
+		skey[v + 1] = y | (y >> 1); /* each odd bit is copied into the even bit below it */
+	}
+}
diff --git a/third_party/bearssl/src/aes_ct64.c b/third_party/bearssl/src/aes_ct64.c
new file mode 100644
index 0000000..1523811
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct64.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- evaluates the S-box circuit in place on the eight 64-bit words q[0..7] */
+void
+br_aes_ct64_bitslice_Sbox(uint64_t *q)
+{
+	/*
+	 * This S-box implementation is a straightforward translation of
+	 * the circuit described by Boyar and Peralta in "A new
+	 * combinational logic minimization technique with applications
+	 * to cryptology" (https://eprint.iacr.org/2009/191.pdf).
+	 * Only XOR, AND and NOT on whole words are used; no table lookup.
+	 * Note that variables x* (input) and s* (output) are numbered
+	 * in "reverse" order (x0 is the high bit, x7 is the low bit).
+	 */
+
+	uint64_t x0, x1, x2, x3, x4, x5, x6, x7;
+	uint64_t y1, y2, y3, y4, y5, y6, y7, y8, y9;
+	uint64_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
+	uint64_t y20, y21;
+	uint64_t z0, z1, z2, z3, z4, z5, z6, z7, z8, z9;
+	uint64_t z10, z11, z12, z13, z14, z15, z16, z17;
+	uint64_t t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;
+	uint64_t t10, t11, t12, t13, t14, t15, t16, t17, t18, t19;
+	uint64_t t20, t21, t22, t23, t24, t25, t26, t27, t28, t29;
+	uint64_t t30, t31, t32, t33, t34, t35, t36, t37, t38, t39;
+	uint64_t t40, t41, t42, t43, t44, t45, t46, t47, t48, t49;
+	uint64_t t50, t51, t52, t53, t54, t55, t56, t57, t58, t59;
+	uint64_t t60, t61, t62, t63, t64, t65, t66, t67;
+	uint64_t s0, s1, s2, s3, s4, s5, s6, s7;
+
+	x0 = q[7]; /* inputs are loaded in reverse index order: q[7] is x0 (high bit) */
+	x1 = q[6];
+	x2 = q[5];
+	x3 = q[4];
+	x4 = q[3];
+	x5 = q[2];
+	x6 = q[1];
+	x7 = q[0]; /* q[0] is x7 (low bit) */
+
+	/*
+	 * Top linear transformation: y1..y21 are XOR combinations of x0..x7.
+	 */
+	y14 = x3 ^ x5;
+	y13 = x0 ^ x6;
+	y9 = x0 ^ x3;
+	y8 = x0 ^ x5;
+	t0 = x1 ^ x2;
+	y1 = t0 ^ x7;
+	y4 = y1 ^ x3;
+	y12 = y13 ^ y14;
+	y2 = y1 ^ x0;
+	y5 = y1 ^ x6;
+	y3 = y5 ^ y8;
+	t1 = x4 ^ y12;
+	y15 = t1 ^ x5;
+	y20 = t1 ^ x1;
+	y6 = y15 ^ x7;
+	y10 = y15 ^ t0;
+	y11 = y20 ^ y9;
+	y7 = x7 ^ y11;
+	y17 = y10 ^ y11;
+	y19 = y10 ^ y8;
+	y16 = t0 ^ y11;
+	y21 = y13 ^ y16;
+	y18 = x0 ^ y16;
+
+	/*
+	 * Non-linear section: the only part of the circuit that uses AND.
+	 */
+	t2 = y12 & y15;
+	t3 = y3 & y6;
+	t4 = t3 ^ t2;
+	t5 = y4 & x7;
+	t6 = t5 ^ t2;
+	t7 = y13 & y16;
+	t8 = y5 & y1;
+	t9 = t8 ^ t7;
+	t10 = y2 & y7;
+	t11 = t10 ^ t7;
+	t12 = y9 & y11;
+	t13 = y14 & y17;
+	t14 = t13 ^ t12;
+	t15 = y8 & y10;
+	t16 = t15 ^ t12;
+	t17 = t4 ^ t14;
+	t18 = t6 ^ t16;
+	t19 = t9 ^ t14;
+	t20 = t11 ^ t16;
+	t21 = t17 ^ y20;
+	t22 = t18 ^ y19;
+	t23 = t19 ^ y21;
+	t24 = t20 ^ y18;
+
+	t25 = t21 ^ t22;
+	t26 = t21 & t23;
+	t27 = t24 ^ t26;
+	t28 = t25 & t27;
+	t29 = t28 ^ t22;
+	t30 = t23 ^ t24;
+	t31 = t22 ^ t26;
+	t32 = t31 & t30;
+	t33 = t32 ^ t24;
+	t34 = t23 ^ t33;
+	t35 = t27 ^ t33;
+	t36 = t24 & t35;
+	t37 = t36 ^ t34;
+	t38 = t27 ^ t36;
+	t39 = t29 & t38;
+	t40 = t25 ^ t39;
+
+	t41 = t40 ^ t37;
+	t42 = t29 ^ t33;
+	t43 = t29 ^ t40;
+	t44 = t33 ^ t37;
+	t45 = t42 ^ t41;
+	z0 = t44 & y15;
+	z1 = t37 & y6;
+	z2 = t33 & x7;
+	z3 = t43 & y16;
+	z4 = t40 & y1;
+	z5 = t29 & y7;
+	z6 = t42 & y11;
+	z7 = t45 & y17;
+	z8 = t41 & y10;
+	z9 = t44 & y12;
+	z10 = t37 & y3;
+	z11 = t33 & y4;
+	z12 = t43 & y13;
+	z13 = t40 & y5;
+	z14 = t29 & y2;
+	z15 = t42 & y9;
+	z16 = t45 & y14;
+	z17 = t41 & y8;
+
+	/*
+	 * Bottom linear transformation: XORs of z0..z17; s1, s2, s6, s7 also include a complement.
+	 */
+	t46 = z15 ^ z16;
+	t47 = z10 ^ z11;
+	t48 = z5 ^ z13;
+	t49 = z9 ^ z10;
+	t50 = z2 ^ z12;
+	t51 = z2 ^ z5;
+	t52 = z7 ^ z8;
+	t53 = z0 ^ z3;
+	t54 = z6 ^ z7;
+	t55 = z16 ^ z17;
+	t56 = z12 ^ t48;
+	t57 = t50 ^ t53;
+	t58 = z4 ^ t46;
+	t59 = z3 ^ t54;
+	t60 = t46 ^ t57;
+	t61 = z14 ^ t57;
+	t62 = t52 ^ t58;
+	t63 = t49 ^ t58;
+	t64 = z4 ^ t59;
+	t65 = t61 ^ t62;
+	t66 = z1 ^ t63;
+	s0 = t59 ^ t63;
+	s6 = t56 ^ ~t62;
+	s7 = t48 ^ ~t60;
+	t67 = t64 ^ t65;
+	s3 = t53 ^ t66;
+	s4 = t51 ^ t66;
+	s5 = t47 ^ t65;
+	s1 = t64 ^ ~s3;
+	s2 = t55 ^ ~t67;
+
+	q[7] = s0; /* outputs are stored with the same reversed numbering as the inputs */
+	q[6] = s1;
+	q[5] = s2;
+	q[4] = s3;
+	q[3] = s4;
+	q[2] = s5;
+	q[1] = s6;
+	q[0] = s7;
+}
+
+/* see inner.h -- in-place bit permutation of q[0..7]: within every byte, bit j of q[i] and bit i of q[j] trade places */
+void
+br_aes_ct64_ortho(uint64_t *q)
+{
+#define SWAPN(cl, ch, s, x, y)   do { \
+		uint64_t a, b; \
+		a = (x); \
+		b = (y); \
+		(x) = (a & (uint64_t)cl) | ((b & (uint64_t)cl) << (s)); \
+		(y) = ((a & (uint64_t)ch) >> (s)) | (b & (uint64_t)ch); \
+	} while (0)
+
+#define SWAP2(x, y)    SWAPN(0x5555555555555555, 0xAAAAAAAAAAAAAAAA,  1, x, y)
+#define SWAP4(x, y)    SWAPN(0x3333333333333333, 0xCCCCCCCCCCCCCCCC,  2, x, y)
+#define SWAP8(x, y)    SWAPN(0x0F0F0F0F0F0F0F0F, 0xF0F0F0F0F0F0F0F0,  4, x, y)
+
+	SWAP2(q[0], q[1]); /* pass 1: words one apart exchange single bits */
+	SWAP2(q[2], q[3]);
+	SWAP2(q[4], q[5]);
+	SWAP2(q[6], q[7]);
+
+	SWAP4(q[0], q[2]); /* pass 2: words two apart exchange 2-bit groups */
+	SWAP4(q[1], q[3]);
+	SWAP4(q[4], q[6]);
+	SWAP4(q[5], q[7]);
+
+	SWAP8(q[0], q[4]); /* pass 3: words four apart exchange 4-bit groups */
+	SWAP8(q[1], q[5]);
+	SWAP8(q[2], q[6]);
+	SWAP8(q[3], q[7]);
+}
+
+/* see inner.h -- packs four 32-bit words into two 64-bit words: *q0 interleaves the bytes of w[0] and w[2], *q1 those of w[1] and w[3] */
+void
+br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t *w)
+{
+	uint64_t x0, x1, x2, x3;
+
+	x0 = w[0];
+	x1 = w[1];
+	x2 = w[2];
+	x3 = w[3];
+	x0 |= (x0 << 16); /* step 1: spread the two 16-bit halves to bits 0..15 and 32..47 */
+	x1 |= (x1 << 16);
+	x2 |= (x2 << 16);
+	x3 |= (x3 << 16);
+	x0 &= (uint64_t)0x0000FFFF0000FFFF;
+	x1 &= (uint64_t)0x0000FFFF0000FFFF;
+	x2 &= (uint64_t)0x0000FFFF0000FFFF;
+	x3 &= (uint64_t)0x0000FFFF0000FFFF;
+	x0 |= (x0 << 8); /* step 2: spread again so that each source byte sits in the low byte of a 16-bit lane */
+	x1 |= (x1 << 8);
+	x2 |= (x2 << 8);
+	x3 |= (x3 << 8);
+	x0 &= (uint64_t)0x00FF00FF00FF00FF;
+	x1 &= (uint64_t)0x00FF00FF00FF00FF;
+	x2 &= (uint64_t)0x00FF00FF00FF00FF;
+	x3 &= (uint64_t)0x00FF00FF00FF00FF;
+	*q0 = x0 | (x2 << 8); /* w[0] bytes in the even byte positions, w[2] bytes in the odd ones */
+	*q1 = x1 | (x3 << 8); /* same layout for w[1] and w[3] */
+}
+
+/* see inner.h -- unpacks two 64-bit words into w[0..3]: even bytes of q0/q1 give w[0]/w[1], odd bytes give w[2]/w[3] */
+void
+br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1)
+{
+	uint64_t x0, x1, x2, x3;
+
+	x0 = q0 & (uint64_t)0x00FF00FF00FF00FF; /* even byte positions of q0 */
+	x1 = q1 & (uint64_t)0x00FF00FF00FF00FF; /* even byte positions of q1 */
+	x2 = (q0 >> 8) & (uint64_t)0x00FF00FF00FF00FF; /* odd byte positions of q0 */
+	x3 = (q1 >> 8) & (uint64_t)0x00FF00FF00FF00FF; /* odd byte positions of q1 */
+	x0 |= (x0 >> 8); /* step 1: bring byte pairs together into 16-bit groups */
+	x1 |= (x1 >> 8);
+	x2 |= (x2 >> 8);
+	x3 |= (x3 >> 8);
+	x0 &= (uint64_t)0x0000FFFF0000FFFF;
+	x1 &= (uint64_t)0x0000FFFF0000FFFF;
+	x2 &= (uint64_t)0x0000FFFF0000FFFF;
+	x3 &= (uint64_t)0x0000FFFF0000FFFF;
+	w[0] = (uint32_t)x0 | (uint32_t)(x0 >> 16); /* step 2: join the two 16-bit groups into one 32-bit word */
+	w[1] = (uint32_t)x1 | (uint32_t)(x1 >> 16);
+	w[2] = (uint32_t)x2 | (uint32_t)(x2 >> 16);
+	w[3] = (uint32_t)x3 | (uint32_t)(x3 >> 16);
+}
+
+static const unsigned char Rcon[] = { /* constants XORed in by br_aes_ct64_keysched(), one per nk-word group */
+	0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
+};
+
+static uint32_t /* runs x through the 64-bit bitsliced S-box circuit and returns the low 32 bits of slot 0 */
+sub_word(uint32_t x)
+{
+	uint64_t q[8];
+
+	memset(q, 0, sizeof q); /* all slots zero except slot 0, which carries x */
+	q[0] = x;
+	br_aes_ct64_ortho(q); /* to the bitsliced layout expected by the S-box */
+	br_aes_ct64_bitslice_Sbox(q);
+	br_aes_ct64_ortho(q); /* back to the normal layout */
+	return (uint32_t)q[0];
+}
+
+/* see inner.h -- writes 2 * (num_rounds + 1) compressed bitsliced subkey words to comp_skey[]; returns num_rounds, or 0 if key_len is not 16, 24 or 32 */
+unsigned
+br_aes_ct64_keysched(uint64_t *comp_skey, const void *key, size_t key_len)
+{
+	unsigned num_rounds;
+	int i, j, k, nk, nkf; /* nk: key length in words; nkf: subkey words to produce; k: Rcon index; j: group position, later output index */
+	uint32_t tmp; /* most recently produced subkey word, carried across iterations */
+	uint32_t skey[60]; /* plain 32-bit subkey words; nkf is at most 60 */
+
+	switch (key_len) { /* key length in bytes selects the round count */
+	case 16:
+		num_rounds = 10;
+		break;
+	case 24:
+		num_rounds = 12;
+		break;
+	case 32:
+		num_rounds = 14;
+		break;
+	default:
+		/* abort(); */
+		return 0; /* unsupported length: comp_skey[] is left untouched */
+	}
+	nk = (int)(key_len >> 2);
+	nkf = (int)((num_rounds + 1) << 2);
+	br_range_dec32le(skey, (key_len >> 2), key); /* the first nk words come straight from the key bytes */
+	tmp = skey[(key_len >> 2) - 1]; /* start from the last key word */
+	for (i = nk, j = 0, k = 0; i < nkf; i ++) { /* tmp enters each iteration holding word i - 1 */
+		if (j == 0) { /* first word of a group: rotate, substitute, add round constant */
+			tmp = (tmp << 24) | (tmp >> 8); /* rotate right by 8 bits */
+			tmp = sub_word(tmp) ^ Rcon[k];
+		} else if (nk > 6 && j == 4) { /* extra substitution, reached only when nk is 8 (32-byte key) */
+			tmp = sub_word(tmp);
+		}
+		tmp ^= skey[i - nk]; /* combine with the word nk positions back */
+		skey[i] = tmp;
+		if (++ j == nk) { /* group complete: restart j and move to the next round constant */
+			j = 0;
+			k ++;
+		}
+	}
+
+	for (i = 0, j = 0; i < nkf; i += 4, j += 2) { /* four subkey words in, two compressed 64-bit words out */
+		uint64_t q[8];
+
+		br_aes_ct64_interleave_in(&q[0], &q[4], skey + i);
+		q[1] = q[0]; /* q[0..3] all hold the first interleaved word ... */
+		q[2] = q[0];
+		q[3] = q[0];
+		q[5] = q[4]; /* ... and q[4..7] all hold the second one */
+		q[6] = q[4];
+		q[7] = q[4];
+		br_aes_ct64_ortho(q);
+		comp_skey[j + 0] = /* one bit out of every four is kept from each of q[0..3] */
+			  (q[0] & (uint64_t)0x1111111111111111)
+			| (q[1] & (uint64_t)0x2222222222222222)
+			| (q[2] & (uint64_t)0x4444444444444444)
+			| (q[3] & (uint64_t)0x8888888888888888);
+		comp_skey[j + 1] = /* same packing for q[4..7] */
+			  (q[4] & (uint64_t)0x1111111111111111)
+			| (q[5] & (uint64_t)0x2222222222222222)
+			| (q[6] & (uint64_t)0x4444444444444444)
+			| (q[7] & (uint64_t)0x8888888888888888);
+	}
+	return num_rounds;
+}
+
+/* see inner.h -- turns each of the 2 * (num_rounds + 1) words of comp_skey[] into four words of skey[] */
+void
+br_aes_ct64_skey_expand(uint64_t *skey,
+	unsigned num_rounds, const uint64_t *comp_skey)
+{
+	unsigned u, v, n; /* u: index into comp_skey[]; v: index into skey[]; n: number of compressed words */
+
+	n = (num_rounds + 1) << 1;
+	for (u = 0, v = 0; u < n; u ++, v += 4) {
+		uint64_t x0, x1, x2, x3;
+
+		x0 = x1 = x2 = x3 = comp_skey[u];
+		x0 &= (uint64_t)0x1111111111111111; /* bit 0 of every 4-bit group */
+		x1 &= (uint64_t)0x2222222222222222; /* bit 1 of every 4-bit group */
+		x2 &= (uint64_t)0x4444444444444444; /* bit 2 of every 4-bit group */
+		x3 &= (uint64_t)0x8888888888888888; /* bit 3 of every 4-bit group */
+		x1 >>= 1; /* move the selected bit down to position 0 of its group */
+		x2 >>= 2;
+		x3 >>= 3;
+		skey[v + 0] = (x0 << 4) - x0; /* times 15: each selected bit fills its whole 4-bit group */
+		skey[v + 1] = (x1 << 4) - x1;
+		skey[v + 2] = (x2 << 4) - x2;
+		skey[v + 3] = (x3 << 4) - x3;
+	}
+}
diff --git a/third_party/bearssl/src/aes_ct64_cbcdec.c b/third_party/bearssl/src/aes_ct64_cbcdec.c
new file mode 100644
index 0000000..5a7360b
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct64_cbcdec.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- binds ctx to the CBC-decryption vtable and runs the key schedule into ctx->skey */
+void
+br_aes_ct64_cbcdec_init(br_aes_ct64_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_ct64_cbcdec_vtable;
+	ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len); /* NOTE(review): keysched returns 0 for a len other than 16/24/32; that is stored here without being reported */
+}
+
+/* see bearssl_block.h -- CBC decryption of data[0..len) in place, up to four 16-byte blocks per pass; iv is read and then overwritten with the new chaining value */
+void
+br_aes_ct64_cbcdec_run(const br_aes_ct64_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf;
+	uint64_t sk_exp[120]; /* expanded subkeys: 8 words per round key, at most 15 round keys */
+	uint32_t ivw[4]; /* current chaining value as four words */
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	br_range_dec32le(ivw, 4, iv);
+	buf = data;
+	while (len > 0) {
+		uint64_t q[8];
+		uint32_t w1[16], w2[16]; /* w1: input words as read; w2: decrypted words */
+		int i;
+
+		if (len >= 64) {
+			br_range_dec32le(w1, 16, buf);
+		} else {
+			br_range_dec32le(w1, len >> 2, buf); /* NOTE(review): short tail; the remaining w1[] entries stay uninitialized, only the first len/4 output words are stored below */
+		}
+		for (i = 0; i < 4; i ++) { /* words of block i go to q[i] and q[i + 4] */
+			br_aes_ct64_interleave_in(
+				&q[i], &q[i + 4], w1 + (i << 2));
+		}
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_decrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_out(
+				w2 + (i << 2), q[i], q[i + 4]);
+		}
+		for (i = 0; i < 4; i ++) { /* first block of the pass is XORed with the chaining value */
+			w2[i] ^= ivw[i];
+		}
+		if (len >= 64) {
+			for (i = 4; i < 16; i ++) { /* later blocks are XORed with the preceding input block */
+				w2[i] ^= w1[i - 4];
+			}
+			memcpy(ivw, w1 + 12, sizeof ivw); /* last input block becomes the next chaining value */
+			br_range_enc32le(buf, w2, 16);
+		} else {
+			int j;
+
+			j = (int)(len >> 2); /* number of 32-bit words actually present */
+			for (i = 4; i < j; i ++) {
+				w2[i] ^= w1[i - 4];
+			}
+			memcpy(ivw, w1 + j - 4, sizeof ivw); /* last input block that was read */
+			br_range_enc32le(buf, w2, j);
+			break;
+		}
+		buf += 64;
+		len -= 64;
+	}
+	br_range_enc32le(iv, ivw, 4); /* hand the final chaining value back to the caller */
+}
+
+/* see bearssl_block.h -- vtable wiring br_aes_ct64_cbcdec_init/_run into the generic br_block_cbcdec_class interface */
+const br_block_cbcdec_class br_aes_ct64_cbcdec_vtable = {
+	sizeof(br_aes_ct64_cbcdec_keys), /* size of the context structure */
+	16, /* presumably the block size in bytes -- confirm against bearssl_block.h */
+	4, /* presumably log2 of the block size -- confirm against bearssl_block.h */
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_ct64_cbcdec_init,
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_ct64_cbcdec_run
+};
diff --git a/third_party/bearssl/src/aes_ct64_cbcenc.c b/third_party/bearssl/src/aes_ct64_cbcenc.c
new file mode 100644
index 0000000..6cb9dec
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct64_cbcenc.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- binds ctx to the CBC-encryption vtable and runs the key schedule into ctx->skey */
+void
+br_aes_ct64_cbcenc_init(br_aes_ct64_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_ct64_cbcenc_vtable;
+	ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len); /* NOTE(review): keysched returns 0 for a len other than 16/24/32; that is stored here without being reported */
+}
+
+/* see bearssl_block.h -- CBC encryption of data[0..len) in place, one 16-byte block per pass; iv is read and then overwritten with the last output block */
+void
+br_aes_ct64_cbcenc_run(const br_aes_ct64_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf;
+	uint64_t sk_exp[120]; /* expanded subkeys: 8 words per round key, at most 15 round keys */
+	uint32_t ivw[4]; /* current chaining value as four words */
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	br_range_dec32le(ivw, 4, iv);
+	buf = data;
+	while (len > 0) { /* NOTE(review): len is assumed to be a multiple of 16; otherwise len -= 16 below wraps around */
+		uint32_t w[4];
+		uint64_t q[8];
+
+		w[0] = ivw[0] ^ br_dec32le(buf); /* input block XORed with the chaining value */
+		w[1] = ivw[1] ^ br_dec32le(buf + 4);
+		w[2] = ivw[2] ^ br_dec32le(buf + 8);
+		w[3] = ivw[3] ^ br_dec32le(buf + 12);
+		br_aes_ct64_interleave_in(&q[0], &q[4], w); /* only q[0] and q[4] are set; q[1..3] and q[5..7] are not initialized here */
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_interleave_out(w, q[0], q[4]);
+		memcpy(ivw, w, sizeof w); /* the output block is the next chaining value */
+		br_enc32le(buf, w[0]);
+		br_enc32le(buf + 4, w[1]);
+		br_enc32le(buf + 8, w[2]);
+		br_enc32le(buf + 12, w[3]);
+		buf += 16;
+		len -= 16;
+	}
+	br_range_enc32le(iv, ivw, 4); /* hand the final chaining value back to the caller */
+}
+
+/* see bearssl_block.h -- vtable wiring br_aes_ct64_cbcenc_init/_run into the generic br_block_cbcenc_class interface */
+const br_block_cbcenc_class br_aes_ct64_cbcenc_vtable = {
+	sizeof(br_aes_ct64_cbcenc_keys), /* size of the context structure */
+	16, /* presumably the block size in bytes -- confirm against bearssl_block.h */
+	4, /* presumably log2 of the block size -- confirm against bearssl_block.h */
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_aes_ct64_cbcenc_init,
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_ct64_cbcenc_run
+};
diff --git a/third_party/bearssl/src/aes_ct64_ctr.c b/third_party/bearssl/src/aes_ct64_ctr.c
new file mode 100644
index 0000000..1275873
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct64_ctr.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- binds ctx to the CTR vtable and runs the key schedule into ctx->skey */
+void
+br_aes_ct64_ctr_init(br_aes_ct64_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_ct64_ctr_vtable;
+	ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len); /* NOTE(review): keysched returns 0 for a len other than 16/24/32; that is stored here without being reported */
+}
+
+static void /* XORs len bytes of src into dst, byte by byte */
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *d;
+	const unsigned char *s;
+
+	d = dst;
+	s = src;
+	while (len -- > 0) { /* len == 0 performs no access */
+		*d ++ ^= *s ++;
+	}
+}
+
+/* see bearssl_block.h -- CTR mode: XORs the keystream into data[0..len), four counter blocks per pass starting at cc; returns the updated counter */
+uint32_t
+br_aes_ct64_ctr_run(const br_aes_ct64_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *buf;
+	uint32_t ivw[16]; /* template for four counter blocks; words 3, 7, 11 and 15 are filled in per pass */
+	uint64_t sk_exp[120]; /* expanded subkeys: 8 words per round key, at most 15 round keys */
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	br_range_dec32le(ivw, 3, iv); /* three words are taken from iv */
+	memcpy(ivw + 4, ivw, 3 * sizeof(uint32_t)); /* and repeated for blocks 1, 2 and 3 */
+	memcpy(ivw + 8, ivw, 3 * sizeof(uint32_t));
+	memcpy(ivw + 12, ivw, 3 * sizeof(uint32_t));
+	buf = data;
+	while (len > 0) {
+		uint64_t q[8];
+		uint32_t w[16];
+		unsigned char tmp[64]; /* keystream bytes for this pass */
+		int i;
+
+		/*
+		 * TODO: see if we can save on the first br_aes_ct64_ortho()
+		 * call, since iv0/iv1/iv2 are constant for the whole run.
+		 */
+		memcpy(w, ivw, sizeof ivw);
+		w[3] = br_swap32(cc); /* fourth word of each block: the counter, passed through br_swap32() */
+		w[7] = br_swap32(cc + 1);
+		w[11] = br_swap32(cc + 2);
+		w[15] = br_swap32(cc + 3);
+		for (i = 0; i < 4; i ++) { /* words of block i go to q[i] and q[i + 4] */
+			br_aes_ct64_interleave_in(
+				&q[i], &q[i + 4], w + (i << 2));
+		}
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_out(
+				w + (i << 2), q[i], q[i + 4]);
+		}
+		br_range_enc32le(tmp, w, 16);
+		if (len <= 64) { /* final pass: use only len keystream bytes */
+			xorbuf(buf, tmp, len);
+			cc += (uint32_t)len >> 4; /* counter advances by the number of complete 16-byte blocks only */
+			break;
+		}
+		xorbuf(buf, tmp, 64);
+		buf += 64;
+		len -= 64;
+		cc += 4;
+	}
+	return cc;
+}
+
+/* see bearssl_block.h -- vtable wiring br_aes_ct64_ctr_init/_run into the generic br_block_ctr_class interface */
+const br_block_ctr_class br_aes_ct64_ctr_vtable = {
+	sizeof(br_aes_ct64_ctr_keys), /* size of the context structure */
+	16, /* presumably the block size in bytes -- confirm against bearssl_block.h */
+	4, /* presumably log2 of the block size -- confirm against bearssl_block.h */
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_ct64_ctr_init,
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_ct64_ctr_run
+};
diff --git a/third_party/bearssl/src/aes_ct64_ctrcbc.c b/third_party/bearssl/src/aes_ct64_ctrcbc.c
new file mode 100644
index 0000000..21bb8ef
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct64_ctrcbc.c
@@ -0,0 +1,433 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- run the key schedule into ctx and attach the class vtable */
+void
+br_aes_ct64_ctrcbc_init(br_aes_ct64_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_ct64_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_ct64_ctrcbc_vtable;
+}
+
+/* XOR the first len bytes of src into dst, in place (len may be 0). */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t k;
+
+	for (k = 0; k < len; k ++) {
+		out[k] ^= in[k];
+	}
+}
+
+/* see bearssl_block.h -- CTR pass only: XOR keystream into data and advance *ctr */
+void
+br_aes_ct64_ctrcbc_ctr(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint64_t sk_exp[120];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+	/* NOTE(review): a 1..3 byte tail gets a zeroed slot, not a counter; len presumably % 16 == 0. */
+	buf = data;
+	while (len > 0) {
+		uint64_t q[8];
+		uint32_t w[16];
+		unsigned char tmp[64];
+		int i, j;
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		j = (len >= 64) ? 16 : (int)(len >> 2);	/* counter words to fill */
+		for (i = 0; i < j; i += 4) {
+			uint32_t carry;
+			/* Emit the current counter, then add 1 across the four words. */
+			w[i + 0] = br_swap32(iv0);
+			w[i + 1] = br_swap32(iv1);
+			w[i + 2] = br_swap32(iv2);
+			w[i + 3] = br_swap32(iv3);
+			iv3 ++;
+			carry = ~(iv3 | -iv3) >> 31;	/* 1 iff iv3 wrapped to 0 */
+			iv2 += carry;
+			carry &= -(~(iv2 | -iv2) >> 31);	/* survives only if iv2 is now 0 */
+			iv1 += carry;
+			carry &= -(~(iv1 | -iv1) >> 31);	/* same test on iv1 */
+			iv0 += carry;
+		}
+		memset(w + i, 0, (16 - i) * sizeof(uint32_t));	/* zero the unused slots */
+		/* Load the four 16-byte slots, encrypt them together, unload them. */
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_in(
+				&q[i], &q[i + 4], w + (i << 2));
+		}
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		for (i = 0; i < 4; i ++) {
+			br_aes_ct64_interleave_out(
+				w + (i << 2), q[i], q[i + 4]);
+		}
+		/* tmp receives the keystream bytes; XOR them over the data. */
+		br_range_enc32le(tmp, w, 16);
+		if (len <= 64) {
+			xorbuf(buf, tmp, len);
+			break;
+		}
+		xorbuf(buf, tmp, 64);
+		buf += 64;
+		len -= 64;
+	}
+	br_enc32be(ivbuf +  0, iv0);	/* write the advanced counter back to *ctr */
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+}
+
+/* see bearssl_block.h -- CBC-MAC only: fold data into the 16-byte state at cbcmac */
+void
+br_aes_ct64_ctrcbc_mac(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint64_t q[8];
+	uint64_t sk_exp[120];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	/* Running MAC value, held as four little-endian 32-bit words. */
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+	/* NOTE(review): each pass consumes 16 bytes; len is presumably a multiple of 16. */
+	buf = data;
+	memset(q, 0, sizeof q);
+	while (len > 0) {
+		uint32_t w[4];
+		/* CBC chaining: XOR the next data block into the running value. */
+		w[0] = cm0 ^ br_dec32le(buf +  0);
+		w[1] = cm1 ^ br_dec32le(buf +  4);
+		w[2] = cm2 ^ br_dec32le(buf +  8);
+		w[3] = cm3 ^ br_dec32le(buf + 12);
+		/* Encrypt it; only q[0]/q[4] are loaded and read back here. */
+		br_aes_ct64_interleave_in(&q[0], &q[4], w);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_interleave_out(w, q[0], q[4]);
+
+		cm0 = w[0];
+		cm1 = w[1];
+		cm2 = w[2];
+		cm3 = w[3];
+		buf += 16;
+		len -= 16;
+	}
+
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);	/* store the new MAC state */
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h -- CTR-encrypt data in place and CBC-MAC the resulting ciphertext */
+void
+br_aes_ct64_ctrcbc_encrypt(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	/*
+	 * When encrypting, the CBC-MAC processing must be lagging by
+	 * one block, since it operates on the encrypted values, so
+	 * it must wait for that encryption to complete.
+	 */
+
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint64_t sk_exp[120];
+	uint64_t q[8];
+	int first_iter;
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	/*
+	 * The current CBC-MAC value is kept in little-endian convention.
+	 */
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+	/* NOTE(review): each pass consumes 16 bytes; len is presumably a multiple of 16. */
+	buf = data;
+	first_iter = 1;
+	memset(q, 0, sizeof q);
+	while (len > 0) {
+		uint32_t w[8], carry;
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		w[0] = br_swap32(iv0);
+		w[1] = br_swap32(iv1);
+		w[2] = br_swap32(iv2);
+		w[3] = br_swap32(iv3);
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31;	/* 1 iff iv3 wrapped to 0 */
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31);	/* survives only if iv2 is now 0 */
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);	/* same test on iv1 */
+		iv0 += carry;
+
+		/*
+		 * The block for CBC-MAC.
+		 */
+		w[4] = cm0;
+		w[5] = cm1;
+		w[6] = cm2;
+		w[7] = cm3;
+		/* Counter block travels in q[0]/q[4], MAC block in q[1]/q[5]. */
+		br_aes_ct64_interleave_in(&q[0], &q[4], w);
+		br_aes_ct64_interleave_in(&q[1], &q[5], w + 4);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_interleave_out(w, q[0], q[4]);
+		br_aes_ct64_interleave_out(w + 4, q[1], q[5]);
+
+		/*
+		 * We do the XOR with the plaintext in 32-bit registers,
+		 * so that the value are available for CBC-MAC processing
+		 * as well.
+		 */
+		w[0] ^= br_dec32le(buf +  0);
+		w[1] ^= br_dec32le(buf +  4);
+		w[2] ^= br_dec32le(buf +  8);
+		w[3] ^= br_dec32le(buf + 12);
+		br_enc32le(buf +  0, w[0]);
+		br_enc32le(buf +  4, w[1]);
+		br_enc32le(buf +  8, w[2]);
+		br_enc32le(buf + 12, w[3]);
+
+		buf += 16;
+		len -= 16;
+
+		/*
+		 * We set the cm* values to the block to encrypt in the
+		 * next iteration.
+		 */
+		if (first_iter) {	/* first pass: w[4..7] is not used, only the ciphertext */
+			first_iter = 0;
+			cm0 ^= w[0];
+			cm1 ^= w[1];
+			cm2 ^= w[2];
+			cm3 ^= w[3];
+		} else {	/* ciphertext XOR the encrypted previous MAC input */
+			cm0 = w[0] ^ w[4];
+			cm1 = w[1] ^ w[5];
+			cm2 = w[2] ^ w[6];
+			cm3 = w[3] ^ w[7];
+		}
+
+		/*
+		 * If this was the last iteration, then compute the
+		 * extra block encryption to complete CBC-MAC.
+		 */
+		if (len == 0) {
+			w[0] = cm0;
+			w[1] = cm1;
+			w[2] = cm2;
+			w[3] = cm3;
+			br_aes_ct64_interleave_in(&q[0], &q[4], w);
+			br_aes_ct64_ortho(q);
+			br_aes_ct64_bitslice_encrypt(
+				ctx->num_rounds, sk_exp, q);
+			br_aes_ct64_ortho(q);
+			br_aes_ct64_interleave_out(w, q[0], q[4]);
+			cm0 = w[0];
+			cm1 = w[1];
+			cm2 = w[2];
+			cm3 = w[3];
+			break;
+		}
+	}
+
+	br_enc32be(ivbuf +  0, iv0);	/* write back the counter (big-endian) ... */
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);	/* ... and the MAC state */
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h -- CBC-MAC the ciphertext and CTR-decrypt it in place, same pass */
+void
+br_aes_ct64_ctrcbc_decrypt(const br_aes_ct64_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint64_t sk_exp[120];
+	uint64_t q[8];
+
+	br_aes_ct64_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	/*
+	 * The current CBC-MAC value is kept in little-endian convention.
+	 */
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+	/* NOTE(review): each pass consumes 16 bytes; len is presumably a multiple of 16. */
+	buf = data;
+	memset(q, 0, sizeof q);
+	while (len > 0) {
+		uint32_t w[8], carry;
+		unsigned char tmp[16];
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		w[0] = br_swap32(iv0);
+		w[1] = br_swap32(iv1);
+		w[2] = br_swap32(iv2);
+		w[3] = br_swap32(iv3);
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31;	/* 1 iff iv3 wrapped to 0 */
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31);	/* survives only if iv2 is now 0 */
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);	/* same test on iv1 */
+		iv0 += carry;
+
+		/*
+		 * The block for CBC-MAC.
+		 */
+		w[4] = cm0 ^ br_dec32le(buf +  0);
+		w[5] = cm1 ^ br_dec32le(buf +  4);
+		w[6] = cm2 ^ br_dec32le(buf +  8);
+		w[7] = cm3 ^ br_dec32le(buf + 12);
+		/* Counter block travels in q[0]/q[4], MAC block in q[1]/q[5]. */
+		br_aes_ct64_interleave_in(&q[0], &q[4], w);
+		br_aes_ct64_interleave_in(&q[1], &q[5], w + 4);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct64_ortho(q);
+		br_aes_ct64_interleave_out(w, q[0], q[4]);
+		br_aes_ct64_interleave_out(w + 4, q[1], q[5]);
+		/* w[0..3] is the keystream block, w[4..7] the new MAC value. */
+		br_enc32le(tmp +  0, w[0]);
+		br_enc32le(tmp +  4, w[1]);
+		br_enc32le(tmp +  8, w[2]);
+		br_enc32le(tmp + 12, w[3]);
+		xorbuf(buf, tmp, 16);
+		cm0 = w[4];
+		cm1 = w[5];
+		cm2 = w[6];
+		cm3 = w[7];
+		buf += 16;
+		len -= 16;
+	}
+
+	br_enc32be(ivbuf +  0, iv0);	/* write back the counter (big-endian) ... */
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);	/* ... and the MAC state */
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h -- br_block_ctrcbc_class descriptor for the ct64 CTR+CBC-MAC code */
+const br_block_ctrcbc_class br_aes_ct64_ctrcbc_vtable = {
+	sizeof(br_aes_ct64_ctrcbc_keys),	/* size of the matching keys structure */
+	16,	/* presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_ct64_ctrcbc_init,	/* each entry is cast to the generic signature */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_ct64_ctrcbc_encrypt,	/* CTR encrypt + CBC-MAC */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_ct64_ctrcbc_decrypt,	/* CBC-MAC + CTR decrypt */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_ct64_ctrcbc_ctr,	/* CTR only */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_ct64_ctrcbc_mac	/* CBC-MAC only */
+};
diff --git a/third_party/bearssl/src/aes_ct64_dec.c b/third_party/bearssl/src/aes_ct64_dec.c
new file mode 100644
index 0000000..ab00e09
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct64_dec.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- inverse S-box applied in place to the eight bitsliced words q[0..7] */
+void
+br_aes_ct64_bitslice_invSbox(uint64_t *q)
+{
+	/*
+	 * See br_aes_ct_bitslice_invSbox(). This is the natural extension
+	 * to 64-bit registers.
+	 */
+	uint64_t q0, q1, q2, q3, q4, q5, q6, q7;
+	/* Affine (XOR/NOT) layer over the eight input words. */
+	q0 = ~q[0];
+	q1 = ~q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = ~q[5];
+	q6 = ~q[6];
+	q7 = q[7];
+	q[7] = q1 ^ q4 ^ q6;
+	q[6] = q0 ^ q3 ^ q5;
+	q[5] = q7 ^ q2 ^ q4;
+	q[4] = q6 ^ q1 ^ q3;
+	q[3] = q5 ^ q0 ^ q2;
+	q[2] = q4 ^ q7 ^ q1;
+	q[1] = q3 ^ q6 ^ q0;
+	q[0] = q2 ^ q5 ^ q7;
+	/* The forward S-box is reused between the two identical layers. */
+	br_aes_ct64_bitslice_Sbox(q);
+	/* Same affine layer again, now on the S-box output. */
+	q0 = ~q[0];
+	q1 = ~q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = ~q[5];
+	q6 = ~q[6];
+	q7 = q[7];
+	q[7] = q1 ^ q4 ^ q6;
+	q[6] = q0 ^ q3 ^ q5;
+	q[5] = q7 ^ q2 ^ q4;
+	q[4] = q6 ^ q1 ^ q3;
+	q[3] = q5 ^ q0 ^ q2;
+	q[2] = q4 ^ q7 ^ q1;
+	q[1] = q3 ^ q6 ^ q0;
+	q[0] = q2 ^ q5 ^ q7;
+}
+
+/* XOR the eight 64-bit words of the round key sk into the state q. */
+static void
+add_round_key(uint64_t *q, const uint64_t *sk)
+{
+	const uint64_t *end = sk + 8;
+	while (sk != end) {
+		*q ++ ^= *sk ++;
+	}
+}
+
+/* Inverse ShiftRows: rotate the four 16-bit fields of each word left by 0/4/8/12 bits. */
+static void
+inv_shift_rows(uint64_t *q)
+{
+	uint64_t *p;
+
+	for (p = q; p != q + 8; p ++) {
+		uint64_t v = *p, r;
+		r = v & (uint64_t)0x000000000000FFFF;
+		r |= (v & (uint64_t)0x000000000FFF0000) << 4;
+		r |= (v & (uint64_t)0x00000000F0000000) >> 12;
+		r |= (v & (uint64_t)0x000000FF00000000) << 8;
+		r |= (v & (uint64_t)0x0000FF0000000000) >> 8;
+		r |= (v & (uint64_t)0x000F000000000000) << 12;
+		r |= (v & (uint64_t)0xFFF0000000000000) >> 4;
+		*p = r;
+	}
+}
+
+static inline uint64_t
+rotr32(uint64_t x)
+{
+	return (x >> 32) | (x << 32);	/* swap the two 32-bit halves */
+}
+
+static void
+inv_mix_columns(uint64_t *q)
+{
+	uint64_t q0, q1, q2, q3, q4, q5, q6, q7;
+	uint64_t r0, r1, r2, r3, r4, r5, r6, r7;
+	/* Inverse MixColumns step on q[0..7]; r_i is q_i rotated right by 16 bits. */
+	q0 = q[0];
+	q1 = q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = q[5];
+	q6 = q[6];
+	q7 = q[7];
+	r0 = (q0 >> 16) | (q0 << 48);
+	r1 = (q1 >> 16) | (q1 << 48);
+	r2 = (q2 >> 16) | (q2 << 48);
+	r3 = (q3 >> 16) | (q3 << 48);
+	r4 = (q4 >> 16) | (q4 << 48);
+	r5 = (q5 >> 16) | (q5 << 48);
+	r6 = (q6 >> 16) | (q6 << 48);
+	r7 = (q7 >> 16) | (q7 << 48);
+	/* Each output word is an XOR of input words, of r_i, and of a 32-bit rotation. */
+	q[0] = q5 ^ q6 ^ q7 ^ r0 ^ r5 ^ r7 ^ rotr32(q0 ^ q5 ^ q6 ^ r0 ^ r5);
+	q[1] = q0 ^ q5 ^ r0 ^ r1 ^ r5 ^ r6 ^ r7 ^ rotr32(q1 ^ q5 ^ q7 ^ r1 ^ r5 ^ r6);
+	q[2] = q0 ^ q1 ^ q6 ^ r1 ^ r2 ^ r6 ^ r7 ^ rotr32(q0 ^ q2 ^ q6 ^ r2 ^ r6 ^ r7);
+	q[3] = q0 ^ q1 ^ q2 ^ q5 ^ q6 ^ r0 ^ r2 ^ r3 ^ r5 ^ rotr32(q0 ^ q1 ^ q3 ^ q5 ^ q6 ^ q7 ^ r0 ^ r3 ^ r5 ^ r7);
+	q[4] = q1 ^ q2 ^ q3 ^ q5 ^ r1 ^ r3 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr32(q1 ^ q2 ^ q4 ^ q5 ^ q7 ^ r1 ^ r4 ^ r5 ^ r6);
+	q[5] = q2 ^ q3 ^ q4 ^ q6 ^ r2 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr32(q2 ^ q3 ^ q5 ^ q6 ^ r2 ^ r5 ^ r6 ^ r7);
+	q[6] = q3 ^ q4 ^ q5 ^ q7 ^ r3 ^ r5 ^ r6 ^ r7 ^ rotr32(q3 ^ q4 ^ q6 ^ q7 ^ r3 ^ r6 ^ r7);
+	q[7] = q4 ^ q5 ^ q6 ^ r4 ^ r6 ^ r7 ^ rotr32(q4 ^ q5 ^ q7 ^ r4 ^ r7);
+}
+
+/* see inner.h -- inverse rounds on the state q; skey holds 8 words per round key */
+void
+br_aes_ct64_bitslice_decrypt(unsigned num_rounds,
+	const uint64_t *skey, uint64_t *q)
+{
+	unsigned round = num_rounds - 1;
+	add_round_key(q, skey + (num_rounds << 3));
+	while (round > 0) {
+		inv_shift_rows(q);
+		br_aes_ct64_bitslice_invSbox(q);
+		add_round_key(q, skey + (round << 3));
+		inv_mix_columns(q);
+		round --;
+	}
+	inv_shift_rows(q);
+	br_aes_ct64_bitslice_invSbox(q);
+	add_round_key(q, skey);
+}
diff --git a/third_party/bearssl/src/aes_ct64_enc.c b/third_party/bearssl/src/aes_ct64_enc.c
new file mode 100644
index 0000000..78631ce
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct64_enc.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * XOR the eight 64-bit words of the round key sk into the state q.
+ */
+static inline void
+add_round_key(uint64_t *q, const uint64_t *sk)
+{
+	int j;
+
+	for (j = 0; j < 8; j ++) {
+		q[j] ^= sk[j];
+	}
+}
+
+/* ShiftRows: rotate the four 16-bit fields of each word right by 0/4/8/12 bits. */
+static inline void
+shift_rows(uint64_t *q)
+{
+	uint64_t *p;
+
+	for (p = q; p != q + 8; p ++) {
+		uint64_t v = *p, r;
+		r = v & (uint64_t)0x000000000000FFFF;
+		r |= (v & (uint64_t)0x00000000FFF00000) >> 4;
+		r |= (v & (uint64_t)0x00000000000F0000) << 12;
+		r |= (v & (uint64_t)0x0000FF0000000000) >> 8;
+		r |= (v & (uint64_t)0x000000FF00000000) << 8;
+		r |= (v & (uint64_t)0xF000000000000000) >> 12;
+		r |= (v & (uint64_t)0x0FFF000000000000) << 4;
+		*p = r;
+	}
+}
+
+static inline uint64_t
+rotr32(uint64_t x)
+{
+	return (x >> 32) | (x << 32);	/* swap the two 32-bit halves */
+}
+
+static inline void
+mix_columns(uint64_t *q)
+{
+	uint64_t q0, q1, q2, q3, q4, q5, q6, q7;
+	uint64_t r0, r1, r2, r3, r4, r5, r6, r7;
+	/* MixColumns step on q[0..7]; r_i is q_i rotated right by 16 bits. */
+	q0 = q[0];
+	q1 = q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = q[5];
+	q6 = q[6];
+	q7 = q[7];
+	r0 = (q0 >> 16) | (q0 << 48);
+	r1 = (q1 >> 16) | (q1 << 48);
+	r2 = (q2 >> 16) | (q2 << 48);
+	r3 = (q3 >> 16) | (q3 << 48);
+	r4 = (q4 >> 16) | (q4 << 48);
+	r5 = (q5 >> 16) | (q5 << 48);
+	r6 = (q6 >> 16) | (q6 << 48);
+	r7 = (q7 >> 16) | (q7 << 48);
+	/* Word 7 (q7 ^ r7) is additionally folded into output words 0, 1, 3 and 4. */
+	q[0] = q7 ^ r7 ^ r0 ^ rotr32(q0 ^ r0);
+	q[1] = q0 ^ r0 ^ q7 ^ r7 ^ r1 ^ rotr32(q1 ^ r1);
+	q[2] = q1 ^ r1 ^ r2 ^ rotr32(q2 ^ r2);
+	q[3] = q2 ^ r2 ^ q7 ^ r7 ^ r3 ^ rotr32(q3 ^ r3);
+	q[4] = q3 ^ r3 ^ q7 ^ r7 ^ r4 ^ rotr32(q4 ^ r4);
+	q[5] = q4 ^ r4 ^ r5 ^ rotr32(q5 ^ r5);
+	q[6] = q5 ^ r5 ^ r6 ^ rotr32(q6 ^ r6);
+	q[7] = q6 ^ r6 ^ r7 ^ rotr32(q7 ^ r7);
+}
+
+/* see inner.h -- forward rounds on the state q; skey holds 8 words per round key */
+void
+br_aes_ct64_bitslice_encrypt(unsigned num_rounds,
+	const uint64_t *skey, uint64_t *q)
+{
+	unsigned round = 1;
+	add_round_key(q, skey);
+	while (round < num_rounds) {
+		br_aes_ct64_bitslice_Sbox(q);
+		shift_rows(q);
+		mix_columns(q);
+		add_round_key(q, skey + (round << 3));
+		round ++;
+	}
+	br_aes_ct64_bitslice_Sbox(q);
+	shift_rows(q);
+	add_round_key(q, skey + (num_rounds << 3));
+}
diff --git a/third_party/bearssl/src/aes_ct_cbcdec.c b/third_party/bearssl/src/aes_ct_cbcdec.c
new file mode 100644
index 0000000..522645a
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct_cbcdec.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- run the key schedule into ctx and attach the class vtable */
+void
+br_aes_ct_cbcdec_init(br_aes_ct_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_ct_cbcdec_vtable;
+}
+
+/* see bearssl_block.h -- AES-CBC decryption in place; *iv is updated for chaining */
+void
+br_aes_ct_cbcdec_run(const br_aes_ct_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf, *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t sk_exp[120];
+	/* NOTE(review): data is read 16 bytes at a time; len is presumably a multiple of 16. */
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	ivbuf = iv;
+	iv0 = br_dec32le(ivbuf);
+	iv1 = br_dec32le(ivbuf + 4);
+	iv2 = br_dec32le(ivbuf + 8);
+	iv3 = br_dec32le(ivbuf + 12);
+	buf = data;
+	while (len > 0) {	/* up to two blocks per pass */
+		uint32_t q[8], sq[8];
+
+		q[0] = br_dec32le(buf);	/* first block: even slots */
+		q[2] = br_dec32le(buf + 4);
+		q[4] = br_dec32le(buf + 8);
+		q[6] = br_dec32le(buf + 12);
+		if (len >= 32) {	/* second block, when present: odd slots */
+			q[1] = br_dec32le(buf + 16);
+			q[3] = br_dec32le(buf + 20);
+			q[5] = br_dec32le(buf + 24);
+			q[7] = br_dec32le(buf + 28);
+		} else {
+			q[1] = 0;
+			q[3] = 0;
+			q[5] = 0;
+			q[7] = 0;
+		}
+		memcpy(sq, q, sizeof q);	/* keep the ciphertext for chaining */
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_decrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+		br_enc32le(buf, q[0] ^ iv0);	/* first block is XORed with the IV */
+		br_enc32le(buf + 4, q[2] ^ iv1);
+		br_enc32le(buf + 8, q[4] ^ iv2);
+		br_enc32le(buf + 12, q[6] ^ iv3);
+		if (len < 32) {	/* lone last block: its ciphertext is the new IV */
+			iv0 = sq[0];
+			iv1 = sq[2];
+			iv2 = sq[4];
+			iv3 = sq[6];
+			break;
+		}
+		br_enc32le(buf + 16, q[1] ^ sq[0]);	/* second block: XOR with first ciphertext */
+		br_enc32le(buf + 20, q[3] ^ sq[2]);
+		br_enc32le(buf + 24, q[5] ^ sq[4]);
+		br_enc32le(buf + 28, q[7] ^ sq[6]);
+		iv0 = sq[1];	/* second ciphertext block chains into the next pass */
+		iv1 = sq[3];
+		iv2 = sq[5];
+		iv3 = sq[7];
+		buf += 32;
+		len -= 32;
+	}
+	br_enc32le(ivbuf, iv0);	/* hand the chaining value back to the caller */
+	br_enc32le(ivbuf + 4, iv1);
+	br_enc32le(ivbuf + 8, iv2);
+	br_enc32le(ivbuf + 12, iv3);
+}
+
+/* see bearssl_block.h -- br_block_cbcdec_class descriptor for the "ct" AES-CBC decryption */
+const br_block_cbcdec_class br_aes_ct_cbcdec_vtable = {
+	sizeof(br_aes_ct_cbcdec_keys),	/* size of the matching keys structure */
+	16,	/* presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_ct_cbcdec_init,	/* init, cast to the generic signature */
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_ct_cbcdec_run	/* run, cast to the generic signature */
+};
diff --git a/third_party/bearssl/src/aes_ct_cbcenc.c b/third_party/bearssl/src/aes_ct_cbcenc.c
new file mode 100644
index 0000000..cb85977
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct_cbcenc.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- run the key schedule into ctx and attach the class vtable */
+void
+br_aes_ct_cbcenc_init(br_aes_ct_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_ct_cbcenc_vtable;
+}
+
+/* see bearssl_block.h -- AES-CBC encryption in place; *iv is updated for chaining */
+void
+br_aes_ct_cbcenc_run(const br_aes_ct_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf, *ivbuf;
+	uint32_t q[8];
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t sk_exp[120];
+	/* Odd slots are never loaded with data; they are zeroed once, before the loop. */
+	q[1] = 0;
+	q[3] = 0;
+	q[5] = 0;
+	q[7] = 0;
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	ivbuf = iv;
+	iv0 = br_dec32le(ivbuf);
+	iv1 = br_dec32le(ivbuf + 4);
+	iv2 = br_dec32le(ivbuf + 8);
+	iv3 = br_dec32le(ivbuf + 12);
+	buf = data;
+	while (len > 0) {	/* NOTE(review): 16 bytes per pass; len presumably % 16 == 0 */
+		q[0] = iv0 ^ br_dec32le(buf);	/* plaintext XOR previous ciphertext (or IV) */
+		q[2] = iv1 ^ br_dec32le(buf + 4);
+		q[4] = iv2 ^ br_dec32le(buf + 8);
+		q[6] = iv3 ^ br_dec32le(buf + 12);
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+		iv0 = q[0];	/* the ciphertext block is also the next chaining value */
+		iv1 = q[2];
+		iv2 = q[4];
+		iv3 = q[6];
+		br_enc32le(buf, iv0);
+		br_enc32le(buf + 4, iv1);
+		br_enc32le(buf + 8, iv2);
+		br_enc32le(buf + 12, iv3);
+		buf += 16;
+		len -= 16;
+	}
+	br_enc32le(ivbuf, iv0);	/* hand the chaining value back to the caller */
+	br_enc32le(ivbuf + 4, iv1);
+	br_enc32le(ivbuf + 8, iv2);
+	br_enc32le(ivbuf + 12, iv3);
+}
+
+/* see bearssl_block.h -- br_block_cbcenc_class descriptor for the "ct" AES-CBC encryption */
+const br_block_cbcenc_class br_aes_ct_cbcenc_vtable = {
+	sizeof(br_aes_ct_cbcenc_keys),	/* size of the matching keys structure */
+	16,	/* presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_aes_ct_cbcenc_init,	/* init, cast to the generic signature */
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_ct_cbcenc_run	/* run, cast to the generic signature */
+};
diff --git a/third_party/bearssl/src/aes_ct_ctr.c b/third_party/bearssl/src/aes_ct_ctr.c
new file mode 100644
index 0000000..f407689
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct_ctr.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h -- run the key schedule into ctx and attach the class vtable */
+void
+br_aes_ct_ctr_init(br_aes_ct_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_ct_ctr_vtable;
+}
+
+/* XOR the first len bytes of src into dst, in place (len may be 0). */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t k;
+
+	for (k = 0; k < len; k ++) {
+		out[k] ^= in[k];
+	}
+}
+
+/* see bearssl_block.h -- AES-CTR, "ct" engine; returns the updated block counter */
+uint32_t
+br_aes_ct_ctr_run(const br_aes_ct_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *buf;
+	const unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2;
+	uint32_t sk_exp[120];
+	/* Only 12 bytes of iv are read; the fourth word of each block is the counter. */
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	ivbuf = iv;
+	iv0 = br_dec32le(ivbuf);
+	iv1 = br_dec32le(ivbuf + 4);
+	iv2 = br_dec32le(ivbuf + 8);
+	buf = data;
+	while (len > 0) {
+		uint32_t q[8];
+		unsigned char tmp[32];
+		/* Two counter blocks per pass: cc in even slots, cc + 1 in odd slots. */
+		/*
+		 * TODO: see if we can save on the first br_aes_ct_ortho()
+		 * call, since iv0/iv1/iv2 are constant for the whole run.
+		 */
+		q[0] = q[1] = iv0;
+		q[2] = q[3] = iv1;
+		q[4] = q[5] = iv2;
+		q[6] = br_swap32(cc);
+		q[7] = br_swap32(cc + 1);
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+		br_enc32le(tmp, q[0]);	/* keystream for counter cc */
+		br_enc32le(tmp + 4, q[2]);
+		br_enc32le(tmp + 8, q[4]);
+		br_enc32le(tmp + 12, q[6]);
+		br_enc32le(tmp + 16, q[1]);	/* keystream for counter cc + 1 */
+		br_enc32le(tmp + 20, q[3]);
+		br_enc32le(tmp + 24, q[5]);
+		br_enc32le(tmp + 28, q[7]);
+		/* Final chunk: count every block touched, including a partial one. */
+		if (len <= 32) {
+			xorbuf(buf, tmp, len);
+			cc ++;
+			if (len > 16) {
+				cc ++;
+			}
+			break;
+		}
+		xorbuf(buf, tmp, 32);
+		buf += 32;
+		len -= 32;
+		cc += 2;
+	}
+	return cc;
+}
+
+/* see bearssl_block.h -- br_block_ctr_class descriptor for the "ct" AES-CTR code */
+const br_block_ctr_class br_aes_ct_ctr_vtable = {
+	sizeof(br_aes_ct_ctr_keys),	/* size of the matching keys structure */
+	16,	/* presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_ct_ctr_init,	/* init, cast to the generic signature */
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_ct_ctr_run	/* run, cast to the generic signature */
+};
diff --git a/third_party/bearssl/src/aes_ct_ctrcbc.c b/third_party/bearssl/src/aes_ct_ctrcbc.c
new file mode 100644
index 0000000..8ae9fc7
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct_ctrcbc.c
@@ -0,0 +1,422 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h */
+void
+br_aes_ct_ctrcbc_init(br_aes_ct_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_ct_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_ct_ctrcbc_vtable;	/* bind class descriptor */
+}
+
+/* XOR len bytes from src into dst (dst[i] ^= src[i] for i < len). */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t u;
+
+	for (u = 0; u < len; u ++) {
+		out[u] ^= in[u];
+	}
+}
+
+/*
+ * Add one to the 128-bit counter stored in w[0..3], w[0] being the
+ * most significant word. Carries are derived with masks, not branches.
+ */
+static void
+ctr128_inc(uint32_t *w)
+{
+	uint32_t carry;
+
+	w[3] ++;
+	carry = ~(w[3] | -w[3]) >> 31;
+	w[2] += carry;
+	carry &= -(~(w[2] | -w[2]) >> 31);
+	w[1] += carry;
+	carry &= -(~(w[1] | -w[1]) >> 31);
+	w[0] += carry;
+}
+
+/* see bearssl_block.h */
+void
+br_aes_ct_ctrcbc_ctr(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *out = data;
+	unsigned char *cbuf = ctr;
+	uint32_t w[4];
+	uint32_t sk_exp[120];
+	int j;
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * The counter is handled as four big-endian 32-bit words so
+	 * that incrementing it is plain word arithmetic.
+	 */
+	for (j = 0; j < 4; j ++) {
+		w[j] = br_dec32be(cbuf + (j << 2));
+	}
+
+	while (len > 0) {
+		uint32_t q[8];
+		unsigned char ks[32];
+		size_t clen;
+
+		/*
+		 * Even words of q[] get the current counter block, odd
+		 * words the next one; both are byteswapped because the
+		 * bitslice code works on little-endian words.
+		 */
+		for (j = 0; j < 4; j ++) {
+			q[j << 1] = br_swap32(w[j]);
+		}
+		ctr128_inc(w);
+		for (j = 0; j < 4; j ++) {
+			q[(j << 1) + 1] = br_swap32(w[j]);
+		}
+
+		/* The second counter value is consumed only when needed. */
+		if (len > 16) {
+			ctr128_inc(w);
+		}
+
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+
+		for (j = 0; j < 4; j ++) {
+			br_enc32le(ks + (j << 2), q[j << 1]);
+			br_enc32le(ks + 16 + (j << 2), q[(j << 1) + 1]);
+		}
+
+		/* At most 32 bytes of keystream are used per pass. */
+		clen = (len < 32) ? len : 32;
+		xorbuf(out, ks, clen);
+		out += clen;
+		len -= clen;
+	}
+
+	/* Store the advanced counter back into the caller's buffer. */
+	for (j = 0; j < 4; j ++) {
+		br_enc32be(cbuf + (j << 2), w[j]);
+	}
+}
+
+/* see bearssl_block.h */
+void
+br_aes_ct_ctrcbc_mac(const br_aes_ct_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *in = data;
+	unsigned char *macbuf = cbcmac;
+	uint32_t mac[4];
+	uint32_t q[8];
+	uint32_t sk_exp[120];
+	int j;
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * The MAC value travels through the even words of q[]. The odd
+	 * words are cleared once here; whatever the encryption leaves
+	 * in them is never read back.
+	 */
+	for (j = 0; j < 4; j ++) {
+		mac[j] = br_dec32le(macbuf + (j << 2));
+		q[(j << 1) + 1] = 0;
+	}
+
+	/*
+	 * Each pass absorbs one 16-byte block: XOR it into the MAC
+	 * value, then encrypt the result.
+	 */
+	for (; len > 0; len -= 16, in += 16) {
+		for (j = 0; j < 4; j ++) {
+			q[j << 1] = mac[j] ^ br_dec32le(in + (j << 2));
+		}
+
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+		for (j = 0; j < 4; j ++) {
+			mac[j] = q[j << 1];
+		}
+	}
+
+	for (j = 0; j < 4; j ++) {
+		br_enc32le(macbuf + (j << 2), mac[j]);
+	}
+}
+
+/* see bearssl_block.h */
+void
+br_aes_ct_ctrcbc_encrypt(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	/*
+	 * When encrypting, the CBC-MAC processing must be lagging by
+	 * one block, since it operates on the encrypted values, so
+	 * it must wait for that encryption to complete.
+	 */
+
+	unsigned char *buf;
+	unsigned char *ivbuf;
+	uint32_t iv0, iv1, iv2, iv3;
+	uint32_t cm0, cm1, cm2, cm3;
+	uint32_t sk_exp[120];
+	int first_iter;	/* nonzero until the first block has been processed */
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * We keep the counter as four 32-bit values, with big-endian
+	 * convention, because that's what is expected for purposes of
+	 * incrementing the counter value.
+	 */
+	ivbuf = ctr;
+	iv0 = br_dec32be(ivbuf +  0);
+	iv1 = br_dec32be(ivbuf +  4);
+	iv2 = br_dec32be(ivbuf +  8);
+	iv3 = br_dec32be(ivbuf + 12);
+
+	/*
+	 * The current CBC-MAC value is kept in little-endian convention.
+	 */
+	cm0 = br_dec32le((unsigned char *)cbcmac +  0);
+	cm1 = br_dec32le((unsigned char *)cbcmac +  4);
+	cm2 = br_dec32le((unsigned char *)cbcmac +  8);
+	cm3 = br_dec32le((unsigned char *)cbcmac + 12);
+
+	buf = data;
+	first_iter = 1;
+	while (len > 0) {
+		uint32_t q[8], carry;
+
+		/*
+		 * The bitslice implementation expects values in
+		 * little-endian convention, so we have to byteswap them.
+		 */
+		q[0] = br_swap32(iv0);
+		q[2] = br_swap32(iv1);
+		q[4] = br_swap32(iv2);
+		q[6] = br_swap32(iv3);
+		iv3 ++;
+		carry = ~(iv3 | -iv3) >> 31;	/* 1 if iv3 wrapped to 0, else 0 */
+		iv2 += carry;
+		carry &= -(~(iv2 | -iv2) >> 31);	/* kept only if iv2 wrapped too */
+		iv1 += carry;
+		carry &= -(~(iv1 | -iv1) >> 31);	/* kept only if iv1 wrapped too */
+		iv0 += carry;
+
+		/*
+		 * The odd values are used for CBC-MAC (one block behind).
+		 */
+		q[1] = cm0;
+		q[3] = cm1;
+		q[5] = cm2;
+		q[7] = cm3;
+
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+
+		/*
+		 * We do the XOR with the plaintext in 32-bit registers,
+		 * so that the values are available for CBC-MAC processing
+		 * as well.
+		 */
+		q[0] ^= br_dec32le(buf +  0);
+		q[2] ^= br_dec32le(buf +  4);
+		q[4] ^= br_dec32le(buf +  8);
+		q[6] ^= br_dec32le(buf + 12);
+		br_enc32le(buf +  0, q[0]);
+		br_enc32le(buf +  4, q[2]);
+		br_enc32le(buf +  8, q[4]);
+		br_enc32le(buf + 12, q[6]);
+
+		buf += 16;
+		len -= 16;
+
+		/*
+		 * We set the cm* values to the block to encrypt in the
+		 * next iteration.
+		 */
+		if (first_iter) {	/* the encrypted odd words are not used here */
+			first_iter = 0;
+			cm0 ^= q[0];
+			cm1 ^= q[2];
+			cm2 ^= q[4];
+			cm3 ^= q[6];
+		} else {
+			cm0 = q[0] ^ q[1];
+			cm1 = q[2] ^ q[3];
+			cm2 = q[4] ^ q[5];
+			cm3 = q[6] ^ q[7];
+		}
+
+		/*
+		 * If this was the last iteration, then compute the
+		 * extra block encryption to complete CBC-MAC.
+		 */
+		if (len == 0) {
+			q[0] = cm0;	/* even words this time; odd results are not read */
+			q[2] = cm1;
+			q[4] = cm2;
+			q[6] = cm3;
+			br_aes_ct_ortho(q);
+			br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+			br_aes_ct_ortho(q);
+			cm0 = q[0];
+			cm1 = q[2];
+			cm2 = q[4];
+			cm3 = q[6];
+			break;
+		}
+	}
+
+	br_enc32be(ivbuf +  0, iv0);
+	br_enc32be(ivbuf +  4, iv1);
+	br_enc32be(ivbuf +  8, iv2);
+	br_enc32be(ivbuf + 12, iv3);
+	br_enc32le((unsigned char *)cbcmac +  0, cm0);
+	br_enc32le((unsigned char *)cbcmac +  4, cm1);
+	br_enc32le((unsigned char *)cbcmac +  8, cm2);
+	br_enc32le((unsigned char *)cbcmac + 12, cm3);
+}
+
+/* see bearssl_block.h */
+void
+br_aes_ct_ctrcbc_decrypt(const br_aes_ct_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	unsigned char *blk = data;
+	unsigned char *cbuf = ctr;
+	unsigned char *macbuf = cbcmac;
+	uint32_t c0, c1, c2, c3;
+	uint32_t m0, m1, m2, m3;
+	uint32_t sk_exp[120];
+
+	br_aes_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+
+	/*
+	 * Counter words are decoded big-endian (c0 is the most
+	 * significant one), so that incrementing the counter is
+	 * ordinary word arithmetic with carries.
+	 */
+	c0 = br_dec32be(cbuf);
+	c1 = br_dec32be(cbuf + 4);
+	c2 = br_dec32be(cbuf + 8);
+	c3 = br_dec32be(cbuf + 12);
+
+	/* The running CBC-MAC value is decoded little-endian. */
+	m0 = br_dec32le(macbuf);
+	m1 = br_dec32le(macbuf + 4);
+	m2 = br_dec32le(macbuf + 8);
+	m3 = br_dec32le(macbuf + 12);
+
+	for (; len > 0; len -= 16, blk += 16) {
+		uint32_t q[8], carry;
+		unsigned char ks[16];
+
+		/*
+		 * Even words: the counter block, byteswapped into the
+		 * little-endian convention of the bitslice code.
+		 * Odd words: the CBC-MAC input, i.e. the current MAC
+		 * value XORed with the data block as received.
+		 */
+		q[0] = br_swap32(c0);
+		q[2] = br_swap32(c1);
+		q[4] = br_swap32(c2);
+		q[6] = br_swap32(c3);
+		q[1] = m0 ^ br_dec32le(blk);
+		q[3] = m1 ^ br_dec32le(blk + 4);
+		q[5] = m2 ^ br_dec32le(blk + 8);
+		q[7] = m3 ^ br_dec32le(blk + 12);
+
+		/*
+		 * Increment the counter for the next block; each carry
+		 * is a 0/1 value computed without branching.
+		 */
+		c3 ++;
+		carry = ~(c3 | -c3) >> 31;
+		c2 += carry;
+		carry &= -(~(c2 | -c2) >> 31);
+		c1 += carry;
+		carry &= -(~(c1 | -c1) >> 31);
+		c0 += carry;
+
+		br_aes_ct_ortho(q);
+		br_aes_ct_bitslice_encrypt(ctx->num_rounds, sk_exp, q);
+		br_aes_ct_ortho(q);
+
+		/* Odd words now hold the updated CBC-MAC value. */
+		m0 = q[1];
+		m1 = q[3];
+		m2 = q[5];
+		m3 = q[7];
+		/* Even words are the keystream for this block. */
+		br_enc32le(ks, q[0]);
+		br_enc32le(ks + 4, q[2]);
+		br_enc32le(ks + 8, q[4]);
+		br_enc32le(ks + 12, q[6]);
+		xorbuf(blk, ks, 16);
+	}
+
+	br_enc32be(cbuf, c0);
+	br_enc32be(cbuf + 4, c1);
+	br_enc32be(cbuf + 8, c2);
+	br_enc32be(cbuf + 12, c3);
+	br_enc32le(macbuf, m0);
+	br_enc32le(macbuf + 4, m1);
+	br_enc32le(macbuf + 8, m2);
+	br_enc32le(macbuf + 12, m3);
+}
+
+/* Descriptor binding the br_aes_ct_ctrcbc_* functions to br_block_ctrcbc_class; see bearssl_block.h */
+const br_block_ctrcbc_class br_aes_ct_ctrcbc_vtable = {
+	sizeof(br_aes_ct_ctrcbc_keys),	/* size of the matching context type */
+	16,	/* presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_ct_ctrcbc_init,	/* real first parameter: br_aes_ct_ctrcbc_keys * */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_ct_ctrcbc_encrypt,	/* (ctx, ctr, cbcmac, data, len) */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_ct_ctrcbc_decrypt,	/* (ctx, ctr, cbcmac, data, len) */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_ct_ctrcbc_ctr,	/* (ctx, ctr, data, len) */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_ct_ctrcbc_mac	/* (ctx, cbcmac, data, len) */
+};
diff --git a/third_party/bearssl/src/aes_ct_dec.c b/third_party/bearssl/src/aes_ct_dec.c
new file mode 100644
index 0000000..7f32d2b
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct_dec.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Linear layer applied on both sides of the forward S-box in
+ * br_aes_ct_bitslice_invSbox(): each bitsliced byte x becomes
+ * B(x ^ 0x63), in the notation of the comment found there. The
+ * complemented inputs (words 0, 1, 5 and 6) stand for the 0x63
+ * constant, whose set bits are 0, 1, 5 and 6 -- this presumes that
+ * q[i] holds bit i of every byte (layout not visible in this file).
+ */
+static void
+inv_sbox_linear(uint32_t *q)
+{
+	uint32_t b0, b1, b2, b3, b4, b5, b6, b7;
+
+	/* All eight inputs are read before any output is written. */
+	b0 = ~q[0];
+	b1 = ~q[1];
+	b2 = q[2];
+	b3 = q[3];
+	b4 = q[4];
+	b5 = ~q[5];
+	b6 = ~q[6];
+	b7 = q[7];
+
+	q[0] = b2 ^ b5 ^ b7;
+	q[1] = b3 ^ b6 ^ b0;
+	q[2] = b4 ^ b7 ^ b1;
+	q[3] = b5 ^ b0 ^ b2;
+	q[4] = b6 ^ b1 ^ b3;
+	q[5] = b7 ^ b2 ^ b4;
+	q[6] = b0 ^ b3 ^ b5;
+	q[7] = b1 ^ b4 ^ b6;
+}
+
+/* see inner.h */
+void
+br_aes_ct_bitslice_invSbox(uint32_t *q)
+{
+	/*
+	 * AES S-box is:
+	 *   S(x) = A(I(x)) ^ 0x63
+	 * where I() is inversion in GF(256) (0 being formally its own
+	 * inverse) and A() is a linear transform. I() is an involution,
+	 * so with B() the inverse of A(), the inverse S-box is:
+	 *   iS(x) = B(S(B(x ^ 0x63)) ^ 0x63)
+	 * Check, for any y in GF(256):
+	 *   iS(S(y)) = B(A(I(B(A(I(y)) ^ 0x63 ^ 0x63))) ^ 0x63 ^ 0x63) = y
+	 *
+	 * The forward S-box code is reused rather than duplicated,
+	 * which keeps total code size lower. Merging the B()
+	 * transforms into the S-box circuit would make CBC decryption
+	 * faster, but it is already quite faster than CBC encryption
+	 * since two blocks can be processed in parallel.
+	 *
+	 * inv_sbox_linear() implements x -> B(x ^ 0x63).
+	 */
+
+	inv_sbox_linear(q);
+	br_aes_ct_bitslice_Sbox(q);
+	inv_sbox_linear(q);
+}
+
+/* XOR the eight words sk[0..7] into q[0..7]. */
+static void
+add_round_key(uint32_t *q, const uint32_t *sk)
+{
+	const uint32_t *end = q + 8;
+	while (q != end) {
+		*q ++ ^= *sk ++;
+	}
+}
+
+/* Per word: byte 0 is kept; bytes 1, 2, 3 are rotated left by 2, 4, 6 bits. */
+static void
+inv_shift_rows(uint32_t *q)
+{
+	uint32_t *p;
+
+	for (p = q; p != q + 8; p ++) {
+		uint32_t x = *p;
+
+		*p = (x & 0x000000FF)
+			| ((x << 2) & 0x0000FC00) | ((x >> 6) & 0x00000300)
+			| ((x << 4) & 0x00F00000) | ((x >> 4) & 0x000F0000)
+			| ((x << 6) & 0xC0000000) | ((x >> 2) & 0x3F000000);
+	}
+}
+
+static inline uint32_t
+rotr16(uint32_t x)
+{
+	return (x >> 16) | (x << 16);	/* swap the two 16-bit halves */
+}
+
+static void
+inv_mix_columns(uint32_t *q)
+{
+	uint32_t q0, q1, q2, q3, q4, q5, q6, q7;	/* copies of the eight input words */
+	uint32_t r0, r1, r2, r3, r4, r5, r6, r7;	/* each input rotated right by 8 bits */
+
+	q0 = q[0];
+	q1 = q[1];
+	q2 = q[2];
+	q3 = q[3];
+	q4 = q[4];
+	q5 = q[5];
+	q6 = q[6];
+	q7 = q[7];
+	r0 = (q0 >> 8) | (q0 << 24);
+	r1 = (q1 >> 8) | (q1 << 24);
+	r2 = (q2 >> 8) | (q2 << 24);
+	r3 = (q3 >> 8) | (q3 << 24);
+	r4 = (q4 >> 8) | (q4 << 24);
+	r5 = (q5 >> 8) | (q5 << 24);
+	r6 = (q6 >> 8) | (q6 << 24);
+	r7 = (q7 >> 8) | (q7 << 24);
+	/* Outputs read only the local copies, so q[] is safely overwritten in place. */
+	q[0] = q5 ^ q6 ^ q7 ^ r0 ^ r5 ^ r7 ^ rotr16(q0 ^ q5 ^ q6 ^ r0 ^ r5);
+	q[1] = q0 ^ q5 ^ r0 ^ r1 ^ r5 ^ r6 ^ r7 ^ rotr16(q1 ^ q5 ^ q7 ^ r1 ^ r5 ^ r6);
+	q[2] = q0 ^ q1 ^ q6 ^ r1 ^ r2 ^ r6 ^ r7 ^ rotr16(q0 ^ q2 ^ q6 ^ r2 ^ r6 ^ r7);
+	q[3] = q0 ^ q1 ^ q2 ^ q5 ^ q6 ^ r0 ^ r2 ^ r3 ^ r5 ^ rotr16(q0 ^ q1 ^ q3 ^ q5 ^ q6 ^ q7 ^ r0 ^ r3 ^ r5 ^ r7);
+	q[4] = q1 ^ q2 ^ q3 ^ q5 ^ r1 ^ r3 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr16(q1 ^ q2 ^ q4 ^ q5 ^ q7 ^ r1 ^ r4 ^ r5 ^ r6);
+	q[5] = q2 ^ q3 ^ q4 ^ q6 ^ r2 ^ r4 ^ r5 ^ r6 ^ r7 ^ rotr16(q2 ^ q3 ^ q5 ^ q6 ^ r2 ^ r5 ^ r6 ^ r7);
+	q[6] = q3 ^ q4 ^ q5 ^ q7 ^ r3 ^ r5 ^ r6 ^ r7 ^ rotr16(q3 ^ q4 ^ q6 ^ q7 ^ r3 ^ r6 ^ r7);
+	q[7] = q4 ^ q5 ^ q6 ^ r4 ^ r6 ^ r7 ^ rotr16(q4 ^ q5 ^ q7 ^ r4 ^ r7);
+}
+
+/* see inner.h */
+void
+br_aes_ct_bitslice_decrypt(unsigned num_rounds,
+	const uint32_t *skey, uint32_t *q)
+{
+	unsigned u = num_rounds;
+
+	add_round_key(q, skey + (u << 3));	/* last subkey first */
+	while (-- u > 0) {	/* subkeys num_rounds-1 down to 1 */
+		inv_shift_rows(q);
+		br_aes_ct_bitslice_invSbox(q);
+		add_round_key(q, skey + (u << 3));
+		inv_mix_columns(q);
+	}
+	inv_shift_rows(q);	/* final pass: no inv_mix_columns */
+	br_aes_ct_bitslice_invSbox(q);
+	add_round_key(q, skey);
+}
diff --git a/third_party/bearssl/src/aes_ct_enc.c b/third_party/bearssl/src/aes_ct_enc.c
new file mode 100644
index 0000000..089bf35
--- /dev/null
+++ b/third_party/bearssl/src/aes_ct_enc.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * XOR the eight words sk[0..7] into q[0..7].
+ */
+static inline void
+add_round_key(uint32_t *q, const uint32_t *sk)
+{
+	int i;
+
+	for (i = 0; i < 8; i ++) {
+		q[i] ^= sk[i];
+	}
+}
+
+/* Per word: byte 0 is kept; bytes 1, 2, 3 are rotated right by 2, 4, 6 bits. */
+static inline void
+shift_rows(uint32_t *q)
+{
+	uint32_t *p;
+
+	for (p = q; p != q + 8; p ++) {
+		uint32_t x = *p;
+
+		*p = (x & 0x000000FF)
+			| ((x >> 2) & 0x00003F00) | ((x << 6) & 0x0000C000)
+			| ((x >> 4) & 0x000F0000) | ((x << 4) & 0x00F00000)
+			| ((x >> 6) & 0x03000000) | ((x << 2) & 0xFC000000);
+	}
+}
+
+static inline uint32_t
+rotr16(uint32_t x)
+{
+	return (x >> 16) | (x << 16);	/* swap the two 16-bit halves */
+}
+
+/*
+ * MixColumns on the bitsliced state. With, for each word q[i]:
+ *   r[i] = q[i] rotated right by 8 bits
+ *   t[i] = q[i] ^ r[i]
+ *
+ * output i is t[i - 1] ^ r[i] ^ rotr16(t[i]), where t[7] stands in
+ * for t[-1]; outputs 1, 3 and 4 additionally XOR in t[7].
+ */
+static inline void
+mix_columns(uint32_t *q)
+{
+	uint32_t r[8], t[8];
+	int i;
+
+	/*
+	 * Derive everything from the inputs before overwriting q[].
+	 */
+	for (i = 0; i < 8; i ++) {
+		r[i] = (q[i] >> 8) | (q[i] << 24);
+		t[i] = q[i] ^ r[i];
+	}
+
+	/* From here on only r[] and t[] are read. */
+	q[0] = t[7] ^ r[0] ^ rotr16(t[0]);
+	q[1] = t[0] ^ t[7] ^ r[1] ^ rotr16(t[1]);
+	q[2] = t[1] ^ r[2] ^ rotr16(t[2]);
+	q[3] = t[2] ^ t[7] ^ r[3] ^ rotr16(t[3]);
+	q[4] = t[3] ^ t[7] ^ r[4] ^ rotr16(t[4]);
+	q[5] = t[4] ^ r[5] ^ rotr16(t[5]);
+	q[6] = t[5] ^ r[6] ^ rotr16(t[6]);
+	q[7] = t[6] ^ r[7] ^ rotr16(t[7]);
+}
+
+/* see inner.h */
+void
+br_aes_ct_bitslice_encrypt(unsigned num_rounds,
+	const uint32_t *skey, uint32_t *q)
+{
+	unsigned u = 0;
+
+	add_round_key(q, skey);		/* subkey 0 */
+	while (++ u < num_rounds) {	/* subkeys 1 to num_rounds-1 */
+		br_aes_ct_bitslice_Sbox(q);
+		shift_rows(q);
+		mix_columns(q);
+		add_round_key(q, skey + (u << 3));
+	}
+	br_aes_ct_bitslice_Sbox(q);	/* final pass: no mix_columns */
+	shift_rows(q);
+	add_round_key(q, skey + (num_rounds << 3));
+}
diff --git a/third_party/bearssl/src/aes_pwr8.c b/third_party/bearssl/src/aes_pwr8.c
new file mode 100644
index 0000000..b2c63c3
--- /dev/null
+++ b/third_party/bearssl/src/aes_pwr8.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+/*
+ * This code contains the AES key schedule implementation using the
+ * POWER8 opcodes.
+ */
+
+#if BR_POWER8
+
+static void
+key_schedule_128(unsigned char *sk, const unsigned char *key)
+{
+	long cc;
+
+	static const uint32_t fmod[] = { 0x11B, 0x11B, 0x11B, 0x11B };
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc = 0;
+
+	/*
+	 * Writes 11 subkeys (16 bytes each) at sk, the first being the
+	 * key itself. VSX loads/stores (lxvw4x/stxvw4x) are used since
+	 * they support unaligned accesses; the rest is VMX only. VMX
+	 * register N is VSX register 32+N (so v8 is loaded through 40).
+	 */
+	asm volatile (
+
+		/*
+		 * v0 = all-zero word
+		 * v1 = constant -8 (little-endian) / +8, copied into four words
+		 * v2 = current subkey
+		 * v3 = Rcon (x4 words)
+		 * v6 = constant 8, copied into four words
+		 * v7 = constant 0x11B, copied into four words
+		 * v8 = constant for byteswapping words (little-endian only)
+		 */
+		vspltisw(0, 0)
+#if BR_POWER8_LE
+		vspltisw(1, -8)
+#else
+		vspltisw(1, 8)
+#endif
+		lxvw4x(34, 0, %[key])
+		vspltisw(3, 1)
+		vspltisw(6, 8)
+		lxvw4x(39, 0, %[fmod])
+#if BR_POWER8_LE
+		lxvw4x(40, 0, %[idx2be])
+#endif
+
+		/*
+		 * First subkey is a copy of the key itself.
+		 */
+#if BR_POWER8_LE
+		vperm(4, 2, 2, 8)
+		stxvw4x(36, 0, %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+#endif
+
+		/*
+		 * Loop must run 10 times.
+		 */
+		li(%[cc], 10)
+		mtctr(%[cc])
+	label(loop)
+		/* Increment subkey address */
+		addi(%[sk], %[sk], 16)
+
+		/* Compute SubWord(RotWord(temp)) xor Rcon  (into v4, splat) */
+		vrlw(4, 2, 1)
+		vsbox(4, 4)
+#if BR_POWER8_LE
+		vxor(4, 4, 3)
+#else
+		vsldoi(5, 3, 0, 3)
+		vxor(4, 4, 5)
+#endif
+		vspltw(4, 4, 3)
+
+		/* XOR words for next subkey */
+		vsldoi(5, 0, 2, 12)
+		vxor(2, 2, 5)
+		vsldoi(5, 0, 2, 12)
+		vxor(2, 2, 5)
+		vsldoi(5, 0, 2, 12)
+		vxor(2, 2, 5)
+		vxor(2, 2, 4)
+
+		/* Store next subkey */
+#if BR_POWER8_LE
+		vperm(4, 2, 2, 8)
+		stxvw4x(36, 0, %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+#endif
+
+		/* Update Rcon (doubling, then a masked XOR with v7 = 0x11B) */
+		vadduwm(3, 3, 3)
+		vsrw(4, 3, 6)
+		vsubuwm(4, 0, 4)
+		vand(4, 4, 7)
+		vxor(3, 3, 4)
+
+		bdnz(loop)
+
+: [sk] "+b" (sk), [cc] "+b" (cc)
+: [key] "b" (key), [fmod] "b" (fmod)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "ctr", "memory"
+	);
+}
+
+static void
+key_schedule_192(unsigned char *sk, const unsigned char *key)
+{
+	long cc;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc = 0;
+
+	/*
+	 * Writes the 52 subkey words at sk from the 24-byte key. VSX
+	 * loads/stores (lxvw4x/stxvw4x) are used for the key/subkeys,
+	 * since they support unaligned accesses. The rest of the
+	 * computation is VMX only. VMX register 0 is VSX register 32.
+	 */
+	asm volatile (
+
+		/*
+		 * v0 = all-zero word
+		 * v1 = constant -8 / +8, copied into four words
+		 * v2, v3 = current subkey
+		 * v5 = Rcon (x4 words) (already shifted on big-endian)
+		 * v6 = constant 8, copied into four words (not read afterwards)
+		 * v8 = constant for byteswapping words
+		 * NOTE(review): after the vsldoi below, key bytes 16..23 sit in the
+		 * left half of v3 ("v3l"); the right half is zero -- confirm.
+		 */
+		vspltisw(0, 0)
+#if BR_POWER8_LE
+		vspltisw(1, -8)
+#else
+		vspltisw(1, 8)
+#endif
+		li(%[cc], 8)
+		lxvw4x(34, 0, %[key])
+		lxvw4x(35, %[cc], %[key])
+		vsldoi(3, 3, 0, 8)
+		vspltisw(5, 1)
+#if !BR_POWER8_LE
+		vsldoi(5, 5, 0, 3)
+#endif
+		vspltisw(6, 8)
+#if BR_POWER8_LE
+		lxvw4x(40, 0, %[idx2be])
+#endif
+
+		/*
+		 * Loop must run 8 times. Each iteration produces 256
+		 * bits of subkeys, with a 64-bit overlap.
+		 */
+		li(%[cc], 8)
+		mtctr(%[cc])
+		li(%[cc], 16)
+	label(loop)
+
+		/*
+		 * Last 6 words in v2:v3l. Compute next 6 words into
+		 * v3r:v4.
+		 */
+		vrlw(10, 3, 1)
+		vsbox(10, 10)
+		vxor(10, 10, 5)
+		vspltw(10, 10, 1)
+		vsldoi(11, 0, 10, 8)
+
+		vsldoi(12, 0, 2, 12)
+		vxor(12, 2, 12)
+		vsldoi(13, 0, 12, 12)
+		vxor(12, 12, 13)
+		vsldoi(13, 0, 12, 12)
+		vxor(12, 12, 13)
+
+		vspltw(13, 12, 3)
+		vxor(13, 13, 3)
+		vsldoi(14, 0, 3, 12)
+		vxor(13, 13, 14)
+
+		vsldoi(4, 12, 13, 8)
+		vsldoi(14, 0, 3, 8)
+		vsldoi(3, 14, 12, 8)
+
+		vxor(3, 3, 11)
+		vxor(4, 4, 10)
+
+		/*
+		 * Update Rcon. Since for a 192-bit key, we use only 8
+		 * such constants, we will not hit the field modulus,
+		 * so a simple shift (addition) works well.
+		 */
+		vadduwm(5, 5, 5)
+
+		/*
+		 * Write out the two left 128-bit words
+		 */
+#if BR_POWER8_LE
+		vperm(10, 2, 2, 8)
+		vperm(11, 3, 3, 8)
+		stxvw4x(42, 0, %[sk])
+		stxvw4x(43, %[cc], %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+		stxvw4x(35, %[cc], %[sk])
+#endif
+		addi(%[sk], %[sk], 24)
+
+		/*
+		 * Shift words for next iteration.
+		 */
+		vsldoi(2, 3, 4, 8)
+		vsldoi(3, 4, 0, 8)
+
+		bdnz(loop)
+
+		/*
+		 * The loop wrote the first 50 subkey words, but we need
+		 * to produce 52, so we must do one last write.
+		 */
+#if BR_POWER8_LE
+		vperm(10, 2, 2, 8)
+		stxvw4x(42, 0, %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+#endif
+
+: [sk] "+b" (sk), [cc] "+b" (cc)
+: [key] "b" (key)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+  "v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory"
+	);
+}
+
+static void
+key_schedule_256(unsigned char *sk, const unsigned char *key)
+{
+	long cc;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc = 0;
+
+	/*
+	 * Writes 15 subkeys (16 bytes each) at sk from the 32-byte key.
+	 * VSX loads/stores (lxvw4x/stxvw4x) are used for the key/subkeys,
+	 * since they support unaligned accesses. The rest of the
+	 * computation is VMX only. VMX register 0 is VSX register 32.
+	 */
+	asm volatile (
+
+		/*
+		 * v0 = all-zero word
+		 * v1 = constant -8 / +8, copied into four words
+		 * v2, v3 = current subkey
+		 * v6 = Rcon (x4 words) (already shifted on big-endian)
+		 * v7 = constant 8, copied into four words (not read afterwards)
+		 * v8 = constant for byteswapping words
+		 * NOTE(review): v3 is loaded from key + 16 and is not shifted; the
+		 * code below appears to use all four of its words -- confirm.
+		 */
+		vspltisw(0, 0)
+#if BR_POWER8_LE
+		vspltisw(1, -8)
+#else
+		vspltisw(1, 8)
+#endif
+		li(%[cc], 16)
+		lxvw4x(34, 0, %[key])
+		lxvw4x(35, %[cc], %[key])
+		vspltisw(6, 1)
+#if !BR_POWER8_LE
+		vsldoi(6, 6, 0, 3)
+#endif
+		vspltisw(7, 8)
+#if BR_POWER8_LE
+		lxvw4x(40, 0, %[idx2be])
+#endif
+
+		/*
+		 * Loop must run 7 times. Each iteration produces two
+		 * subkeys.
+		 */
+		li(%[cc], 7)
+		mtctr(%[cc])
+		li(%[cc], 16)
+	label(loop)
+
+		/*
+		 * Current words are in v2:v3. Compute next 128-bit word in v4.
+		 */
+		vrlw(10, 3, 1)
+		vsbox(10, 10)
+		vxor(10, 10, 6)
+		vspltw(10, 10, 3)
+
+		vsldoi(4, 0, 2, 12)
+		vxor(4, 2, 4)
+		vsldoi(5, 0, 4, 12)
+		vxor(4, 4, 5)
+		vsldoi(5, 0, 4, 12)
+		vxor(4, 4, 5)
+		vxor(4, 4, 10)
+
+		/*
+		 * Then other 128-bit word in v5 (vsbox only: no vrlw, no Rcon).
+		 */
+		vsbox(10, 4)
+		vspltw(10, 10, 3)
+
+		vsldoi(5, 0, 3, 12)
+		vxor(5, 3, 5)
+		vsldoi(11, 0, 5, 12)
+		vxor(5, 5, 11)
+		vsldoi(11, 0, 5, 12)
+		vxor(5, 5, 11)
+		vxor(5, 5, 10)
+
+		/*
+		 * Update Rcon. Since for a 256-bit key, we use only 7
+		 * such constants, we will not hit the field modulus,
+		 * so a simple shift (addition) works well.
+		 */
+		vadduwm(6, 6, 6)
+
+		/*
+		 * Write out the two left 128-bit words
+		 */
+#if BR_POWER8_LE
+		vperm(10, 2, 2, 8)
+		vperm(11, 3, 3, 8)
+		stxvw4x(42, 0, %[sk])
+		stxvw4x(43, %[cc], %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+		stxvw4x(35, %[cc], %[sk])
+#endif
+		addi(%[sk], %[sk], 32)
+
+		/*
+		 * Replace v2:v3 with v4:v5 (XOR with the all-zero v0 is a copy).
+		 */
+		vxor(2, 0, 4)
+		vxor(3, 0, 5)
+
+		bdnz(loop)
+
+		/*
+		 * The loop wrote the first 14 subkeys, but we need 15,
+		 * so we must do an extra write.
+		 */
+#if BR_POWER8_LE
+		vperm(10, 2, 2, 8)
+		stxvw4x(42, 0, %[sk])
+#else
+		stxvw4x(34, 0, %[sk])
+#endif
+
+: [sk] "+b" (sk), [cc] "+b" (cc)
+: [key] "b" (key)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
+  "v8", "v9", "v10", "v11", "v12", "v13", "v14", "ctr", "memory"
+	);
+}
+
+/* see inner.h */
+int
+br_aes_pwr8_supported(void)
+{
+	return 1;	/* unconditional: this definition is only compiled under #if BR_POWER8 */
+}
+
+/* see inner.h */
+unsigned
+br_aes_pwr8_keysched(unsigned char *sk, const void *key, size_t len)
+{
+	/* Any length other than 16 or 24 selects the 256-bit schedule. */
+	if (len == 16) {
+		key_schedule_128(sk, key);
+		return 10;
+	}
+	if (len == 24) {
+		key_schedule_192(sk, key);
+		return 12;
+	}
+	key_schedule_256(sk, key);
+	return 14;
+}
+
+#endif
diff --git a/third_party/bearssl/src/aes_pwr8_cbcdec.c b/third_party/bearssl/src/aes_pwr8_cbcdec.c
new file mode 100644
index 0000000..e535ba6
--- /dev/null
+++ b/third_party/bearssl/src/aes_pwr8_cbcdec.c
@@ -0,0 +1,670 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+#if BR_POWER8
+
+/*
+ * see bearssl_block.h
+ *
+ * Set up a CBC decryption context: the key schedule is computed into
+ * the context subkey buffer, the resulting round count is recorded,
+ * and the vtable pointer is set to this implementation.
+ */
+void
+br_aes_pwr8_cbcdec_init(br_aes_pwr8_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	unsigned rounds;
+
+	rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
+	ctx->num_rounds = rounds;
+	ctx->vtable = &br_aes_pwr8_cbcdec_vtable;
+}
+/*
+ * CBC decryption with the subkeys of a 128-bit key (11 subkeys, applied
+ * as one XOR, nine vncipher steps and one vncipherlast step). Data is
+ * processed in place, four 16-byte blocks per loop iteration.
+ *
+ *   sk          subkeys; 11 consecutive 16-byte values are loaded
+ *   iv          16-byte IV; it is only read, nothing is stored to it
+ *   buf         ciphertext on input, plaintext on output
+ *   num_blocks  number of 16-byte blocks; the loop body is executed
+ *               num_blocks >> 2 times
+ *
+ * The loop counter is tested only at the bottom of the loop (bdnz) and
+ * nothing here checks for a zero count, so the caller must pass at
+ * least 4 blocks.
+ */
+static void
+cbcdec_128(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3;	/* byte offsets of the four blocks of a group */
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {	/* loaded into v15 below */
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v10 (written as 32..42 in the
+		 * lxvw4x operands). cc0 is used as the running byte
+		 * offset into sk, then reset to 0 so that it addresses
+		 * the first block of each 64-byte group in the loop.
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words (used as the
+		 * vperm selector on little-endian builds only).
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v24. v24 is the chaining value that is
+		 * XORed into the first decrypted block of each group.
+		 */
+		lxvw4x(56, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(24, 24, 24, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next ciphertext words in v16..v19. Also save them
+		 * in v20..v23 (vand of a register with itself is a plain
+		 * copy): the saved ciphertext blocks are needed as
+		 * chaining values once v16..v19 have been decrypted.
+		 */
+		lxvw4x(48, %[cc0], %[buf])
+		lxvw4x(49, %[cc1], %[buf])
+		lxvw4x(50, %[cc2], %[buf])
+		lxvw4x(51, %[cc3], %[buf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		vand(20, 16, 16)
+		vand(21, 17, 17)
+		vand(22, 18, 18)
+		vand(23, 19, 19)
+
+		/*
+		 * Decrypt the blocks. Subkeys are applied in reverse
+		 * order: XOR with v10, then vncipher with v9 down to v1,
+		 * then vncipherlast with v0. The four blocks go through
+		 * each step together.
+		 */
+		vxor(16, 16, 10)
+		vxor(17, 17, 10)
+		vxor(18, 18, 10)
+		vxor(19, 19, 10)
+		vncipher(16, 16, 9)
+		vncipher(17, 17, 9)
+		vncipher(18, 18, 9)
+		vncipher(19, 19, 9)
+		vncipher(16, 16, 8)
+		vncipher(17, 17, 8)
+		vncipher(18, 18, 8)
+		vncipher(19, 19, 8)
+		vncipher(16, 16, 7)
+		vncipher(17, 17, 7)
+		vncipher(18, 18, 7)
+		vncipher(19, 19, 7)
+		vncipher(16, 16, 6)
+		vncipher(17, 17, 6)
+		vncipher(18, 18, 6)
+		vncipher(19, 19, 6)
+		vncipher(16, 16, 5)
+		vncipher(17, 17, 5)
+		vncipher(18, 18, 5)
+		vncipher(19, 19, 5)
+		vncipher(16, 16, 4)
+		vncipher(17, 17, 4)
+		vncipher(18, 18, 4)
+		vncipher(19, 19, 4)
+		vncipher(16, 16, 3)
+		vncipher(17, 17, 3)
+		vncipher(18, 18, 3)
+		vncipher(19, 19, 3)
+		vncipher(16, 16, 2)
+		vncipher(17, 17, 2)
+		vncipher(18, 18, 2)
+		vncipher(19, 19, 2)
+		vncipher(16, 16, 1)
+		vncipher(17, 17, 1)
+		vncipher(18, 18, 1)
+		vncipher(19, 19, 1)
+		vncipherlast(16, 16, 0)
+		vncipherlast(17, 17, 0)
+		vncipherlast(18, 18, 0)
+		vncipherlast(19, 19, 0)
+
+		/*
+		 * XOR decrypted blocks with IV / previous block: v24 for
+		 * the first block, and the saved ciphertext blocks
+		 * v20..v22 for the second to fourth blocks.
+		 */
+		vxor(16, 16, 24)
+		vxor(17, 17, 20)
+		vxor(18, 18, 21)
+		vxor(19, 19, 22)
+
+		/*
+		 * Store back result (with byteswap on little-endian
+		 * builds), overwriting the ciphertext in buf.
+		 */
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		/*
+		 * Fourth encrypted block is IV for next run: v23 holds
+		 * the copy saved before decryption. It is kept in v24
+		 * only; nothing is stored through iv.
+		 */
+		vand(24, 23, 23)
+
+		addi(%[buf], %[buf], 64)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)	/* buf is advanced by 64 in each iteration */
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"	/* ctr is the loop counter; buf is rewritten */
+	);
+}
+/*
+ * CBC decryption with the subkeys of a 192-bit key (13 subkeys, applied
+ * as one XOR, eleven vncipher steps and one vncipherlast step). Data is
+ * processed in place, four 16-byte blocks per loop iteration.
+ *
+ *   sk          subkeys; 13 consecutive 16-byte values are loaded
+ *   iv          16-byte IV; it is only read, nothing is stored to it
+ *   buf         ciphertext on input, plaintext on output
+ *   num_blocks  number of 16-byte blocks; the loop body is executed
+ *               num_blocks >> 2 times
+ *
+ * The loop counter is tested only at the bottom of the loop (bdnz) and
+ * nothing here checks for a zero count, so the caller must pass at
+ * least 4 blocks.
+ */
+static void
+cbcdec_192(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3;	/* byte offsets of the four blocks of a group */
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {	/* loaded into v15 below */
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v12 (written as 32..44 in the
+		 * lxvw4x operands). cc0 is used as the running byte
+		 * offset into sk, then reset to 0 so that it addresses
+		 * the first block of each 64-byte group in the loop.
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(43, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(44, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words (used as the
+		 * vperm selector on little-endian builds only).
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v24. v24 is the chaining value that is
+		 * XORed into the first decrypted block of each group.
+		 */
+		lxvw4x(56, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(24, 24, 24, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next ciphertext words in v16..v19. Also save them
+		 * in v20..v23 (vand of a register with itself is a plain
+		 * copy): the saved ciphertext blocks are needed as
+		 * chaining values once v16..v19 have been decrypted.
+		 */
+		lxvw4x(48, %[cc0], %[buf])
+		lxvw4x(49, %[cc1], %[buf])
+		lxvw4x(50, %[cc2], %[buf])
+		lxvw4x(51, %[cc3], %[buf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		vand(20, 16, 16)
+		vand(21, 17, 17)
+		vand(22, 18, 18)
+		vand(23, 19, 19)
+
+		/*
+		 * Decrypt the blocks. Subkeys are applied in reverse
+		 * order: XOR with v12, then vncipher with v11 down to
+		 * v1, then vncipherlast with v0. The four blocks go
+		 * through each step together.
+		 */
+		vxor(16, 16, 12)
+		vxor(17, 17, 12)
+		vxor(18, 18, 12)
+		vxor(19, 19, 12)
+		vncipher(16, 16, 11)
+		vncipher(17, 17, 11)
+		vncipher(18, 18, 11)
+		vncipher(19, 19, 11)
+		vncipher(16, 16, 10)
+		vncipher(17, 17, 10)
+		vncipher(18, 18, 10)
+		vncipher(19, 19, 10)
+		vncipher(16, 16, 9)
+		vncipher(17, 17, 9)
+		vncipher(18, 18, 9)
+		vncipher(19, 19, 9)
+		vncipher(16, 16, 8)
+		vncipher(17, 17, 8)
+		vncipher(18, 18, 8)
+		vncipher(19, 19, 8)
+		vncipher(16, 16, 7)
+		vncipher(17, 17, 7)
+		vncipher(18, 18, 7)
+		vncipher(19, 19, 7)
+		vncipher(16, 16, 6)
+		vncipher(17, 17, 6)
+		vncipher(18, 18, 6)
+		vncipher(19, 19, 6)
+		vncipher(16, 16, 5)
+		vncipher(17, 17, 5)
+		vncipher(18, 18, 5)
+		vncipher(19, 19, 5)
+		vncipher(16, 16, 4)
+		vncipher(17, 17, 4)
+		vncipher(18, 18, 4)
+		vncipher(19, 19, 4)
+		vncipher(16, 16, 3)
+		vncipher(17, 17, 3)
+		vncipher(18, 18, 3)
+		vncipher(19, 19, 3)
+		vncipher(16, 16, 2)
+		vncipher(17, 17, 2)
+		vncipher(18, 18, 2)
+		vncipher(19, 19, 2)
+		vncipher(16, 16, 1)
+		vncipher(17, 17, 1)
+		vncipher(18, 18, 1)
+		vncipher(19, 19, 1)
+		vncipherlast(16, 16, 0)
+		vncipherlast(17, 17, 0)
+		vncipherlast(18, 18, 0)
+		vncipherlast(19, 19, 0)
+
+		/*
+		 * XOR decrypted blocks with IV / previous block: v24 for
+		 * the first block, and the saved ciphertext blocks
+		 * v20..v22 for the second to fourth blocks.
+		 */
+		vxor(16, 16, 24)
+		vxor(17, 17, 20)
+		vxor(18, 18, 21)
+		vxor(19, 19, 22)
+
+		/*
+		 * Store back result (with byteswap on little-endian
+		 * builds), overwriting the ciphertext in buf.
+		 */
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		/*
+		 * Fourth encrypted block is IV for next run: v23 holds
+		 * the copy saved before decryption. It is kept in v24
+		 * only; nothing is stored through iv.
+		 */
+		vand(24, 23, 23)
+
+		addi(%[buf], %[buf], 64)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)	/* buf is advanced by 64 in each iteration */
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"	/* ctr is the loop counter; buf is rewritten */
+	);
+}
+/*
+ * CBC decryption with the subkeys of a 256-bit key (15 subkeys, applied
+ * as one XOR, thirteen vncipher steps and one vncipherlast step). Data
+ * is processed in place, four 16-byte blocks per loop iteration.
+ *
+ *   sk          subkeys; 15 consecutive 16-byte values are loaded
+ *   iv          16-byte IV; it is only read, nothing is stored to it
+ *   buf         ciphertext on input, plaintext on output
+ *   num_blocks  number of 16-byte blocks; the loop body is executed
+ *               num_blocks >> 2 times
+ *
+ * The loop counter is tested only at the bottom of the loop (bdnz) and
+ * nothing here checks for a zero count, so the caller must pass at
+ * least 4 blocks.
+ */
+static void
+cbcdec_256(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3;	/* byte offsets of the four blocks of a group */
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {	/* loaded into v15 below */
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v14 (written as 32..46 in the
+		 * lxvw4x operands). cc0 is used as the running byte
+		 * offset into sk, then reset to 0 so that it addresses
+		 * the first block of each 64-byte group in the loop.
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(43, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(44, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(45, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(46, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words (used as the
+		 * vperm selector on little-endian builds only).
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v24. v24 is the chaining value that is
+		 * XORed into the first decrypted block of each group.
+		 */
+		lxvw4x(56, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(24, 24, 24, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next ciphertext words in v16..v19. Also save them
+		 * in v20..v23 (vand of a register with itself is a plain
+		 * copy): the saved ciphertext blocks are needed as
+		 * chaining values once v16..v19 have been decrypted.
+		 */
+		lxvw4x(48, %[cc0], %[buf])
+		lxvw4x(49, %[cc1], %[buf])
+		lxvw4x(50, %[cc2], %[buf])
+		lxvw4x(51, %[cc3], %[buf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		vand(20, 16, 16)
+		vand(21, 17, 17)
+		vand(22, 18, 18)
+		vand(23, 19, 19)
+
+		/*
+		 * Decrypt the blocks. Subkeys are applied in reverse
+		 * order: XOR with v14, then vncipher with v13 down to
+		 * v1, then vncipherlast with v0. The four blocks go
+		 * through each step together.
+		 */
+		vxor(16, 16, 14)
+		vxor(17, 17, 14)
+		vxor(18, 18, 14)
+		vxor(19, 19, 14)
+		vncipher(16, 16, 13)
+		vncipher(17, 17, 13)
+		vncipher(18, 18, 13)
+		vncipher(19, 19, 13)
+		vncipher(16, 16, 12)
+		vncipher(17, 17, 12)
+		vncipher(18, 18, 12)
+		vncipher(19, 19, 12)
+		vncipher(16, 16, 11)
+		vncipher(17, 17, 11)
+		vncipher(18, 18, 11)
+		vncipher(19, 19, 11)
+		vncipher(16, 16, 10)
+		vncipher(17, 17, 10)
+		vncipher(18, 18, 10)
+		vncipher(19, 19, 10)
+		vncipher(16, 16, 9)
+		vncipher(17, 17, 9)
+		vncipher(18, 18, 9)
+		vncipher(19, 19, 9)
+		vncipher(16, 16, 8)
+		vncipher(17, 17, 8)
+		vncipher(18, 18, 8)
+		vncipher(19, 19, 8)
+		vncipher(16, 16, 7)
+		vncipher(17, 17, 7)
+		vncipher(18, 18, 7)
+		vncipher(19, 19, 7)
+		vncipher(16, 16, 6)
+		vncipher(17, 17, 6)
+		vncipher(18, 18, 6)
+		vncipher(19, 19, 6)
+		vncipher(16, 16, 5)
+		vncipher(17, 17, 5)
+		vncipher(18, 18, 5)
+		vncipher(19, 19, 5)
+		vncipher(16, 16, 4)
+		vncipher(17, 17, 4)
+		vncipher(18, 18, 4)
+		vncipher(19, 19, 4)
+		vncipher(16, 16, 3)
+		vncipher(17, 17, 3)
+		vncipher(18, 18, 3)
+		vncipher(19, 19, 3)
+		vncipher(16, 16, 2)
+		vncipher(17, 17, 2)
+		vncipher(18, 18, 2)
+		vncipher(19, 19, 2)
+		vncipher(16, 16, 1)
+		vncipher(17, 17, 1)
+		vncipher(18, 18, 1)
+		vncipher(19, 19, 1)
+		vncipherlast(16, 16, 0)
+		vncipherlast(17, 17, 0)
+		vncipherlast(18, 18, 0)
+		vncipherlast(19, 19, 0)
+
+		/*
+		 * XOR decrypted blocks with IV / previous block: v24 for
+		 * the first block, and the saved ciphertext blocks
+		 * v20..v22 for the second to fourth blocks.
+		 */
+		vxor(16, 16, 24)
+		vxor(17, 17, 20)
+		vxor(18, 18, 21)
+		vxor(19, 19, 22)
+
+		/*
+		 * Store back result (with byteswap on little-endian
+		 * builds), overwriting the ciphertext in buf.
+		 */
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		/*
+		 * Fourth encrypted block is IV for next run: v23 holds
+		 * the copy saved before decryption. It is kept in v24
+		 * only; nothing is stored through iv.
+		 */
+		vand(24, 23, 23)
+
+		addi(%[buf], %[buf], 64)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)	/* buf is advanced by 64 in each iteration */
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (num_blocks >> 2)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"	/* ctr is the loop counter; buf is rewritten */
+	);
+}
+
+/*
+ * Invoke the four-blocks-at-a-time decryption routine that matches the
+ * round count stored in the context (10, 12, or anything else for the
+ * 256-bit variant). The routines loop num_blocks >> 2 times without
+ * checking for a zero count, so num_blocks must be at least 4.
+ */
+static void
+cbcdec_dispatch(const br_aes_pwr8_cbcdec_keys *ctx,
+	const unsigned char *iv, unsigned char *buf, size_t num_blocks)
+{
+	if (ctx->num_rounds == 10) {
+		cbcdec_128(ctx->skey.skni, iv, buf, num_blocks);
+	} else if (ctx->num_rounds == 12) {
+		cbcdec_192(ctx->skey.skni, iv, buf, num_blocks);
+	} else {
+		cbcdec_256(ctx->skey.skni, iv, buf, num_blocks);
+	}
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * In-place CBC decryption of 'len' bytes at 'data'; on return, 'iv'
+ * holds the last ciphertext block of the input. Whole groups of four
+ * blocks are decrypted directly in the caller's buffer; a remaining
+ * tail of one to three blocks goes through a zero-padded 64-byte
+ * scratch buffer, of which only the first 'len' bytes are copied back.
+ */
+void
+br_aes_pwr8_cbcdec_run(const br_aes_pwr8_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char final_iv[16];
+	unsigned char *cur;
+
+	if (len == 0) {
+		return;
+	}
+	cur = data;
+
+	/*
+	 * The last ciphertext block is the IV to hand back; it must be
+	 * saved now, before the data is overwritten with plaintext.
+	 */
+	memcpy(final_iv, cur + len - 16, 16);
+
+	if (len >= 64) {
+		unsigned char chain[16];
+		size_t quad_blocks;
+		size_t quad_len;
+
+		quad_blocks = (len >> 4) & ~(size_t)3;
+		quad_len = quad_blocks << 4;
+
+		/*
+		 * Keep the last ciphertext block of the bulk part: it
+		 * is the chaining value for the tail, if there is one.
+		 */
+		memcpy(chain, cur + quad_len - 16, 16);
+		cbcdec_dispatch(ctx, iv, cur, quad_blocks);
+		cur += quad_len;
+		len &= 63;
+		memcpy(iv, chain, 16);
+	}
+
+	if (len > 0) {
+		unsigned char scratch[64];
+
+		memcpy(scratch, cur, len);
+		memset(scratch + len, 0, (sizeof scratch) - len);
+		cbcdec_dispatch(ctx, iv, scratch, 4);
+		memcpy(cur, scratch, len);
+	}
+
+	memcpy(iv, final_iv, 16);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Class descriptor for the POWER8 CBC decryption implementation. The
+ * two function pointers are cast from the concrete signatures (which
+ * take a br_aes_pwr8_cbcdec_keys pointer) to the generic signatures
+ * that take a pointer to the class pointer. The meaning of the numeric
+ * fields is noted as an assumption -- TODO confirm against the
+ * declaration of br_block_cbcdec_class.
+ */
+const br_block_cbcdec_class br_aes_pwr8_cbcdec_vtable = {
+	sizeof(br_aes_pwr8_cbcdec_keys),	/* size of the context structure */
+	16,	/* presumably the block size, in bytes */
+	4,	/* presumably log2 of the block size */
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_pwr8_cbcdec_init,
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_pwr8_cbcdec_run
+};
+
+/*
+ * see bearssl_block.h
+ *
+ * Return the POWER8 CBC decryption vtable when br_aes_pwr8_supported()
+ * reports support, NULL otherwise.
+ */
+const br_block_cbcdec_class *
+br_aes_pwr8_cbcdec_get_vtable(void)
+{
+	if (!br_aes_pwr8_supported()) {
+		return NULL;
+	}
+	return &br_aes_pwr8_cbcdec_vtable;
+}
+
+#else
+
+/*
+ * see bearssl_block.h
+ *
+ * Variant compiled when BR_POWER8 is not set: no POWER8 implementation
+ * is built in that case, so there is no vtable to return.
+ */
+const br_block_cbcdec_class *
+br_aes_pwr8_cbcdec_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
diff --git a/third_party/bearssl/src/aes_pwr8_cbcenc.c b/third_party/bearssl/src/aes_pwr8_cbcenc.c
new file mode 100644
index 0000000..00f8eca
--- /dev/null
+++ b/third_party/bearssl/src/aes_pwr8_cbcenc.c
@@ -0,0 +1,417 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+#if BR_POWER8
+
+/*
+ * see bearssl_block.h
+ *
+ * Set up a CBC encryption context: the key schedule is computed into
+ * the context subkey buffer, the resulting round count is recorded,
+ * and the vtable pointer is set to this implementation.
+ */
+void
+br_aes_pwr8_cbcenc_init(br_aes_pwr8_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	unsigned rounds;
+
+	rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
+	ctx->num_rounds = rounds;
+	ctx->vtable = &br_aes_pwr8_cbcenc_vtable;
+}
+/*
+ * CBC encryption with the subkeys of a 128-bit key (11 subkeys, applied
+ * as one XOR, nine vcipher steps and one vcipherlast step). Data is
+ * processed in place, one 16-byte block per loop iteration.
+ *
+ *   sk    subkeys; 11 consecutive 16-byte values are loaded
+ *   iv    16-byte IV; it is only read, nothing is stored to it
+ *   buf   plaintext on input, ciphertext on output
+ *   len   data length in bytes; the loop body is executed len >> 4
+ *         times
+ *
+ * The loop counter is tested only at the bottom of the loop (bdnz) and
+ * nothing here checks for a zero count, so len must be at least 16.
+ */
+static void
+cbcenc_128(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t len)
+{
+	long cc;	/* running byte offset into sk while loading subkeys */
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {	/* loaded into v15 below */
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc = 0;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v10 (written as 32..42 in the
+		 * lxvw4x operands).
+		 */
+		lxvw4x(32, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(33, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(34, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(35, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(36, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(37, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(38, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(39, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(40, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(41, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(42, %[cc], %[sk])
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words (used as the
+		 * vperm selector on little-endian builds only).
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v16. Throughout the loop, v16 holds the
+		 * current chaining value.
+		 */
+		lxvw4x(48, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next plaintext block (into v17) and XOR it with
+		 * the current IV in v16.
+		 */
+		lxvw4x(49, 0, %[buf])
+#if BR_POWER8_LE
+		vperm(17, 17, 17, 15)
+#endif
+		vxor(16, 16, 17)
+
+		/*
+		 * Encrypt the block: XOR with v0, vcipher with v1..v9,
+		 * vcipherlast with v10. The result stays in v16 and is
+		 * thus the chaining value for the next iteration.
+		 */
+		vxor(16, 16, 0)
+		vcipher(16, 16, 1)
+		vcipher(16, 16, 2)
+		vcipher(16, 16, 3)
+		vcipher(16, 16, 4)
+		vcipher(16, 16, 5)
+		vcipher(16, 16, 6)
+		vcipher(16, 16, 7)
+		vcipher(16, 16, 8)
+		vcipher(16, 16, 9)
+		vcipherlast(16, 16, 10)
+
+		/*
+		 * Store back result (with byteswap). On little-endian
+		 * builds the swapped copy is made in v17, so that v16
+		 * itself is left unmodified for the next iteration.
+		 */
+#if BR_POWER8_LE
+		vperm(17, 16, 16, 15)
+		stxvw4x(49, 0, %[buf])
+#else
+		stxvw4x(48, 0, %[buf])
+#endif
+		addi(%[buf], %[buf], 16)
+
+		bdnz(loop)
+
+: [cc] "+b" (cc), [buf] "+b" (buf)	/* buf is advanced by 16 in each iteration */
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "ctr", "memory"	/* ctr is the loop counter; buf is rewritten */
+	);
+}
+/*
+ * CBC encryption with the subkeys of a 192-bit key (13 subkeys, applied
+ * as one XOR, eleven vcipher steps and one vcipherlast step). Data is
+ * processed in place, one 16-byte block per loop iteration.
+ *
+ *   sk    subkeys; 13 consecutive 16-byte values are loaded
+ *   iv    16-byte IV; it is only read, nothing is stored to it
+ *   buf   plaintext on input, ciphertext on output
+ *   len   data length in bytes; the loop body is executed len >> 4
+ *         times
+ *
+ * The loop counter is tested only at the bottom of the loop (bdnz) and
+ * nothing here checks for a zero count, so len must be at least 16.
+ */
+static void
+cbcenc_192(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t len)
+{
+	long cc;	/* running byte offset into sk while loading subkeys */
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {	/* loaded into v15 below */
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc = 0;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v12 (written as 32..44 in the
+		 * lxvw4x operands).
+		 */
+		lxvw4x(32, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(33, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(34, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(35, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(36, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(37, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(38, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(39, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(40, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(41, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(42, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(43, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(44, %[cc], %[sk])
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words (used as the
+		 * vperm selector on little-endian builds only).
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v16. Throughout the loop, v16 holds the
+		 * current chaining value.
+		 */
+		lxvw4x(48, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next plaintext block (into v17) and XOR it with
+		 * the current IV in v16.
+		 */
+		lxvw4x(49, 0, %[buf])
+#if BR_POWER8_LE
+		vperm(17, 17, 17, 15)
+#endif
+		vxor(16, 16, 17)
+
+		/*
+		 * Encrypt the block: XOR with v0, vcipher with v1..v11,
+		 * vcipherlast with v12. The result stays in v16 and is
+		 * thus the chaining value for the next iteration.
+		 */
+		vxor(16, 16, 0)
+		vcipher(16, 16, 1)
+		vcipher(16, 16, 2)
+		vcipher(16, 16, 3)
+		vcipher(16, 16, 4)
+		vcipher(16, 16, 5)
+		vcipher(16, 16, 6)
+		vcipher(16, 16, 7)
+		vcipher(16, 16, 8)
+		vcipher(16, 16, 9)
+		vcipher(16, 16, 10)
+		vcipher(16, 16, 11)
+		vcipherlast(16, 16, 12)
+
+		/*
+		 * Store back result (with byteswap). On little-endian
+		 * builds the swapped copy is made in v17, so that v16
+		 * itself is left unmodified for the next iteration.
+		 */
+#if BR_POWER8_LE
+		vperm(17, 16, 16, 15)
+		stxvw4x(49, 0, %[buf])
+#else
+		stxvw4x(48, 0, %[buf])
+#endif
+		addi(%[buf], %[buf], 16)
+
+		bdnz(loop)
+
+: [cc] "+b" (cc), [buf] "+b" (buf)	/* buf is advanced by 16 in each iteration */
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "ctr", "memory"	/* ctr is the loop counter; buf is rewritten */
+	);
+}
+/*
+ * CBC encryption with the subkeys of a 256-bit key (15 subkeys, applied
+ * as one XOR, thirteen vcipher steps and one vcipherlast step). Data is
+ * processed in place, one 16-byte block per loop iteration.
+ *
+ *   sk    subkeys; 15 consecutive 16-byte values are loaded
+ *   iv    16-byte IV; it is only read, nothing is stored to it
+ *   buf   plaintext on input, ciphertext on output
+ *   len   data length in bytes; the loop body is executed len >> 4
+ *         times
+ *
+ * The loop counter is tested only at the bottom of the loop (bdnz) and
+ * nothing here checks for a zero count, so len must be at least 16.
+ */
+static void
+cbcenc_256(const unsigned char *sk,
+	const unsigned char *iv, unsigned char *buf, size_t len)
+{
+	long cc;	/* running byte offset into sk while loading subkeys */
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {	/* loaded into v15 below */
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	cc = 0;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v14 (written as 32..46 in the
+		 * lxvw4x operands).
+		 */
+		lxvw4x(32, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(33, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(34, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(35, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(36, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(37, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(38, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(39, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(40, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(41, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(42, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(43, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(44, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(45, %[cc], %[sk])
+		addi(%[cc], %[cc], 16)
+		lxvw4x(46, %[cc], %[sk])
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words (used as the
+		 * vperm selector on little-endian builds only).
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * Load IV into v16. Throughout the loop, v16 holds the
+		 * current chaining value.
+		 */
+		lxvw4x(48, 0, %[iv])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Load next plaintext block (into v17) and XOR it with
+		 * the current IV in v16.
+		 */
+		lxvw4x(49, 0, %[buf])
+#if BR_POWER8_LE
+		vperm(17, 17, 17, 15)
+#endif
+		vxor(16, 16, 17)
+
+		/*
+		 * Encrypt the block: XOR with v0, vcipher with v1..v13,
+		 * vcipherlast with v14. The result stays in v16 and is
+		 * thus the chaining value for the next iteration.
+		 */
+		vxor(16, 16, 0)
+		vcipher(16, 16, 1)
+		vcipher(16, 16, 2)
+		vcipher(16, 16, 3)
+		vcipher(16, 16, 4)
+		vcipher(16, 16, 5)
+		vcipher(16, 16, 6)
+		vcipher(16, 16, 7)
+		vcipher(16, 16, 8)
+		vcipher(16, 16, 9)
+		vcipher(16, 16, 10)
+		vcipher(16, 16, 11)
+		vcipher(16, 16, 12)
+		vcipher(16, 16, 13)
+		vcipherlast(16, 16, 14)
+
+		/*
+		 * Store back result (with byteswap). On little-endian
+		 * builds the swapped copy is made in v17, so that v16
+		 * itself is left unmodified for the next iteration.
+		 */
+#if BR_POWER8_LE
+		vperm(17, 16, 16, 15)
+		stxvw4x(49, 0, %[buf])
+#else
+		stxvw4x(48, 0, %[buf])
+#endif
+		addi(%[buf], %[buf], 16)
+
+		bdnz(loop)
+
+: [cc] "+b" (cc), [buf] "+b" (buf)	/* buf is advanced by 16 in each iteration */
+: [sk] "b" (sk), [iv] "b" (iv), [num_blocks] "b" (len >> 4)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "ctr", "memory"	/* ctr is the loop counter; buf is rewritten */
+	);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * In-place CBC encryption of 'len' bytes at 'data'. Nothing is done
+ * for an empty input; otherwise the routine matching the context's
+ * round count encrypts the data, and 'iv' is then overwritten with the
+ * last 16 bytes of the output (the final ciphertext block).
+ */
+void
+br_aes_pwr8_cbcenc_run(const br_aes_pwr8_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf;
+
+	if (len == 0) {
+		return;
+	}
+	buf = data;
+
+	if (ctx->num_rounds == 10) {
+		cbcenc_128(ctx->skey.skni, iv, buf, len);
+	} else if (ctx->num_rounds == 12) {
+		cbcenc_192(ctx->skey.skni, iv, buf, len);
+	} else {
+		cbcenc_256(ctx->skey.skni, iv, buf, len);
+	}
+
+	/*
+	 * The encryption routines do not write to iv; update it here.
+	 */
+	memcpy(iv, buf + (len - 16), 16);
+}
+
+/*
+ * see bearssl_block.h
+ *
+ * Class descriptor for the POWER8 CBC encryption implementation. The
+ * two function pointers are cast from the concrete signatures (which
+ * take a br_aes_pwr8_cbcenc_keys pointer) to the generic signatures
+ * that take a pointer to the class pointer. The meaning of the numeric
+ * fields is noted as an assumption -- TODO confirm against the
+ * declaration of br_block_cbcenc_class.
+ */
+const br_block_cbcenc_class br_aes_pwr8_cbcenc_vtable = {
+	sizeof(br_aes_pwr8_cbcenc_keys),	/* size of the context structure */
+	16,	/* presumably the block size, in bytes */
+	4,	/* presumably log2 of the block size */
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_aes_pwr8_cbcenc_init,
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_pwr8_cbcenc_run
+};
+
+/*
+ * see bearssl_block.h
+ *
+ * Return the POWER8 CBC encryption vtable when br_aes_pwr8_supported()
+ * reports support, NULL otherwise.
+ */
+const br_block_cbcenc_class *
+br_aes_pwr8_cbcenc_get_vtable(void)
+{
+	if (!br_aes_pwr8_supported()) {
+		return NULL;
+	}
+	return &br_aes_pwr8_cbcenc_vtable;
+}
+
+#else
+
+/*
+ * see bearssl_block.h
+ *
+ * Variant compiled when BR_POWER8 is not set: no POWER8 implementation
+ * is built in that case, so there is no vtable to return.
+ */
+const br_block_cbcenc_class *
+br_aes_pwr8_cbcenc_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
diff --git a/third_party/bearssl/src/aes_pwr8_ctr.c b/third_party/bearssl/src/aes_pwr8_ctr.c
new file mode 100644
index 0000000..f5d20c0
--- /dev/null
+++ b/third_party/bearssl/src/aes_pwr8_ctr.c
@@ -0,0 +1,717 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+#if BR_POWER8
+
+/*
+ * see bearssl_block.h
+ *
+ * Set up a CTR context: the key schedule is computed into the context
+ * subkey buffer, the resulting round count is recorded, and the vtable
+ * pointer is set to this implementation.
+ */
+void
+br_aes_pwr8_ctr_init(br_aes_pwr8_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	unsigned rounds;
+
+	rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
+	ctx->num_rounds = rounds;
+	ctx->vtable = &br_aes_pwr8_ctr_vtable;
+}
+/*
+ * CTR mode with the subkeys of a 128-bit key (11 subkeys, applied as
+ * one XOR, nine vcipher steps and one vcipherlast step). Data is
+ * processed in place, four 16-byte blocks per loop iteration.
+ *
+ *   sk          subkeys; 11 consecutive 16-byte values are loaded
+ *   ivbuf       64 bytes, read as four 16-byte counter blocks; it is
+ *               only read, the updated counters are not stored back
+ *   buf         data, XORed in place with the encrypted counter blocks
+ *   num_blocks  number of 16-byte blocks; the loop body is executed
+ *               num_blocks >> 2 times
+ *
+ * After each group, 4 is added to the last 32-bit word of each of the
+ * four counter blocks. The loop counter is tested only at the bottom
+ * of the loop (bdnz) and nothing here checks for a zero count, so the
+ * caller must pass at least 4 blocks.
+ */
+static void
+ctr_128(const unsigned char *sk, const unsigned char *ivbuf,
+	unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3;	/* byte offsets of the four blocks of a group */
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {	/* loaded into v15 below */
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+	static const uint32_t ctrinc[] = {	/* loaded into v28 below */
+		0, 0, 0, 4
+	};
+
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load subkeys into v0..v10 (written as 32..42 in the
+		 * lxvw4x operands). cc0 is used as the running byte
+		 * offset into sk, then reset to 0 so that it addresses
+		 * the first block of each 64-byte group afterwards.
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words (used as the
+		 * vperm selector on little-endian builds only).
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * v28 = increment for IV counter: only the last 32-bit
+		 * word is nonzero (value 4, one per block of a group).
+		 */
+		lxvw4x(60, 0, %[ctrinc])
+
+		/*
+		 * Load IV into v16..v19: four 16-byte counter blocks,
+		 * at offsets 0, 16, 32 and 48 of ivbuf.
+		 */
+		lxvw4x(48, %[cc0], %[ivbuf])
+		lxvw4x(49, %[cc1], %[ivbuf])
+		lxvw4x(50, %[cc2], %[ivbuf])
+		lxvw4x(51, %[cc3], %[ivbuf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Compute next IV into v24..v27. This is done before
+		 * v16..v19 are overwritten by the encryption below.
+		 */
+		vadduwm(24, 16, 28)
+		vadduwm(25, 17, 28)
+		vadduwm(26, 18, 28)
+		vadduwm(27, 19, 28)
+
+		/*
+		 * Load next data blocks. We do this early on but we
+		 * won't need them until IV encryption is done. They go
+		 * into v20..v23 and are not byteswapped.
+		 */
+		lxvw4x(52, %[cc0], %[buf])
+		lxvw4x(53, %[cc1], %[buf])
+		lxvw4x(54, %[cc2], %[buf])
+		lxvw4x(55, %[cc3], %[buf])
+
+		/*
+		 * Encrypt the current IV: XOR with v0, vcipher with
+		 * v1..v9, vcipherlast with v10. The four counter blocks
+		 * go through each step together.
+		 */
+		vxor(16, 16, 0)
+		vxor(17, 17, 0)
+		vxor(18, 18, 0)
+		vxor(19, 19, 0)
+		vcipher(16, 16, 1)
+		vcipher(17, 17, 1)
+		vcipher(18, 18, 1)
+		vcipher(19, 19, 1)
+		vcipher(16, 16, 2)
+		vcipher(17, 17, 2)
+		vcipher(18, 18, 2)
+		vcipher(19, 19, 2)
+		vcipher(16, 16, 3)
+		vcipher(17, 17, 3)
+		vcipher(18, 18, 3)
+		vcipher(19, 19, 3)
+		vcipher(16, 16, 4)
+		vcipher(17, 17, 4)
+		vcipher(18, 18, 4)
+		vcipher(19, 19, 4)
+		vcipher(16, 16, 5)
+		vcipher(17, 17, 5)
+		vcipher(18, 18, 5)
+		vcipher(19, 19, 5)
+		vcipher(16, 16, 6)
+		vcipher(17, 17, 6)
+		vcipher(18, 18, 6)
+		vcipher(19, 19, 6)
+		vcipher(16, 16, 7)
+		vcipher(17, 17, 7)
+		vcipher(18, 18, 7)
+		vcipher(19, 19, 7)
+		vcipher(16, 16, 8)
+		vcipher(17, 17, 8)
+		vcipher(18, 18, 8)
+		vcipher(19, 19, 8)
+		vcipher(16, 16, 9)
+		vcipher(17, 17, 9)
+		vcipher(18, 18, 9)
+		vcipher(19, 19, 9)
+		vcipherlast(16, 16, 10)
+		vcipherlast(17, 17, 10)
+		vcipherlast(18, 18, 10)
+		vcipherlast(19, 19, 10)
+
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		/*
+		 * XOR the data blocks (loaded above into v20..v23) with
+		 * the encrypted IV, and store the result back in place.
+		 * On little-endian builds the encrypted IV was just
+		 * byteswapped, so it matches the unswapped data.
+		 */
+		vxor(16, 20, 16)
+		vxor(17, 21, 17)
+		vxor(18, 22, 18)
+		vxor(19, 23, 19)
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		addi(%[buf], %[buf], 64)
+
+		/*
+		 * Update IV: copy the precomputed next counter blocks
+		 * v24..v27 into v16..v19 (vand of a register with
+		 * itself is a plain copy).
+		 */
+		vand(16, 24, 24)
+		vand(17, 25, 25)
+		vand(18, 26, 26)
+		vand(19, 27, 27)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)	/* buf is advanced by 64 in each iteration */
+: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
+  [ctrinc] "b" (ctrinc)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"	/* ctr is the loop counter; buf is rewritten */
+	);
+}
+
+static void
+ctr_192(const unsigned char *sk, const unsigned char *ivbuf,
+	unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3; /* index registers for indexed loads/stores */
+	/* AES-192 CTR on buf, in place: (num_blocks >> 2) groups of 4 blocks. */
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+	static const uint32_t ctrinc[] = {
+		0, 0, 0, 4
+	};
+
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load the 13 subkeys into v0..v12 (cc0 walks through sk)
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(43, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(44, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * v28 = increment for IV counter.
+		 */
+		lxvw4x(60, 0, %[ctrinc])
+
+		/*
+		 * Load the four initial counter blocks (ivbuf) into v16..v19
+		 */
+		lxvw4x(48, %[cc0], %[ivbuf])
+		lxvw4x(49, %[cc1], %[ivbuf])
+		lxvw4x(50, %[cc2], %[ivbuf])
+		lxvw4x(51, %[cc3], %[ivbuf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Next counter blocks into v24..v27 (word-wise add of v28)
+		 */
+		vadduwm(24, 16, 28)
+		vadduwm(25, 17, 28)
+		vadduwm(26, 18, 28)
+		vadduwm(27, 19, 28)
+
+		/*
+		 * Load next data blocks. We do this early on but we
+		 * won't need them until IV encryption is done.
+		 */
+		lxvw4x(52, %[cc0], %[buf])
+		lxvw4x(53, %[cc1], %[buf])
+		lxvw4x(54, %[cc2], %[buf])
+		lxvw4x(55, %[cc3], %[buf])
+
+		/*
+		 * Encrypt the four current counter blocks (12 rounds).
+		 */
+		vxor(16, 16, 0)
+		vxor(17, 17, 0)
+		vxor(18, 18, 0)
+		vxor(19, 19, 0)
+		vcipher(16, 16, 1)
+		vcipher(17, 17, 1)
+		vcipher(18, 18, 1)
+		vcipher(19, 19, 1)
+		vcipher(16, 16, 2)
+		vcipher(17, 17, 2)
+		vcipher(18, 18, 2)
+		vcipher(19, 19, 2)
+		vcipher(16, 16, 3)
+		vcipher(17, 17, 3)
+		vcipher(18, 18, 3)
+		vcipher(19, 19, 3)
+		vcipher(16, 16, 4)
+		vcipher(17, 17, 4)
+		vcipher(18, 18, 4)
+		vcipher(19, 19, 4)
+		vcipher(16, 16, 5)
+		vcipher(17, 17, 5)
+		vcipher(18, 18, 5)
+		vcipher(19, 19, 5)
+		vcipher(16, 16, 6)
+		vcipher(17, 17, 6)
+		vcipher(18, 18, 6)
+		vcipher(19, 19, 6)
+		vcipher(16, 16, 7)
+		vcipher(17, 17, 7)
+		vcipher(18, 18, 7)
+		vcipher(19, 19, 7)
+		vcipher(16, 16, 8)
+		vcipher(17, 17, 8)
+		vcipher(18, 18, 8)
+		vcipher(19, 19, 8)
+		vcipher(16, 16, 9)
+		vcipher(17, 17, 9)
+		vcipher(18, 18, 9)
+		vcipher(19, 19, 9)
+		vcipher(16, 16, 10)
+		vcipher(17, 17, 10)
+		vcipher(18, 18, 10)
+		vcipher(19, 19, 10)
+		vcipher(16, 16, 11)
+		vcipher(17, 17, 11)
+		vcipher(18, 18, 11)
+		vcipher(19, 19, 11)
+		vcipherlast(16, 16, 12)
+		vcipherlast(17, 17, 12)
+		vcipherlast(18, 18, 12)
+		vcipherlast(19, 19, 12)
+
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		/*
+		 * XOR the data blocks (v20..v23) with the keystream; store.
+		 */
+		vxor(16, 20, 16)
+		vxor(17, 21, 17)
+		vxor(18, 22, 18)
+		vxor(19, 23, 19)
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		addi(%[buf], %[buf], 64)
+
+		/*
+		 * Update IV: copy v24..v27 to v16..v19 (x AND x is x).
+		 */
+		vand(16, 24, 24)
+		vand(17, 25, 25)
+		vand(18, 26, 26)
+		vand(19, 27, 27)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)
+: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
+  [ctrinc] "b" (ctrinc)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+static void
+ctr_256(const unsigned char *sk, const unsigned char *ivbuf,
+	unsigned char *buf, size_t num_blocks)
+{
+	long cc0, cc1, cc2, cc3; /* index registers for indexed loads/stores */
+	/* AES-256 CTR on buf, in place: (num_blocks >> 2) groups of 4 blocks. */
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+	static const uint32_t ctrinc[] = {
+		0, 0, 0, 4
+	};
+
+	cc0 = 0;
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+
+		/*
+		 * Load the 15 subkeys into v0..v14 (cc0 walks through sk)
+		 */
+		lxvw4x(32, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(33, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(34, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(35, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(36, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(37, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(38, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(39, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(40, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(41, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(42, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(43, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(44, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(45, %[cc0], %[sk])
+		addi(%[cc0], %[cc0], 16)
+		lxvw4x(46, %[cc0], %[sk])
+		li(%[cc0], 0)
+
+#if BR_POWER8_LE
+		/*
+		 * v15 = constant for byteswapping words
+		 */
+		lxvw4x(47, 0, %[idx2be])
+#endif
+		/*
+		 * v28 = increment for IV counter.
+		 */
+		lxvw4x(60, 0, %[ctrinc])
+
+		/*
+		 * Load the four initial counter blocks (ivbuf) into v16..v19
+		 */
+		lxvw4x(48, %[cc0], %[ivbuf])
+		lxvw4x(49, %[cc1], %[ivbuf])
+		lxvw4x(50, %[cc2], %[ivbuf])
+		lxvw4x(51, %[cc3], %[ivbuf])
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		mtctr(%[num_blocks])
+	label(loop)
+		/*
+		 * Next counter blocks into v24..v27 (word-wise add of v28)
+		 */
+		vadduwm(24, 16, 28)
+		vadduwm(25, 17, 28)
+		vadduwm(26, 18, 28)
+		vadduwm(27, 19, 28)
+
+		/*
+		 * Load next data blocks. We do this early on but we
+		 * won't need them until IV encryption is done.
+		 */
+		lxvw4x(52, %[cc0], %[buf])
+		lxvw4x(53, %[cc1], %[buf])
+		lxvw4x(54, %[cc2], %[buf])
+		lxvw4x(55, %[cc3], %[buf])
+
+		/*
+		 * Encrypt the four current counter blocks (14 rounds).
+		 */
+		vxor(16, 16, 0)
+		vxor(17, 17, 0)
+		vxor(18, 18, 0)
+		vxor(19, 19, 0)
+		vcipher(16, 16, 1)
+		vcipher(17, 17, 1)
+		vcipher(18, 18, 1)
+		vcipher(19, 19, 1)
+		vcipher(16, 16, 2)
+		vcipher(17, 17, 2)
+		vcipher(18, 18, 2)
+		vcipher(19, 19, 2)
+		vcipher(16, 16, 3)
+		vcipher(17, 17, 3)
+		vcipher(18, 18, 3)
+		vcipher(19, 19, 3)
+		vcipher(16, 16, 4)
+		vcipher(17, 17, 4)
+		vcipher(18, 18, 4)
+		vcipher(19, 19, 4)
+		vcipher(16, 16, 5)
+		vcipher(17, 17, 5)
+		vcipher(18, 18, 5)
+		vcipher(19, 19, 5)
+		vcipher(16, 16, 6)
+		vcipher(17, 17, 6)
+		vcipher(18, 18, 6)
+		vcipher(19, 19, 6)
+		vcipher(16, 16, 7)
+		vcipher(17, 17, 7)
+		vcipher(18, 18, 7)
+		vcipher(19, 19, 7)
+		vcipher(16, 16, 8)
+		vcipher(17, 17, 8)
+		vcipher(18, 18, 8)
+		vcipher(19, 19, 8)
+		vcipher(16, 16, 9)
+		vcipher(17, 17, 9)
+		vcipher(18, 18, 9)
+		vcipher(19, 19, 9)
+		vcipher(16, 16, 10)
+		vcipher(17, 17, 10)
+		vcipher(18, 18, 10)
+		vcipher(19, 19, 10)
+		vcipher(16, 16, 11)
+		vcipher(17, 17, 11)
+		vcipher(18, 18, 11)
+		vcipher(19, 19, 11)
+		vcipher(16, 16, 12)
+		vcipher(17, 17, 12)
+		vcipher(18, 18, 12)
+		vcipher(19, 19, 12)
+		vcipher(16, 16, 13)
+		vcipher(17, 17, 13)
+		vcipher(18, 18, 13)
+		vcipher(19, 19, 13)
+		vcipherlast(16, 16, 14)
+		vcipherlast(17, 17, 14)
+		vcipherlast(18, 18, 14)
+		vcipherlast(19, 19, 14)
+
+#if BR_POWER8_LE
+		vperm(16, 16, 16, 15)
+		vperm(17, 17, 17, 15)
+		vperm(18, 18, 18, 15)
+		vperm(19, 19, 19, 15)
+#endif
+
+		/*
+		 * XOR the data blocks (v20..v23) with the keystream; store.
+		 */
+		vxor(16, 20, 16)
+		vxor(17, 21, 17)
+		vxor(18, 22, 18)
+		vxor(19, 23, 19)
+		stxvw4x(48, %[cc0], %[buf])
+		stxvw4x(49, %[cc1], %[buf])
+		stxvw4x(50, %[cc2], %[buf])
+		stxvw4x(51, %[cc3], %[buf])
+
+		addi(%[buf], %[buf], 64)
+
+		/*
+		 * Update IV: copy v24..v27 to v16..v19 (x AND x is x).
+		 */
+		vand(16, 24, 24)
+		vand(17, 25, 25)
+		vand(18, 26, 26)
+		vand(19, 27, 27)
+
+		bdnz(loop)
+
+: [cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3),
+  [buf] "+b" (buf)
+: [sk] "b" (sk), [ivbuf] "b" (ivbuf), [num_blocks] "b" (num_blocks >> 2),
+  [ctrinc] "b" (ctrinc)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+/* see bearssl_block.h */
+uint32_t
+br_aes_pwr8_ctr_run(const br_aes_pwr8_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	/*
+	 * CTR encryption/decryption of len bytes of data, in place. Each
+	 * counter block is the 12-byte iv followed by a 32-bit big-endian
+	 * counter starting at cc; the returned value is the counter for
+	 * the block following the last one used.
+	 */
+	void (*crypt)(const unsigned char *, const unsigned char *,
+		unsigned char *, size_t);
+	unsigned char ivbuf[64], tmp[64];
+	unsigned char *cur;
+	size_t u, nblk;
+
+	/* Pick the routine matching the key schedule once, up front. */
+	switch (ctx->num_rounds) {
+	case 10:
+		crypt = &ctr_128;
+		break;
+	case 12:
+		crypt = &ctr_192;
+		break;
+	default:
+		crypt = &ctr_256;
+		break;
+	}
+
+	/* Four consecutive counter blocks share the same 12-byte prefix. */
+	for (u = 0; u < 64; u += 16) {
+		memcpy(ivbuf + u, iv, 12);
+	}
+	cur = data;
+
+	/* Bulk part: whole groups of four blocks, processed in place. */
+	if (len >= 64) {
+		nblk = (len >> 4) & ~(size_t)3;
+		for (u = 0; u < 4; u ++) {
+			br_enc32be(ivbuf + (u << 4) + 12, cc + (uint32_t)u);
+		}
+		crypt(ctx->skey.skni, ivbuf, cur, nblk);
+		cc += nblk;
+		cur += len & ~(size_t)63;
+		len &= 63;
+	}
+
+	/* Tail: zero-pad into a 64-byte scratch group, keep len bytes. */
+	if (len > 0) {
+		memcpy(tmp, cur, len);
+		memset(tmp + len, 0, (sizeof tmp) - len);
+		for (u = 0; u < 4; u ++) {
+			br_enc32be(ivbuf + (u << 4) + 12, cc + (uint32_t)u);
+		}
+		crypt(ctx->skey.skni, ivbuf, tmp, 4);
+		memcpy(cur, tmp, len);
+		cc += (len + 15) >> 4;
+	}
+	return cc;
+}
+
+/* see bearssl_block.h; the casts adapt the typed functions to the class */
+const br_block_ctr_class br_aes_pwr8_ctr_vtable = {
+	sizeof(br_aes_pwr8_ctr_keys),
+	16, /* NOTE(review): presumably the block size in bytes -- confirm */
+	4, /* NOTE(review): presumably log2 of the block size -- confirm */
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_pwr8_ctr_init,
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_pwr8_ctr_run
+};
+
+/* see bearssl_block.h; NULL unless br_aes_pwr8_supported() returns nonzero */
+const br_block_ctr_class *
+br_aes_pwr8_ctr_get_vtable(void)
+{
+	return br_aes_pwr8_supported() ? &br_aes_pwr8_ctr_vtable : NULL;
+}
+
+#else
+
+/* see bearssl_block.h; fallback for the #else branch: no vtable to offer */
+const br_block_ctr_class *
+br_aes_pwr8_ctr_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
diff --git a/third_party/bearssl/src/aes_pwr8_ctrcbc.c b/third_party/bearssl/src/aes_pwr8_ctrcbc.c
new file mode 100644
index 0000000..a67d30b
--- /dev/null
+++ b/third_party/bearssl/src/aes_pwr8_ctrcbc.c
@@ -0,0 +1,946 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+#if BR_POWER8
+
+/* see bearssl_block.h; NULL unless br_aes_pwr8_supported() returns nonzero */
+const br_block_ctrcbc_class *
+br_aes_pwr8_ctrcbc_get_vtable(void)
+{
+	return br_aes_pwr8_supported() ? &br_aes_pwr8_ctrcbc_vtable : NULL;
+}
+
+/* see bearssl_block.h; num_rounds is what br_aes_pwr8_keysched() returns */
+void
+br_aes_pwr8_ctrcbc_init(br_aes_pwr8_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_pwr8_ctrcbc_vtable;
+	ctx->num_rounds = br_aes_pwr8_keysched(ctx->skey.skni, key, len);
+}
+
+/*
+ * Register conventions for CTR + CBC-MAC:
+ *
+ *   AES subkeys are in registers 0 to 10/12/14 (depending on key size)
+ *   Register v15 contains the byteswap index register (little-endian only)
+ *   Register v16 contains the CTR counter value
+ *   Register v17 contains the CBC-MAC current value
+ *   Registers v18 to v27 are scratch
+ *   Counter increment uses v28, v29 and v30
+ *
+ * For CTR alone:
+ *  
+ *   AES subkeys are in registers 0 to 10/12/14 (depending on key size)
+ *   Register v15 contains the byteswap index register (little-endian only)
+ *   Registers v16 to v19 contain the CTR counter values (four blocks)
+ *   Registers v20 to v27 are scratch
+ *   Counter increment uses v28, v29 and v30
+ */
+
+#define LOAD_SUBKEYS_128 /* 11 subkeys -> v0..v10; advances %[cc] by 160 */ \
+		lxvw4x(32, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(33, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(34, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(35, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(36, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(37, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(38, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(39, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(40, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(41, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(42, %[cc], %[sk])
+
+#define LOAD_SUBKEYS_192 /* as LOAD_SUBKEYS_128, plus subkeys v11, v12 */ \
+		LOAD_SUBKEYS_128 \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(43, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(44, %[cc], %[sk])
+
+#define LOAD_SUBKEYS_256 /* as LOAD_SUBKEYS_192, plus subkeys v13, v14 */ \
+		LOAD_SUBKEYS_192 \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(45, %[cc], %[sk])   \
+		addi(%[cc], %[cc], 16)     \
+		lxvw4x(46, %[cc], %[sk])
+
+#define BLOCK_ENCRYPT_128(x) /* AES-128 on vector x, in place (v0..v10) */ \
+		vxor(x, x, 0) \
+		vcipher(x, x, 1) \
+		vcipher(x, x, 2) \
+		vcipher(x, x, 3) \
+		vcipher(x, x, 4) \
+		vcipher(x, x, 5) \
+		vcipher(x, x, 6) \
+		vcipher(x, x, 7) \
+		vcipher(x, x, 8) \
+		vcipher(x, x, 9) \
+		vcipherlast(x, x, 10)
+
+#define BLOCK_ENCRYPT_192(x) /* AES-192 on vector x, in place (v0..v12) */ \
+		vxor(x, x, 0) \
+		vcipher(x, x, 1) \
+		vcipher(x, x, 2) \
+		vcipher(x, x, 3) \
+		vcipher(x, x, 4) \
+		vcipher(x, x, 5) \
+		vcipher(x, x, 6) \
+		vcipher(x, x, 7) \
+		vcipher(x, x, 8) \
+		vcipher(x, x, 9) \
+		vcipher(x, x, 10) \
+		vcipher(x, x, 11) \
+		vcipherlast(x, x, 12)
+
+#define BLOCK_ENCRYPT_256(x) /* AES-256 on vector x, in place (v0..v14) */ \
+		vxor(x, x, 0) \
+		vcipher(x, x, 1) \
+		vcipher(x, x, 2) \
+		vcipher(x, x, 3) \
+		vcipher(x, x, 4) \
+		vcipher(x, x, 5) \
+		vcipher(x, x, 6) \
+		vcipher(x, x, 7) \
+		vcipher(x, x, 8) \
+		vcipher(x, x, 9) \
+		vcipher(x, x, 10) \
+		vcipher(x, x, 11) \
+		vcipher(x, x, 12) \
+		vcipher(x, x, 13) \
+		vcipherlast(x, x, 14)
+
+#define BLOCK_ENCRYPT_X2_128(x, y) /* AES-128 on x and y, interleaved */ \
+		vxor(x, x, 0) \
+		vxor(y, y, 0) \
+		vcipher(x, x, 1) \
+		vcipher(y, y, 1) \
+		vcipher(x, x, 2) \
+		vcipher(y, y, 2) \
+		vcipher(x, x, 3) \
+		vcipher(y, y, 3) \
+		vcipher(x, x, 4) \
+		vcipher(y, y, 4) \
+		vcipher(x, x, 5) \
+		vcipher(y, y, 5) \
+		vcipher(x, x, 6) \
+		vcipher(y, y, 6) \
+		vcipher(x, x, 7) \
+		vcipher(y, y, 7) \
+		vcipher(x, x, 8) \
+		vcipher(y, y, 8) \
+		vcipher(x, x, 9) \
+		vcipher(y, y, 9) \
+		vcipherlast(x, x, 10) \
+		vcipherlast(y, y, 10)
+
+#define BLOCK_ENCRYPT_X2_192(x, y) /* AES-192 on x and y, interleaved */ \
+		vxor(x, x, 0) \
+		vxor(y, y, 0) \
+		vcipher(x, x, 1) \
+		vcipher(y, y, 1) \
+		vcipher(x, x, 2) \
+		vcipher(y, y, 2) \
+		vcipher(x, x, 3) \
+		vcipher(y, y, 3) \
+		vcipher(x, x, 4) \
+		vcipher(y, y, 4) \
+		vcipher(x, x, 5) \
+		vcipher(y, y, 5) \
+		vcipher(x, x, 6) \
+		vcipher(y, y, 6) \
+		vcipher(x, x, 7) \
+		vcipher(y, y, 7) \
+		vcipher(x, x, 8) \
+		vcipher(y, y, 8) \
+		vcipher(x, x, 9) \
+		vcipher(y, y, 9) \
+		vcipher(x, x, 10) \
+		vcipher(y, y, 10) \
+		vcipher(x, x, 11) \
+		vcipher(y, y, 11) \
+		vcipherlast(x, x, 12) \
+		vcipherlast(y, y, 12)
+
+#define BLOCK_ENCRYPT_X2_256(x, y) /* AES-256 on x and y, interleaved */ \
+		vxor(x, x, 0) \
+		vxor(y, y, 0) \
+		vcipher(x, x, 1) \
+		vcipher(y, y, 1) \
+		vcipher(x, x, 2) \
+		vcipher(y, y, 2) \
+		vcipher(x, x, 3) \
+		vcipher(y, y, 3) \
+		vcipher(x, x, 4) \
+		vcipher(y, y, 4) \
+		vcipher(x, x, 5) \
+		vcipher(y, y, 5) \
+		vcipher(x, x, 6) \
+		vcipher(y, y, 6) \
+		vcipher(x, x, 7) \
+		vcipher(y, y, 7) \
+		vcipher(x, x, 8) \
+		vcipher(y, y, 8) \
+		vcipher(x, x, 9) \
+		vcipher(y, y, 9) \
+		vcipher(x, x, 10) \
+		vcipher(y, y, 10) \
+		vcipher(x, x, 11) \
+		vcipher(y, y, 11) \
+		vcipher(x, x, 12) \
+		vcipher(y, y, 12) \
+		vcipher(x, x, 13) \
+		vcipher(y, y, 13) \
+		vcipherlast(x, x, 14) \
+		vcipherlast(y, y, 14)
+
+#define BLOCK_ENCRYPT_X4_128(x0, x1, x2, x3) /* 4 x AES-128, interleaved */ \
+		vxor(x0, x0, 0) \
+		vxor(x1, x1, 0) \
+		vxor(x2, x2, 0) \
+		vxor(x3, x3, 0) \
+		vcipher(x0, x0, 1) \
+		vcipher(x1, x1, 1) \
+		vcipher(x2, x2, 1) \
+		vcipher(x3, x3, 1) \
+		vcipher(x0, x0, 2) \
+		vcipher(x1, x1, 2) \
+		vcipher(x2, x2, 2) \
+		vcipher(x3, x3, 2) \
+		vcipher(x0, x0, 3) \
+		vcipher(x1, x1, 3) \
+		vcipher(x2, x2, 3) \
+		vcipher(x3, x3, 3) \
+		vcipher(x0, x0, 4) \
+		vcipher(x1, x1, 4) \
+		vcipher(x2, x2, 4) \
+		vcipher(x3, x3, 4) \
+		vcipher(x0, x0, 5) \
+		vcipher(x1, x1, 5) \
+		vcipher(x2, x2, 5) \
+		vcipher(x3, x3, 5) \
+		vcipher(x0, x0, 6) \
+		vcipher(x1, x1, 6) \
+		vcipher(x2, x2, 6) \
+		vcipher(x3, x3, 6) \
+		vcipher(x0, x0, 7) \
+		vcipher(x1, x1, 7) \
+		vcipher(x2, x2, 7) \
+		vcipher(x3, x3, 7) \
+		vcipher(x0, x0, 8) \
+		vcipher(x1, x1, 8) \
+		vcipher(x2, x2, 8) \
+		vcipher(x3, x3, 8) \
+		vcipher(x0, x0, 9) \
+		vcipher(x1, x1, 9) \
+		vcipher(x2, x2, 9) \
+		vcipher(x3, x3, 9) \
+		vcipherlast(x0, x0, 10) \
+		vcipherlast(x1, x1, 10) \
+		vcipherlast(x2, x2, 10) \
+		vcipherlast(x3, x3, 10)
+
+#define BLOCK_ENCRYPT_X4_192(x0, x1, x2, x3) /* 4 x AES-192, interleaved */ \
+		vxor(x0, x0, 0) \
+		vxor(x1, x1, 0) \
+		vxor(x2, x2, 0) \
+		vxor(x3, x3, 0) \
+		vcipher(x0, x0, 1) \
+		vcipher(x1, x1, 1) \
+		vcipher(x2, x2, 1) \
+		vcipher(x3, x3, 1) \
+		vcipher(x0, x0, 2) \
+		vcipher(x1, x1, 2) \
+		vcipher(x2, x2, 2) \
+		vcipher(x3, x3, 2) \
+		vcipher(x0, x0, 3) \
+		vcipher(x1, x1, 3) \
+		vcipher(x2, x2, 3) \
+		vcipher(x3, x3, 3) \
+		vcipher(x0, x0, 4) \
+		vcipher(x1, x1, 4) \
+		vcipher(x2, x2, 4) \
+		vcipher(x3, x3, 4) \
+		vcipher(x0, x0, 5) \
+		vcipher(x1, x1, 5) \
+		vcipher(x2, x2, 5) \
+		vcipher(x3, x3, 5) \
+		vcipher(x0, x0, 6) \
+		vcipher(x1, x1, 6) \
+		vcipher(x2, x2, 6) \
+		vcipher(x3, x3, 6) \
+		vcipher(x0, x0, 7) \
+		vcipher(x1, x1, 7) \
+		vcipher(x2, x2, 7) \
+		vcipher(x3, x3, 7) \
+		vcipher(x0, x0, 8) \
+		vcipher(x1, x1, 8) \
+		vcipher(x2, x2, 8) \
+		vcipher(x3, x3, 8) \
+		vcipher(x0, x0, 9) \
+		vcipher(x1, x1, 9) \
+		vcipher(x2, x2, 9) \
+		vcipher(x3, x3, 9) \
+		vcipher(x0, x0, 10) \
+		vcipher(x1, x1, 10) \
+		vcipher(x2, x2, 10) \
+		vcipher(x3, x3, 10) \
+		vcipher(x0, x0, 11) \
+		vcipher(x1, x1, 11) \
+		vcipher(x2, x2, 11) \
+		vcipher(x3, x3, 11) \
+		vcipherlast(x0, x0, 12) \
+		vcipherlast(x1, x1, 12) \
+		vcipherlast(x2, x2, 12) \
+		vcipherlast(x3, x3, 12)
+
+#define BLOCK_ENCRYPT_X4_256(x0, x1, x2, x3) /* 4 x AES-256, interleaved */ \
+		vxor(x0, x0, 0) \
+		vxor(x1, x1, 0) \
+		vxor(x2, x2, 0) \
+		vxor(x3, x3, 0) \
+		vcipher(x0, x0, 1) \
+		vcipher(x1, x1, 1) \
+		vcipher(x2, x2, 1) \
+		vcipher(x3, x3, 1) \
+		vcipher(x0, x0, 2) \
+		vcipher(x1, x1, 2) \
+		vcipher(x2, x2, 2) \
+		vcipher(x3, x3, 2) \
+		vcipher(x0, x0, 3) \
+		vcipher(x1, x1, 3) \
+		vcipher(x2, x2, 3) \
+		vcipher(x3, x3, 3) \
+		vcipher(x0, x0, 4) \
+		vcipher(x1, x1, 4) \
+		vcipher(x2, x2, 4) \
+		vcipher(x3, x3, 4) \
+		vcipher(x0, x0, 5) \
+		vcipher(x1, x1, 5) \
+		vcipher(x2, x2, 5) \
+		vcipher(x3, x3, 5) \
+		vcipher(x0, x0, 6) \
+		vcipher(x1, x1, 6) \
+		vcipher(x2, x2, 6) \
+		vcipher(x3, x3, 6) \
+		vcipher(x0, x0, 7) \
+		vcipher(x1, x1, 7) \
+		vcipher(x2, x2, 7) \
+		vcipher(x3, x3, 7) \
+		vcipher(x0, x0, 8) \
+		vcipher(x1, x1, 8) \
+		vcipher(x2, x2, 8) \
+		vcipher(x3, x3, 8) \
+		vcipher(x0, x0, 9) \
+		vcipher(x1, x1, 9) \
+		vcipher(x2, x2, 9) \
+		vcipher(x3, x3, 9) \
+		vcipher(x0, x0, 10) \
+		vcipher(x1, x1, 10) \
+		vcipher(x2, x2, 10) \
+		vcipher(x3, x3, 10) \
+		vcipher(x0, x0, 11) \
+		vcipher(x1, x1, 11) \
+		vcipher(x2, x2, 11) \
+		vcipher(x3, x3, 11) \
+		vcipher(x0, x0, 12) \
+		vcipher(x1, x1, 12) \
+		vcipher(x2, x2, 12) \
+		vcipher(x3, x3, 12) \
+		vcipher(x0, x0, 13) \
+		vcipher(x1, x1, 13) \
+		vcipher(x2, x2, 13) \
+		vcipher(x3, x3, 13) \
+		vcipherlast(x0, x0, 14) \
+		vcipherlast(x1, x1, 14) \
+		vcipherlast(x2, x2, 14) \
+		vcipherlast(x3, x3, 14)
+
+#if BR_POWER8_LE /* little-endian only: v15 holds the byteswap index */
+static const uint32_t idx2be[] = {
+	0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+};
+#define BYTESWAP_INIT     lxvw4x(47, 0, %[idx2be]) /* v15 <- idx2be */
+#define BYTESWAP(x)       vperm(x, x, x, 15) /* permute x in place */
+#define BYTESWAPX(d, s)   vperm(d, s, s, 15) /* d <- permuted s */
+#define BYTESWAP_REG      , [idx2be] "b" (idx2be) /* extra asm input */
+#else
+#define BYTESWAP_INIT /* expands to nothing */
+#define BYTESWAP(x) /* expands to nothing */
+#define BYTESWAPX(d, s)   vand(d, s, s) /* plain copy: d <- s */
+#define BYTESWAP_REG /* expands to nothing */
+#endif
+
+static const uint32_t ctrinc[] = { /* loaded into v28 by INCR_128_INIT */
+	0, 0, 0, 1
+};
+static const uint32_t ctrinc_x4[] = { /* loaded into v28 by INCR_128_X4_INIT */
+	0, 0, 0, 4
+};
+#define INCR_128_INIT      lxvw4x(60, 0, %[ctrinc])
+#define INCR_128_X4_INIT   lxvw4x(60, 0, %[ctrinc_x4])
+#define INCR_128(d, s) /* d <- s + v28 with carry; uses v29 and v30 */ \
+		vaddcuw(29, s, 28) \
+		vadduwm(d, s, 28) \
+		vsldoi(30, 29, 29, 4) \
+		vaddcuw(29, d, 30) \
+		vadduwm(d, d, 30) \
+		vsldoi(30, 29, 29, 4) \
+		vaddcuw(29, d, 30) \
+		vadduwm(d, d, 30) \
+		vsldoi(30, 29, 29, 4) \
+		vadduwm(d, d, 30)
+
+#define MKCTR(size) /* emits ctr_<size>(): CTR on groups of four blocks */ \
+static void \
+ctr_ ## size(const unsigned char *sk, \
+	unsigned char *ctrbuf, unsigned char *buf, size_t num_blocks_x4) \
+{ \
+	long cc, cc0, cc1, cc2, cc3; \
+ \
+	cc = 0; \
+	cc0 = 0; \
+	cc1 = 16; \
+	cc2 = 32; \
+	cc3 = 48; \
+	asm volatile ( \
+ \
+		/* \
+		 * Load subkeys into v0..v10 (up to v12 / v14 for 192 / 256) \
+		 */ \
+		LOAD_SUBKEYS_ ## size \
+		li(%[cc], 0) \
+ \
+		BYTESWAP_INIT \
+		INCR_128_X4_INIT \
+ \
+		/* \
+		 * Load current CTR counters into v16 to v19. \
+		 */ \
+		lxvw4x(48, %[cc0], %[ctrbuf]) \
+		lxvw4x(49, %[cc1], %[ctrbuf]) \
+		lxvw4x(50, %[cc2], %[ctrbuf]) \
+		lxvw4x(51, %[cc3], %[ctrbuf]) \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+		BYTESWAP(18) \
+		BYTESWAP(19) \
+ \
+		mtctr(%[num_blocks_x4]) \
+ \
+	label(loop) \
+		/* \
+		 * Compute next counter values (+4 each) into v20..v23. \
+		 */ \
+		INCR_128(20, 16) \
+		INCR_128(21, 17) \
+		INCR_128(22, 18) \
+		INCR_128(23, 19) \
+ \
+		/* \
+		 * Encrypt counter values, XOR with data blocks (v24..v27). \
+		 */ \
+		lxvw4x(56, %[cc0], %[buf]) \
+		lxvw4x(57, %[cc1], %[buf]) \
+		lxvw4x(58, %[cc2], %[buf]) \
+		lxvw4x(59, %[cc3], %[buf]) \
+		BYTESWAP(24) \
+		BYTESWAP(25) \
+		BYTESWAP(26) \
+		BYTESWAP(27) \
+		BLOCK_ENCRYPT_X4_ ## size(16, 17, 18, 19) \
+		vxor(16, 16, 24) \
+		vxor(17, 17, 25) \
+		vxor(18, 18, 26) \
+		vxor(19, 19, 27) \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+		BYTESWAP(18) \
+		BYTESWAP(19) \
+		stxvw4x(48, %[cc0], %[buf]) \
+		stxvw4x(49, %[cc1], %[buf]) \
+		stxvw4x(50, %[cc2], %[buf]) \
+		stxvw4x(51, %[cc3], %[buf]) \
+ \
+		/* \
+		 * Update counters and data pointer. \
+		 */ \
+		vand(16, 20, 20) \
+		vand(17, 21, 21) \
+		vand(18, 22, 22) \
+		vand(19, 23, 23) \
+		addi(%[buf], %[buf], 64) \
+ \
+		bdnz(loop) \
+ \
+		/* \
+		 * Write back new counter values. \
+		 */ \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+		BYTESWAP(18) \
+		BYTESWAP(19) \
+		stxvw4x(48, %[cc0], %[ctrbuf]) \
+		stxvw4x(49, %[cc1], %[ctrbuf]) \
+		stxvw4x(50, %[cc2], %[ctrbuf]) \
+		stxvw4x(51, %[cc3], %[ctrbuf]) \
+ \
+: [cc] "+b" (cc), [buf] "+b" (buf), \
+	[cc0] "+b" (cc0), [cc1] "+b" (cc1), [cc2] "+b" (cc2), [cc3] "+b" (cc3) \
+: [sk] "b" (sk), [ctrbuf] "b" (ctrbuf), \
+	[num_blocks_x4] "b" (num_blocks_x4), [ctrinc_x4] "b" (ctrinc_x4) \
+	BYTESWAP_REG \
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
+  "v30", "ctr", "memory" \
+	); \
+}
+
+MKCTR(128)
+MKCTR(192)
+MKCTR(256)
+
+#define MKCBCMAC(size) /* emits cbcmac_<size>(): CBC-MAC over num_blocks */ \
+static void \
+cbcmac_ ## size(const unsigned char *sk, \
+	unsigned char *cbcmac, const unsigned char *buf, size_t num_blocks) \
+{ \
+	long cc; \
+ \
+	cc = 0; \
+	asm volatile ( \
+ \
+		/* \
+		 * Load subkeys into v0..v10 (up to v12 / v14 for 192 / 256) \
+		 */ \
+		LOAD_SUBKEYS_ ## size \
+		li(%[cc], 0) \
+ \
+		BYTESWAP_INIT \
+ \
+		/* \
+		 * Load current CBC-MAC value into v16. \
+		 */ \
+		lxvw4x(48, %[cc], %[cbcmac]) \
+		BYTESWAP(16) \
+ \
+		mtctr(%[num_blocks]) \
+ \
+	label(loop) \
+		/* \
+		 * Load next block, XOR into current CBC-MAC value, \
+		 * and then encrypt it. \
+		 */ \
+		lxvw4x(49, %[cc], %[buf]) \
+		BYTESWAP(17) \
+		vxor(16, 16, 17) \
+		BLOCK_ENCRYPT_ ## size(16) \
+		addi(%[buf], %[buf], 16) \
+ \
+		bdnz(loop) \
+ \
+		/* \
+		 * Write back new CBC-MAC value. \
+		 */ \
+		BYTESWAP(16) \
+		stxvw4x(48, %[cc], %[cbcmac]) \
+ \
+: [cc] "+b" (cc), [buf] "+b" (buf) \
+: [sk] "b" (sk), [cbcmac] "b" (cbcmac), [num_blocks] "b" (num_blocks) \
+	BYTESWAP_REG \
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
+  "v30", "ctr", "memory" \
+	); \
+}
+
+MKCBCMAC(128)
+MKCBCMAC(192)
+MKCBCMAC(256)
+
+#define MKENCRYPT(size) /* emits ctrcbc_<size>_encrypt(): CTR + CBC-MAC */ \
+static void \
+ctrcbc_ ## size ## _encrypt(const unsigned char *sk, \
+	unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
+	size_t num_blocks) \
+{ \
+	long cc; \
+ \
+	cc = 0; \
+	asm volatile ( \
+ \
+		/* \
+		 * Load subkeys into v0..v10 (up to v12 / v14 for 192 / 256) \
+		 */ \
+		LOAD_SUBKEYS_ ## size \
+		li(%[cc], 0) \
+ \
+		BYTESWAP_INIT \
+		INCR_128_INIT \
+ \
+		/* \
+		 * Load current CTR counter into v16, and current \
+		 * CBC-MAC IV into v17. \
+		 */ \
+		lxvw4x(48, %[cc], %[ctr]) \
+		lxvw4x(49, %[cc], %[cbcmac]) \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+ \
+		/* \
+		 * At each iteration, we do two parallel encryptions: \
+		 *  - new counter value for encryption of the next block; \
+		 *  - CBC-MAC over the previous encrypted block. \
+		 * Thus, each plaintext block implies two AES instances, \
+		 * over two successive iterations. This requires a single \
+		 * counter encryption before the loop, and a single \
+		 * CBC-MAC encryption after the loop. \
+		 */ \
+ \
+		/* \
+		 * Encrypt first block (into v20). \
+		 */ \
+		lxvw4x(52, %[cc], %[buf]) \
+		BYTESWAP(20) \
+		INCR_128(22, 16) \
+		BLOCK_ENCRYPT_ ## size(16) \
+		vxor(20, 20, 16) \
+		BYTESWAPX(21, 20) \
+		stxvw4x(53, %[cc], %[buf]) \
+		vand(16, 22, 22) \
+		addi(%[buf], %[buf], 16) \
+ \
+		/* \
+		 * Load loop counter; skip the loop if there is only \
+		 * one block in total (already handled by the boundary \
+		 * conditions). \
+		 */ \
+		mtctr(%[num_blocks]) \
+		bdz(fastexit) \
+ \
+	label(loop) \
+		/* \
+		 * Upon loop entry: \
+		 *    v16   counter value for next block \
+		 *    v17   current CBC-MAC value \
+		 *    v20   encrypted previous block \
+		 */ \
+		vxor(17, 17, 20) \
+		INCR_128(22, 16) \
+		lxvw4x(52, %[cc], %[buf]) \
+		BYTESWAP(20) \
+		BLOCK_ENCRYPT_X2_ ## size(16, 17) \
+		vxor(20, 20, 16) \
+		BYTESWAPX(21, 20) \
+		stxvw4x(53, %[cc], %[buf]) \
+		addi(%[buf], %[buf], 16) \
+		vand(16, 22, 22) \
+ \
+		bdnz(loop) \
+ \
+	label(fastexit) \
+		vxor(17, 17, 20) \
+		BLOCK_ENCRYPT_ ## size(17) \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+		stxvw4x(48, %[cc], %[ctr]) \
+		stxvw4x(49, %[cc], %[cbcmac]) \
+ \
+: [cc] "+b" (cc), [buf] "+b" (buf) \
+: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
+	[num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
+	BYTESWAP_REG \
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
+  "v30", "ctr", "memory" \
+	); \
+}
+
+MKENCRYPT(128)
+MKENCRYPT(192)
+MKENCRYPT(256)
+
+#define MKDECRYPT(size) /* emits ctrcbc_<size>_decrypt(): CBC-MAC + CTR */ \
+static void \
+ctrcbc_ ## size ## _decrypt(const unsigned char *sk, \
+	unsigned char *ctr, unsigned char *cbcmac, unsigned char *buf, \
+	size_t num_blocks) \
+{ \
+	long cc; \
+ \
+	cc = 0; \
+	asm volatile ( \
+ \
+		/* \
+		 * Load subkeys into v0..v10 (up to v12 / v14 for 192 / 256) \
+		 */ \
+		LOAD_SUBKEYS_ ## size \
+		li(%[cc], 0) \
+ \
+		BYTESWAP_INIT \
+		INCR_128_INIT \
+ \
+		/* \
+		 * Load current CTR counter into v16, and current \
+		 * CBC-MAC IV into v17. \
+		 */ \
+		lxvw4x(48, %[cc], %[ctr]) \
+		lxvw4x(49, %[cc], %[cbcmac]) \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+ \
+		/* \
+		 * At each iteration, we do two parallel encryptions: \
+		 *  - new counter value for decryption of the next block; \
+		 *  - CBC-MAC over the next encrypted block. \
+		 * Each iteration performs the two AES instances related \
+		 * to the current block; there is thus no need for some \
+		 * extra pre-loop and post-loop work as in encryption. \
+		 */ \
+ \
+		mtctr(%[num_blocks]) \
+ \
+	label(loop) \
+		/* \
+		 * Upon loop entry: \
+		 *    v16   counter value for next block \
+		 *    v17   current CBC-MAC value \
+		 */ \
+		lxvw4x(52, %[cc], %[buf]) \
+		BYTESWAP(20) \
+		vxor(17, 17, 20) \
+		INCR_128(22, 16) \
+		BLOCK_ENCRYPT_X2_ ## size(16, 17) \
+		vxor(20, 20, 16) \
+		BYTESWAPX(21, 20) \
+		stxvw4x(53, %[cc], %[buf]) \
+		addi(%[buf], %[buf], 16) \
+		vand(16, 22, 22) \
+ \
+		bdnz(loop) \
+ \
+		/* \
+		 * Store back counter and CBC-MAC value. \
+		 */ \
+		BYTESWAP(16) \
+		BYTESWAP(17) \
+		stxvw4x(48, %[cc], %[ctr]) \
+		stxvw4x(49, %[cc], %[cbcmac]) \
+ \
+: [cc] "+b" (cc), [buf] "+b" (buf) \
+: [sk] "b" (sk), [ctr] "b" (ctr), [cbcmac] "b" (cbcmac), \
+	[num_blocks] "b" (num_blocks), [ctrinc] "b" (ctrinc) \
+	BYTESWAP_REG \
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", \
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", \
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", \
+  "v30", "ctr", "memory" \
+	); \
+}
+
+MKDECRYPT(128)
+MKDECRYPT(192)
+MKDECRYPT(256)
+
+/* see bearssl_block.h; inputs shorter than one 16-byte block are ignored */
+void
+br_aes_pwr8_ctrcbc_encrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	if (len < 16) { /* a zero block count would wrap the asm loop counter */
+		return;
+	}
+	switch (ctx->num_rounds) {
+	case 10:
+		ctrcbc_128_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
+		break;
+	case 12:
+		ctrcbc_192_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
+		break;
+	default:
+		ctrcbc_256_encrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
+		break;
+	}
+}
+
+/* see bearssl_block.h; inputs shorter than one 16-byte block are ignored */
+void
+br_aes_pwr8_ctrcbc_decrypt(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	if (len < 16) { /* a zero block count would wrap the asm loop counter */
+		return;
+	}
+	switch (ctx->num_rounds) {
+	case 10:
+		ctrcbc_128_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
+		break;
+	case 12:
+		ctrcbc_192_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
+		break;
+	default:
+		ctrcbc_256_decrypt(ctx->skey.skni, ctr, cbcmac, data, len >> 4);
+		break;
+	}
+}
+
+static inline void
+incr_ctr(void *dst, const void *src)
+{
+	const unsigned char *in = src;
+	unsigned char *out = dst;
+	uint64_t upper = br_dec64be(in);
+	uint64_t lower = br_dec64be(in + 8) + 1;
+	/* dst = src + 1 (128-bit, big-endian); branch-free carry when lower == 0 */
+	upper += ((lower | -lower) >> 63) ^ (uint64_t)1;
+	br_enc64be(out, upper);
+	br_enc64be(out + 8, lower);
+}
+
+/* see bearssl_block.h */
+void
+br_aes_pwr8_ctrcbc_ctr(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	/*
+	 * CTR-only processing of len bytes of data, in place. ctrbuf holds
+	 * four consecutive 128-bit counter values starting at *ctr; the
+	 * assembly routine consumes them four at a time and writes the
+	 * advanced values back into ctrbuf.
+	 */
+	void (*crypt)(const unsigned char *, unsigned char *,
+		unsigned char *, size_t);
+	unsigned char ctrbuf[64], tmp[64];
+	unsigned char *cur;
+	size_t off;
+
+	switch (ctx->num_rounds) {
+	case 10:
+		crypt = &ctr_128;
+		break;
+	case 12:
+		crypt = &ctr_192;
+		break;
+	default:
+		crypt = &ctr_256;
+		break;
+	}
+	/* ctrbuf = { ctr, ctr + 1, ctr + 2, ctr + 3 }. */
+	memcpy(ctrbuf, ctr, 16);
+	for (off = 16; off < 64; off += 16) {
+		incr_ctr(ctrbuf + off, ctrbuf + off - 16);
+	}
+	cur = data;
+
+	/* Whole 64-byte groups are handled directly in the caller's buffer. */
+	if (len >= 64) {
+		crypt(ctx->skey.skni, ctrbuf, cur, len >> 6);
+		cur += len & ~(size_t)63;
+		len &= 63;
+	}
+	if (len == 0) {
+		memcpy(ctr, ctrbuf, 16);
+		return;
+	}
+	/*
+	 * Tail of 1 to 63 bytes: publish the counter that follows the full
+	 * blocks in the tail (nothing is written when len < 16), then run
+	 * one zero-padded group through a scratch buffer.
+	 */
+	if (len >= 16) {
+		memcpy(ctr, ctrbuf + (len & ~(size_t)15), 16);
+	}
+	memcpy(tmp, cur, len);
+	memset(tmp + len, 0, (sizeof tmp) - len);
+	crypt(ctx->skey.skni, ctrbuf, tmp, 1);
+	memcpy(cur, tmp, len);
+}
+
+/* see bearssl_block.h; inputs shorter than one 16-byte block are ignored */
+void
+br_aes_pwr8_ctrcbc_mac(const br_aes_pwr8_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	if (len >= 16) { /* a zero block count would wrap the asm loop counter */
+		switch (ctx->num_rounds) {
+		case 10:
+			cbcmac_128(ctx->skey.skni, cbcmac, data, len >> 4);
+			break;
+		case 12:
+			cbcmac_192(ctx->skey.skni, cbcmac, data, len >> 4);
+			break;
+		default:
+			cbcmac_256(ctx->skey.skni, cbcmac, data, len >> 4);
+			break;
+		}
+	}
+}
+
+/* see bearssl_block.h; the casts adapt the typed functions to the class */
+const br_block_ctrcbc_class br_aes_pwr8_ctrcbc_vtable = {
+	sizeof(br_aes_pwr8_ctrcbc_keys),
+	16, /* NOTE(review): presumably the block size in bytes -- confirm */
+	4, /* NOTE(review): presumably log2 of the block size -- confirm */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_pwr8_ctrcbc_init,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_pwr8_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_pwr8_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_pwr8_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_pwr8_ctrcbc_mac
+};
+
+#else
+
+/* see bearssl_block.h; built when BR_POWER8 is false: no vtable to offer */
+const br_block_ctrcbc_class *
+br_aes_pwr8_ctrcbc_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
diff --git a/third_party/bearssl/src/aes_small_cbcdec.c b/third_party/bearssl/src/aes_small_cbcdec.c
new file mode 100644
index 0000000..8567244
--- /dev/null
+++ b/third_party/bearssl/src/aes_small_cbcdec.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h; stores br_aes_keysched()'s round count and the vtable */
+void
+br_aes_small_cbcdec_init(br_aes_small_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_small_cbcdec_vtable;
+}
+
+/* see bearssl_block.h */
+void
+br_aes_small_cbcdec_run(const br_aes_small_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *chain = iv;
+	unsigned char *blk;
+
+	/* One 16-byte block per pass; len is stepped down by 16 until zero. */
+	for (blk = data; len > 0; blk += 16, len -= 16) {
+		unsigned char saved[16];
+		size_t j;
+
+		/* Keep the ciphertext: it becomes the next chaining value. */
+		memcpy(saved, blk, 16);
+		br_aes_small_decrypt(ctx->num_rounds, ctx->skey, blk);
+		/* CBC unchaining: XOR with the previous chaining value. */
+		for (j = 0; j < 16; j ++) {
+			blk[j] ^= chain[j];
+		}
+		memcpy(chain, saved, 16);
+	}
+}
+
+/* see bearssl_block.h; table of the br_aes_small_cbcdec_* entry points */
+const br_block_cbcdec_class br_aes_small_cbcdec_vtable = {
+	sizeof(br_aes_small_cbcdec_keys),	/* size of the matching keys structure */
+	16,	/* NOTE(review): presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* NOTE(review): presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_small_cbcdec_init,	/* cast from the typed-context signature */
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_small_cbcdec_run
+};
diff --git a/third_party/bearssl/src/aes_small_cbcenc.c b/third_party/bearssl/src/aes_small_cbcenc.c
new file mode 100644
index 0000000..0dc2910
--- /dev/null
+++ b/third_party/bearssl/src/aes_small_cbcenc.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h; stores br_aes_keysched()'s round count and the vtable */
+void
+br_aes_small_cbcenc_init(br_aes_small_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_small_cbcenc_vtable;
+}
+
+/* see bearssl_block.h */
+void
+br_aes_small_cbcenc_run(const br_aes_small_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *chain = iv;
+	unsigned char *blk;
+
+	/* One 16-byte block per pass; len is stepped down by 16 until zero. */
+	for (blk = data; len > 0; blk += 16, len -= 16) {
+		size_t j;
+
+		/* CBC chaining: mix the previous chaining value into the block. */
+		for (j = 0; j < 16; j ++) {
+			blk[j] ^= chain[j];
+		}
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, blk);
+		/* The freshly encrypted block is the next chaining value. */
+		memcpy(chain, blk, 16);
+	}
+}
+
+/* see bearssl_block.h; table of the br_aes_small_cbcenc_* entry points */
+const br_block_cbcenc_class br_aes_small_cbcenc_vtable = {
+	sizeof(br_aes_small_cbcenc_keys),	/* size of the matching keys structure */
+	16,	/* NOTE(review): presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* NOTE(review): presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_aes_small_cbcenc_init,	/* cast from the typed-context signature */
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_small_cbcenc_run
+};
diff --git a/third_party/bearssl/src/aes_small_ctr.c b/third_party/bearssl/src/aes_small_ctr.c
new file mode 100644
index 0000000..d5d371c
--- /dev/null
+++ b/third_party/bearssl/src/aes_small_ctr.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h; stores br_aes_keysched()'s round count and the vtable */
+void
+br_aes_small_ctr_init(br_aes_small_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_small_ctr_vtable;
+}
+
+/* XOR the first len bytes of src into dst, in place (dst[i] ^= src[i]). */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	size_t k;
+
+	for (k = 0; k < len; k ++) {
+		out[k] ^= in[k];
+	}
+}
+
+/* see bearssl_block.h */
+uint32_t
+br_aes_small_ctr_run(const br_aes_small_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *out = data;
+
+	while (len > 0) {
+		unsigned char ks[16];
+		size_t chunk;
+
+		/* Counter block: 12 IV bytes, then cc written by br_enc32be(). */
+		memcpy(ks, iv, 12);
+		br_enc32be(ks + 12, cc);
+		cc ++;
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, ks);
+		/* The last chunk may be shorter than a full 16-byte block. */
+		chunk = (len < 16) ? len : 16;
+		xorbuf(out, ks, chunk);
+		out += chunk;
+		len -= chunk;
+	}
+	return cc;
+}
+
+/* see bearssl_block.h; table of the br_aes_small_ctr_* entry points */
+const br_block_ctr_class br_aes_small_ctr_vtable = {
+	sizeof(br_aes_small_ctr_keys),	/* size of the matching keys structure */
+	16,	/* NOTE(review): presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* NOTE(review): presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_small_ctr_init,	/* cast from the typed-context signature */
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_small_ctr_run
+};
diff --git a/third_party/bearssl/src/aes_small_ctrcbc.c b/third_party/bearssl/src/aes_small_ctrcbc.c
new file mode 100644
index 0000000..2d6ba32
--- /dev/null
+++ b/third_party/bearssl/src/aes_small_ctrcbc.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h; stores br_aes_keysched()'s round count and the vtable */
+void
+br_aes_small_ctrcbc_init(br_aes_small_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_keysched(ctx->skey, key, len);
+	ctx->vtable = &br_aes_small_ctrcbc_vtable;
+}
+
+/* dst[i] ^= src[i] for every i below len; a zero len changes nothing. */
+static void
+xorbuf(void *dst, const void *src, size_t len)
+{
+	unsigned char *out = dst;
+	const unsigned char *in = src;
+	const unsigned char *stop = in + len;
+
+	while (in != stop) {
+		*out ++ ^= *in ++;
+	}
+}
+
+/* see bearssl_block.h; XORs data with encrypted counter blocks, updates ctr */
+void
+br_aes_small_ctrcbc_ctr(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf, *bctr;
+	uint32_t cc0, cc1, cc2, cc3;	/* counter words; cc0 is the one incremented */
+
+	buf = data;
+	bctr = ctr;
+	cc3 = br_dec32be(bctr +  0);	/* bytes 0..3: last word to receive a carry */
+	cc2 = br_dec32be(bctr +  4);
+	cc1 = br_dec32be(bctr +  8);
+	cc0 = br_dec32be(bctr + 12);	/* bytes 12..15: word incremented per block */
+	while (len > 0) {	/* assumes len is a multiple of 16: it only drops by 16 */
+		unsigned char tmp[16];
+		uint32_t carry;
+
+		br_enc32be(tmp +  0, cc3);	/* rebuild the 16-byte counter block */
+		br_enc32be(tmp +  4, cc2);
+		br_enc32be(tmp +  8, cc1);
+		br_enc32be(tmp + 12, cc0);
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, tmp);
+		xorbuf(buf, tmp, 16);	/* encrypted counter block is XORed into the data */
+		buf += 16;
+		len -= 16;
+		cc0 ++;
+		carry = (~(cc0 | -cc0)) >> 31;	/* 1 if cc0 wrapped to 0, else 0; no branch */
+		cc1 += carry;
+		carry &= (~(cc1 | -cc1)) >> 31;	/* stays 1 only if cc1 is now 0 as well */
+		cc2 += carry;
+		carry &= (~(cc2 | -cc2)) >> 31;	/* stays 1 only if cc2 is now 0 as well */
+		cc3 += carry;
+	}
+	br_enc32be(bctr +  0, cc3);	/* write the advanced counter back to ctr */
+	br_enc32be(bctr +  4, cc2);
+	br_enc32be(bctr +  8, cc1);
+	br_enc32be(bctr + 12, cc0);
+}
+
+/* see bearssl_block.h */
+void
+br_aes_small_ctrcbc_mac(const br_aes_small_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *in;
+
+	/* Absorb one 16-byte block per pass, updating cbcmac in place. */
+	/* len is stepped down by 16 each pass until it reaches zero. */
+	for (in = data; len > 0; in += 16, len -= 16) {
+		xorbuf(cbcmac, in, 16);
+		/* Encrypting the XORed state yields the next MAC value. */
+		br_aes_small_encrypt(ctx->num_rounds, ctx->skey, cbcmac);
+	}
+}
+
+/* see bearssl_block.h; CTR pass first, then CBC-MAC over the resulting data */
+void
+br_aes_small_ctrcbc_encrypt(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	br_aes_small_ctrcbc_ctr(ctx, ctr, data, len);	/* transforms data in place */
+	br_aes_small_ctrcbc_mac(ctx, cbcmac, data, len);	/* MAC sees the data as left by the CTR pass */
+}
+
+/* see bearssl_block.h; CBC-MAC over the incoming data first, then the CTR pass */
+void
+br_aes_small_ctrcbc_decrypt(const br_aes_small_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	br_aes_small_ctrcbc_mac(ctx, cbcmac, data, len);	/* MAC sees the data before the CTR pass alters it */
+	br_aes_small_ctrcbc_ctr(ctx, ctr, data, len);	/* transforms data in place */
+}
+
+/* see bearssl_block.h; table of the br_aes_small_ctrcbc_* entry points */
+const br_block_ctrcbc_class br_aes_small_ctrcbc_vtable = {
+	sizeof(br_aes_small_ctrcbc_keys),	/* size of the matching keys structure */
+	16,	/* NOTE(review): presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* NOTE(review): presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_small_ctrcbc_init,	/* cast from the typed-context signature */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_small_ctrcbc_encrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_small_ctrcbc_decrypt,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_small_ctrcbc_ctr,
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_small_ctrcbc_mac
+};
diff --git a/third_party/bearssl/src/aes_small_dec.c b/third_party/bearssl/src/aes_small_dec.c
new file mode 100644
index 0000000..59dca8e
--- /dev/null
+++ b/third_party/bearssl/src/aes_small_dec.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Inverse S-box: 256 byte entries, looked up as iS[state[i]] in inv_sub_bytes().
+ */
+static const unsigned char iS[] = {
+	0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38, 0xBF, 0x40, 0xA3, 0x9E,
+	0x81, 0xF3, 0xD7, 0xFB, 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
+	0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB, 0x54, 0x7B, 0x94, 0x32,
+	0xA6, 0xC2, 0x23, 0x3D, 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
+	0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2, 0x76, 0x5B, 0xA2, 0x49,
+	0x6D, 0x8B, 0xD1, 0x25, 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
+	0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92, 0x6C, 0x70, 0x48, 0x50,
+	0xFD, 0xED, 0xB9, 0xDA, 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
+	0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A, 0xF7, 0xE4, 0x58, 0x05,
+	0xB8, 0xB3, 0x45, 0x06, 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
+	0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B, 0x3A, 0x91, 0x11, 0x41,
+	0x4F, 0x67, 0xDC, 0xEA, 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
+	0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85, 0xE2, 0xF9, 0x37, 0xE8,
+	0x1C, 0x75, 0xDF, 0x6E, 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
+	0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B, 0xFC, 0x56, 0x3E, 0x4B,
+	0xC6, 0xD2, 0x79, 0x20, 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
+	0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31, 0xB1, 0x12, 0x10, 0x59,
+	0x27, 0x80, 0xEC, 0x5F, 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
+	0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF, 0xA0, 0xE0, 0x3B, 0x4D,
+	0xAE, 0x2A, 0xF5, 0xB0, 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
+	0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26, 0xE1, 0x69, 0x14, 0x63,
+	0x55, 0x21, 0x0C, 0x7D
+};
+
+/* XOR four 32-bit subkey words into the 16-entry state, high byte first. */
+static void
+add_round_key(unsigned *state, const uint32_t *skeys)
+{
+	int col;
+
+	for (col = 0; col < 4; col ++) {
+		uint32_t w = skeys[col];
+		unsigned *s = state + (col << 2);
+		s[0] ^= (unsigned)(w >> 24);
+		s[1] ^= (unsigned)(w >> 16) & 0xFF;
+		s[2] ^= (unsigned)(w >> 8) & 0xFF;
+		s[3] ^= (unsigned)w & 0xFF;
+	}
+}
+
+/* Replace each of the 16 state entries by its iS[] table entry. */
+static void
+inv_sub_bytes(unsigned *state)
+{
+	unsigned *p;
+	for (p = state; p != state + 16; p ++) {
+		*p = iS[*p];
+	}
+}
+
+/*
+ * Inverse ShiftRows on the 16-entry state: entries 0, 4, 8 and 12 stay put,
+ * every other entry i receives the old value found at index src[i].
+ */
+static void
+inv_shift_rows(unsigned *state)
+{
+	static const unsigned char src[16] = {
+		 0, 13, 10,  7,
+		 4,  1, 14, 11,
+		 8,  5,  2, 15,
+		12,  9,  6,  3
+	};
+	unsigned old[16];
+	int i;
+
+	/* Snapshot first so the permutation reads unmodified values. */
+	for (i = 0; i < 16; i ++) {
+		old[i] = state[i];
+	}
+	for (i = 0; i < 16; i ++) {
+		state[i] = old[src[i]];
+	}
+}
+
+/* Fold the bits of x above bit 7 back into the low byte; result is < 0x100. */
+static inline unsigned
+gf256red(unsigned x)
+{
+	unsigned hi = x >> 8;
+	unsigned fold = hi ^ (hi << 1) ^ (hi << 3) ^ (hi << 4);
+	return (x ^ fold) & 0xFF;
+}
+
+static void
+inv_mix_columns(unsigned *state)	/* state: 16 entries, handled four at a time */
+{
+	int i;
+
+	for (i = 0; i < 16; i += 4) {
+		unsigned s0, s1, s2, s3;
+		unsigned t0, t1, t2, t3;	/* unreduced products, wider than 8 bits */
+
+		s0 = state[i + 0];
+		s1 = state[i + 1];
+		s2 = state[i + 2];
+		s3 = state[i + 3];
+		t0 = (s0 << 1) ^ (s0 << 2) ^ (s0 << 3)	/* s0 times 0x0E (2+4+8) */
+			^ s1 ^ (s1 << 1) ^ (s1 << 3)	/* s1 times 0x0B (1+2+8) */
+			^ s2 ^ (s2 << 2) ^ (s2 << 3)	/* s2 times 0x0D (1+4+8) */
+			^ s3 ^ (s3 << 3);	/* s3 times 0x09 (1+8) */
+		t1 = s0 ^ (s0 << 3)	/* s0 times 0x09 */
+			^ (s1 << 1) ^ (s1 << 2) ^ (s1 << 3)	/* s1 times 0x0E */
+			^ s2 ^ (s2 << 1) ^ (s2 << 3)	/* s2 times 0x0B */
+			^ s3 ^ (s3 << 2) ^ (s3 << 3);	/* s3 times 0x0D */
+		t2 = s0 ^ (s0 << 2) ^ (s0 << 3)	/* s0 times 0x0D */
+			^ s1 ^ (s1 << 3)	/* s1 times 0x09 */
+			^ (s2 << 1) ^ (s2 << 2) ^ (s2 << 3)	/* s2 times 0x0E */
+			^ s3 ^ (s3 << 1) ^ (s3 << 3);	/* s3 times 0x0B */
+		t3 = s0 ^ (s0 << 1) ^ (s0 << 3)	/* s0 times 0x0B */
+			^ s1 ^ (s1 << 2) ^ (s1 << 3)	/* s1 times 0x0D */
+			^ s2 ^ (s2 << 3)	/* s2 times 0x09 */
+			^ (s3 << 1) ^ (s3 << 2) ^ (s3 << 3);	/* s3 times 0x0E */
+		state[i + 0] = gf256red(t0);	/* gf256red() brings each value back below 0x100 */
+		state[i + 1] = gf256red(t1);
+		state[i + 2] = gf256red(t2);
+		state[i + 3] = gf256red(t3);
+	}
+}
+
+/* see inner.h */
+void
+br_aes_small_decrypt(unsigned num_rounds, const uint32_t *skey, void *data)
+{
+	unsigned char *bytes = data;
+	unsigned st[16];
+	unsigned j, r;
+
+	for (j = 0; j < 16; j ++) {
+		st[j] = bytes[j];
+	}
+	add_round_key(st, skey + (num_rounds << 2));
+	for (r = num_rounds - 1; r != 0; r --) {
+		inv_shift_rows(st);
+		inv_sub_bytes(st);
+		add_round_key(st, skey + (r << 2));
+		inv_mix_columns(st);
+	}
+	/* Final round: same steps minus inv_mix_columns(), using skey[0..3]. */
+	inv_shift_rows(st);
+	inv_sub_bytes(st);
+	add_round_key(st, skey);
+	for (j = 0; j < 16; j ++) {
+		bytes[j] = st[j];
+	}
+}
diff --git a/third_party/bearssl/src/aes_small_enc.c b/third_party/bearssl/src/aes_small_enc.c
new file mode 100644
index 0000000..29f48a8
--- /dev/null
+++ b/third_party/bearssl/src/aes_small_enc.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define S   br_aes_S	/* short local name for br_aes_S, indexed as S[...] in sub_bytes() */
+
+/*
+ * XOR a round key into the state. Each of the four 32-bit words in skeys[]
+ * covers four consecutive state entries, most significant byte first.
+ */
+static void
+add_round_key(unsigned *state, const uint32_t *skeys)
+{
+	int i;
+
+	for (i = 0; i < 16; i ++) {
+		/* Word i / 4, byte (i mod 4) counted from the top. */
+		unsigned shift = 24 - ((unsigned)(i & 3) << 3);
+		state[i] ^= (unsigned)(skeys[i >> 2] >> shift) & 0xFF;
+	}
+}
+
+/* Replace each of the 16 state entries by its S[] table entry. */
+static void
+sub_bytes(unsigned *state)
+{
+	unsigned *p;
+	for (p = state; p != state + 16; p ++) {
+		*p = S[*p];
+	}
+}
+
+/*
+ * ShiftRows on the 16-entry state: entries 0, 4, 8 and 12 stay put,
+ * every other entry i receives the old value found at index src[i].
+ */
+static void
+shift_rows(unsigned *state)
+{
+	static const unsigned char src[16] = {
+		 0,  5, 10, 15,
+		 4,  9, 14,  3,
+		 8, 13,  2,  7,
+		12,  1,  6, 11
+	};
+	unsigned old[16];
+	int i;
+
+	/* Snapshot first so the permutation reads unmodified values. */
+	for (i = 0; i < 16; i ++) {
+		old[i] = state[i];
+	}
+	for (i = 0; i < 16; i ++) {
+		state[i] = old[src[i]];
+	}
+}
+
+/*
+ * MixColumns on each group of four state entries. Doublings are plain left
+ * shifts; a result that spilled into bit 8 is then reduced by XOR with 0x11B.
+ */
+static void
+mix_columns(unsigned *state)
+{
+	unsigned *c;
+
+	for (c = state; c != state + 16; c += 4) {
+		unsigned a = c[0], b = c[1], d = c[2], e = c[3];
+		unsigned sum = a ^ b ^ d ^ e;
+		unsigned m0 = sum ^ a ^ ((a ^ b) << 1);
+		unsigned m1 = sum ^ b ^ ((b ^ d) << 1);
+		unsigned m2 = sum ^ d ^ ((d ^ e) << 1);
+		unsigned m3 = sum ^ e ^ ((e ^ a) << 1);
+
+		c[0] = m0 ^ ((unsigned)(-(int)(m0 >> 8)) & 0x11B);
+		c[1] = m1 ^ ((unsigned)(-(int)(m1 >> 8)) & 0x11B);
+		c[2] = m2 ^ ((unsigned)(-(int)(m2 >> 8)) & 0x11B);
+		c[3] = m3 ^ ((unsigned)(-(int)(m3 >> 8)) & 0x11B);
+	}
+}
+
+/* see inner.h; subkeys are consumed first-to-last, four words per round */
+void
+br_aes_small_encrypt(unsigned num_rounds, const uint32_t *skey, void *data)
+{
+	unsigned char *bytes = data;
+	unsigned st[16];
+	unsigned j, r;
+
+	for (j = 0; j < 16; j ++) {
+		st[j] = bytes[j];
+	}
+	add_round_key(st, skey);
+	for (r = 1; r < num_rounds; r ++) {
+		sub_bytes(st);
+		shift_rows(st);
+		mix_columns(st);
+		add_round_key(st, skey + (r << 2));
+	}
+	/* Last round leaves out mix_columns(). */
+	sub_bytes(st);
+	shift_rows(st);
+	add_round_key(st, skey + (num_rounds << 2));
+	for (j = 0; j < 16; j ++) {
+		bytes[j] = st[j];
+	}
+}
diff --git a/third_party/bearssl/src/aes_x86ni.c b/third_party/bearssl/src/aes_x86ni.c
new file mode 100644
index 0000000..d5408f1
--- /dev/null
+++ b/third_party/bearssl/src/aes_x86ni.c
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+/*
+ * This code contains the AES key schedule implementation using the
+ * AES-NI opcodes.
+ */
+
+#if BR_AES_X86NI
+
+/* see inner.h; returns what br_cpuid() reports for the ECX feature mask below */
+int
+br_aes_x86ni_supported(void)
+{
+	/*
+	 * Bit mask for features in ECX:
+	 *   19   SSE4.1 (used for _mm_insert_epi32(), for AES-CTR)
+	 *   25   AES-NI
+	 */
+	return br_cpuid(0, 0, 0x02080000, 0);	/* 0x02080000 == (1 << 25) | (1 << 19) */
+}
+
+BR_TARGETS_X86_UP
+
+BR_TARGET("sse2,aes")
+static inline __m128i
+expand_step128(__m128i k, __m128i k2)
+{
+	int n;
+	for (n = 0; n < 3; n ++) {	/* three rounds of k ^= (k << 4 bytes) */
+		k = _mm_xor_si128(k, _mm_slli_si128(k, 4));
+	}
+	return _mm_xor_si128(k, _mm_shuffle_epi32(k2, 0xFF));	/* 0xFF: lane 3 of k2 in every lane */
+}
+
+BR_TARGET("sse2,aes")
+static inline void
+expand_step192(__m128i *t1, __m128i *t2, __m128i *t3)	/* all three are updated in place */
+{
+	__m128i t4;
+
+	*t2 = _mm_shuffle_epi32(*t2, 0x55);	/* 0x55: lane 1 of *t2 in every lane */
+	t4 = _mm_slli_si128(*t1, 0x4);	/* *t1 shifted left by 4 bytes */
+	*t1 = _mm_xor_si128(*t1, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t1 = _mm_xor_si128(*t1, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t1 = _mm_xor_si128(*t1, t4);	/* lane i of *t1 is now the XOR of its old lanes 0..i */
+	*t1 = _mm_xor_si128(*t1, *t2);
+	*t2 = _mm_shuffle_epi32(*t1, 0xFF);	/* 0xFF: lane 3 of the updated *t1 in every lane */
+	t4 = _mm_slli_si128(*t3, 0x4);
+	*t3 = _mm_xor_si128(*t3, t4);
+	*t3 = _mm_xor_si128(*t3, *t2);
+}
+
+BR_TARGET("sse2,aes")
+static inline void
+expand_step256_1(__m128i *t1, __m128i *t2)	/* both are updated in place */
+{
+	__m128i t4;
+
+	*t2 = _mm_shuffle_epi32(*t2, 0xFF);	/* 0xFF: lane 3 of *t2 in every lane */
+	t4 = _mm_slli_si128(*t1, 0x4);	/* *t1 shifted left by 4 bytes */
+	*t1 = _mm_xor_si128(*t1, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t1 = _mm_xor_si128(*t1, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t1 = _mm_xor_si128(*t1, t4);	/* lane i of *t1 is now the XOR of its old lanes 0..i */
+	*t1 = _mm_xor_si128(*t1, *t2);
+}
+
+BR_TARGET("sse2,aes")
+static inline void
+expand_step256_2(__m128i *t1, __m128i *t3)	/* *t1 is only read; *t3 is updated in place */
+{
+	__m128i t2, t4;
+
+	t4 = _mm_aeskeygenassist_si128(*t1, 0x0);	/* round constant argument is 0 here */
+	t2 = _mm_shuffle_epi32(t4, 0xAA);	/* 0xAA: lane 2 of t4 in every lane */
+	t4 = _mm_slli_si128(*t3, 0x4);	/* *t3 shifted left by 4 bytes */
+	*t3 = _mm_xor_si128(*t3, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t3 = _mm_xor_si128(*t3, t4);
+	t4 = _mm_slli_si128(t4, 0x4);
+	*t3 = _mm_xor_si128(*t3, t4);	/* lane i of *t3 is now the XOR of its old lanes 0..i */
+	*t3 = _mm_xor_si128(*t3, t2);
+}
+
+/*
+ * Perform key schedule for AES, encryption direction. Subkeys are written in
+ * sk[0..rounds] and the number of rounds (10, 12 or 14) is returned. Key length
+ * MUST be 16, 24 or 32 bytes; any other length returns 0 and leaves sk[] as is.
+ */
+BR_TARGET("sse2,aes")
+static unsigned
+x86ni_keysched(__m128i *sk, const void *key, size_t len)
+{
+	const unsigned char *kb;
+
+#define KEXP128(k, i, rcon)   do { \
+		k = expand_step128(k, _mm_aeskeygenassist_si128(k, rcon)); \
+		sk[i] = k; \
+	} while (0)
+
+#define KEXP192(i, rcon1, rcon2)   do { \
+		sk[(i) + 0] = t1; \
+		sk[(i) + 1] = t3; \
+		t2 = _mm_aeskeygenassist_si128(t3, rcon1); \
+		expand_step192(&t1, &t2, &t3); \
+		sk[(i) + 1] = _mm_castpd_si128(_mm_shuffle_pd( \
+			_mm_castsi128_pd(sk[(i) + 1]), \
+			_mm_castsi128_pd(t1), 0)); \
+		sk[(i) + 2] = _mm_castpd_si128(_mm_shuffle_pd( \
+			_mm_castsi128_pd(t1), \
+			_mm_castsi128_pd(t3), 1)); \
+		t2 = _mm_aeskeygenassist_si128(t3, rcon2); \
+		expand_step192(&t1, &t2, &t3); \
+	} while (0)
+
+#define KEXP256(i, rcon)   do { \
+		sk[(i) + 0] = t3; \
+		t2 = _mm_aeskeygenassist_si128(t3, rcon); \
+		expand_step256_1(&t1, &t2); \
+		sk[(i) + 1] = t1; \
+		expand_step256_2(&t1, &t3); \
+	} while (0)
+
+	kb = key;
+	switch (len) {
+		__m128i t1, t2, t3;	/* scratch shared by the cases and the KEXP* macros */
+
+	case 16:	/* 16-byte key: fills sk[0..10] */
+		t1 = _mm_loadu_si128((const void *)kb);
+		sk[0] = t1;	/* subkey 0 is the key itself */
+		KEXP128(t1,  1, 0x01);
+		KEXP128(t1,  2, 0x02);
+		KEXP128(t1,  3, 0x04);
+		KEXP128(t1,  4, 0x08);
+		KEXP128(t1,  5, 0x10);
+		KEXP128(t1,  6, 0x20);
+		KEXP128(t1,  7, 0x40);
+		KEXP128(t1,  8, 0x80);
+		KEXP128(t1,  9, 0x1B);
+		KEXP128(t1, 10, 0x36);
+		return 10;
+
+	case 24:	/* 24-byte key: fills sk[0..12] */
+		t1 = _mm_loadu_si128((const void *)kb);	/* key bytes 0..15 */
+		t3 = _mm_loadu_si128((const void *)(kb + 8));	/* key bytes 8..23 (overlaps t1) */
+		t3 = _mm_shuffle_epi32(t3, 0x4E);	/* 0x4E swaps the two 64-bit halves */
+		KEXP192(0, 0x01, 0x02);	/* each KEXP192 writes three subkeys */
+		KEXP192(3, 0x04, 0x08);
+		KEXP192(6, 0x10, 0x20);
+		KEXP192(9, 0x40, 0x80);
+		sk[12] = t1;
+		return 12;
+
+	case 32:	/* 32-byte key: fills sk[0..14] */
+		t1 = _mm_loadu_si128((const void *)kb);	/* key bytes 0..15 */
+		t3 = _mm_loadu_si128((const void *)(kb + 16));	/* key bytes 16..31 */
+		sk[0] = t1;
+		KEXP256( 1, 0x01);	/* each KEXP256 writes two subkeys */
+		KEXP256( 3, 0x02);
+		KEXP256( 5, 0x04);
+		KEXP256( 7, 0x08);
+		KEXP256( 9, 0x10);
+		KEXP256(11, 0x20);
+		sk[13] = t3;
+		t2 = _mm_aeskeygenassist_si128(t3, 0x40);
+		expand_step256_1(&t1, &t2);
+		sk[14] = t1;
+		return 14;
+
+	default:	/* unsupported key length: nothing written to sk[] */
+		return 0;
+	}
+
+#undef KEXP128
+#undef KEXP192
+#undef KEXP256
+}
+
+/* see inner.h; copies the (rounds + 1) 16-byte subkeys to skni unchanged */
+BR_TARGET("sse2,aes")
+unsigned
+br_aes_x86ni_keysched_enc(unsigned char *skni, const void *key, size_t len)
+{
+	__m128i subkeys[15];
+	unsigned nr = x86ni_keysched(subkeys, key, len);
+
+	/* (nr + 1) << 4 is the byte length of nr + 1 subkeys of 16 bytes. */
+	memcpy(skni, subkeys, (nr + 1) << 4);
+	return nr;
+}
+
+/* see inner.h; subkeys are stored reversed, inner ones through _mm_aesimc_si128() */
+BR_TARGET("sse2,aes")
+unsigned
+br_aes_x86ni_keysched_dec(unsigned char *skni, const void *key, size_t len)
+{
+	__m128i enc[15];
+	unsigned nr = x86ni_keysched(enc, key, len);
+	unsigned j;
+
+	_mm_storeu_si128((void *)skni, enc[nr]);
+	_mm_storeu_si128((void *)(skni + (nr << 4)), enc[0]);
+	for (j = 1; j < nr; j ++) {
+		_mm_storeu_si128((void *)(skni + (j << 4)),
+			_mm_aesimc_si128(enc[nr - j]));
+	}
+	return nr;
+}
+
+BR_TARGETS_X86_DOWN
+
+#endif
diff --git a/third_party/bearssl/src/aes_x86ni_cbcdec.c b/third_party/bearssl/src/aes_x86ni_cbcdec.c
new file mode 100644
index 0000000..862b1b5
--- /dev/null
+++ b/third_party/bearssl/src/aes_x86ni_cbcdec.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_AES_X86NI
+
+/* see bearssl_block.h; the vtable if br_aes_x86ni_supported() is nonzero, else NULL */
+const br_block_cbcdec_class *
+br_aes_x86ni_cbcdec_get_vtable(void)
+{
+	return br_aes_x86ni_supported() ? &br_aes_x86ni_cbcdec_vtable : NULL;
+}
+
+/* see bearssl_block.h; stores the decryption key schedule's round count and the vtable */
+void
+br_aes_x86ni_cbcdec_init(br_aes_x86ni_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->num_rounds = br_aes_x86ni_keysched_dec(ctx->skey.skni, key, len);
+	ctx->vtable = &br_aes_x86ni_cbcdec_vtable;
+}
+
+BR_TARGETS_X86_UP
+
+/* see bearssl_block.h; CBC decryption, up to four 16-byte blocks per loop pass */
+BR_TARGET("sse2,aes")
+void
+br_aes_x86ni_cbcdec_run(const br_aes_x86ni_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15], ivx;	/* local subkey copy and running chaining value */
+	unsigned u;
+
+	buf = data;
+	ivx = _mm_loadu_si128(iv);
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {	/* load the num_rounds + 1 subkeys */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+	while (len > 0) {
+		__m128i x0, x1, x2, x3, e0, e1, e2, e3;	/* x*: working blocks, e*: saved input blocks */
+
+		x0 = _mm_loadu_si128((void *)(buf +  0));
+		if (len >= 64) {	/* four full blocks available */
+			x1 = _mm_loadu_si128((void *)(buf + 16));
+			x2 = _mm_loadu_si128((void *)(buf + 32));
+			x3 = _mm_loadu_si128((void *)(buf + 48));
+		} else {	/* tail: unused lanes get copies so that x3 is the last real block */
+			x0 = _mm_loadu_si128((void *)(buf +  0));	/* same load as above, repeated */
+			if (len >= 32) {
+				x1 = _mm_loadu_si128((void *)(buf + 16));
+				if (len >= 48) {
+					x2 = _mm_loadu_si128(
+						(void *)(buf + 32));
+					x3 = x2;	/* three blocks: last one is x2 */
+				} else {
+					x2 = x0;
+					x3 = x1;	/* two blocks: last one is x1 */
+				}
+			} else {
+				x1 = x0;
+				x2 = x0;
+				x3 = x0;	/* one block: last one is x0 */
+			}
+		}
+		e0 = x0;	/* keep the input blocks for the chaining XORs below */
+		e1 = x1;
+		e2 = x2;
+		e3 = x3;
+		x0 = _mm_xor_si128(x0, sk[0]);	/* XOR with subkey 0 */
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x2 = _mm_xor_si128(x2, sk[0]);
+		x3 = _mm_xor_si128(x3, sk[0]);
+		x0 = _mm_aesdec_si128(x0, sk[1]);	/* subkeys 1..9 are used for every round count */
+		x1 = _mm_aesdec_si128(x1, sk[1]);
+		x2 = _mm_aesdec_si128(x2, sk[1]);
+		x3 = _mm_aesdec_si128(x3, sk[1]);
+		x0 = _mm_aesdec_si128(x0, sk[2]);
+		x1 = _mm_aesdec_si128(x1, sk[2]);
+		x2 = _mm_aesdec_si128(x2, sk[2]);
+		x3 = _mm_aesdec_si128(x3, sk[2]);
+		x0 = _mm_aesdec_si128(x0, sk[3]);
+		x1 = _mm_aesdec_si128(x1, sk[3]);
+		x2 = _mm_aesdec_si128(x2, sk[3]);
+		x3 = _mm_aesdec_si128(x3, sk[3]);
+		x0 = _mm_aesdec_si128(x0, sk[4]);
+		x1 = _mm_aesdec_si128(x1, sk[4]);
+		x2 = _mm_aesdec_si128(x2, sk[4]);
+		x3 = _mm_aesdec_si128(x3, sk[4]);
+		x0 = _mm_aesdec_si128(x0, sk[5]);
+		x1 = _mm_aesdec_si128(x1, sk[5]);
+		x2 = _mm_aesdec_si128(x2, sk[5]);
+		x3 = _mm_aesdec_si128(x3, sk[5]);
+		x0 = _mm_aesdec_si128(x0, sk[6]);
+		x1 = _mm_aesdec_si128(x1, sk[6]);
+		x2 = _mm_aesdec_si128(x2, sk[6]);
+		x3 = _mm_aesdec_si128(x3, sk[6]);
+		x0 = _mm_aesdec_si128(x0, sk[7]);
+		x1 = _mm_aesdec_si128(x1, sk[7]);
+		x2 = _mm_aesdec_si128(x2, sk[7]);
+		x3 = _mm_aesdec_si128(x3, sk[7]);
+		x0 = _mm_aesdec_si128(x0, sk[8]);
+		x1 = _mm_aesdec_si128(x1, sk[8]);
+		x2 = _mm_aesdec_si128(x2, sk[8]);
+		x3 = _mm_aesdec_si128(x3, sk[8]);
+		x0 = _mm_aesdec_si128(x0, sk[9]);
+		x1 = _mm_aesdec_si128(x1, sk[9]);
+		x2 = _mm_aesdec_si128(x2, sk[9]);
+		x3 = _mm_aesdec_si128(x3, sk[9]);
+		if (num_rounds == 10) {	/* 10 rounds: sk[10] is the last subkey */
+			x0 = _mm_aesdeclast_si128(x0, sk[10]);
+			x1 = _mm_aesdeclast_si128(x1, sk[10]);
+			x2 = _mm_aesdeclast_si128(x2, sk[10]);
+			x3 = _mm_aesdeclast_si128(x3, sk[10]);
+		} else if (num_rounds == 12) {	/* 12 rounds: sk[12] is the last subkey */
+			x0 = _mm_aesdec_si128(x0, sk[10]);
+			x1 = _mm_aesdec_si128(x1, sk[10]);
+			x2 = _mm_aesdec_si128(x2, sk[10]);
+			x3 = _mm_aesdec_si128(x3, sk[10]);
+			x0 = _mm_aesdec_si128(x0, sk[11]);
+			x1 = _mm_aesdec_si128(x1, sk[11]);
+			x2 = _mm_aesdec_si128(x2, sk[11]);
+			x3 = _mm_aesdec_si128(x3, sk[11]);
+			x0 = _mm_aesdeclast_si128(x0, sk[12]);
+			x1 = _mm_aesdeclast_si128(x1, sk[12]);
+			x2 = _mm_aesdeclast_si128(x2, sk[12]);
+			x3 = _mm_aesdeclast_si128(x3, sk[12]);
+		} else {	/* any other round count is handled as 14 rounds (sk[14] last) */
+			x0 = _mm_aesdec_si128(x0, sk[10]);
+			x1 = _mm_aesdec_si128(x1, sk[10]);
+			x2 = _mm_aesdec_si128(x2, sk[10]);
+			x3 = _mm_aesdec_si128(x3, sk[10]);
+			x0 = _mm_aesdec_si128(x0, sk[11]);
+			x1 = _mm_aesdec_si128(x1, sk[11]);
+			x2 = _mm_aesdec_si128(x2, sk[11]);
+			x3 = _mm_aesdec_si128(x3, sk[11]);
+			x0 = _mm_aesdec_si128(x0, sk[12]);
+			x1 = _mm_aesdec_si128(x1, sk[12]);
+			x2 = _mm_aesdec_si128(x2, sk[12]);
+			x3 = _mm_aesdec_si128(x3, sk[12]);
+			x0 = _mm_aesdec_si128(x0, sk[13]);
+			x1 = _mm_aesdec_si128(x1, sk[13]);
+			x2 = _mm_aesdec_si128(x2, sk[13]);
+			x3 = _mm_aesdec_si128(x3, sk[13]);
+			x0 = _mm_aesdeclast_si128(x0, sk[14]);
+			x1 = _mm_aesdeclast_si128(x1, sk[14]);
+			x2 = _mm_aesdeclast_si128(x2, sk[14]);
+			x3 = _mm_aesdeclast_si128(x3, sk[14]);
+		}
+		x0 = _mm_xor_si128(x0, ivx);	/* block 0 chains on the incoming chaining value */
+		x1 = _mm_xor_si128(x1, e0);	/* later blocks chain on the preceding input block */
+		x2 = _mm_xor_si128(x2, e1);
+		x3 = _mm_xor_si128(x3, e2);
+		ivx = e3;	/* last input block becomes the next chaining value */
+		_mm_storeu_si128((void *)(buf +  0), x0);
+		if (len >= 64) {
+			_mm_storeu_si128((void *)(buf + 16), x1);
+			_mm_storeu_si128((void *)(buf + 32), x2);
+			_mm_storeu_si128((void *)(buf + 48), x3);
+			buf += 64;
+			len -= 64;
+		} else {	/* tail: store only the blocks that were really loaded */
+			if (len >= 32) {
+				_mm_storeu_si128((void *)(buf + 16), x1);
+				if (len >= 48) {
+					_mm_storeu_si128(
+						(void *)(buf + 32), x2);
+				}
+			}
+			break;	/* fewer than four blocks were left: done */
+		}
+	}
+	_mm_storeu_si128(iv, ivx);	/* write the updated chaining value back to iv */
+}
+
+BR_TARGETS_X86_DOWN
+
+/* see bearssl_block.h; table of the br_aes_x86ni_cbcdec_* entry points */
+const br_block_cbcdec_class br_aes_x86ni_cbcdec_vtable = {
+	sizeof(br_aes_x86ni_cbcdec_keys),	/* size of the matching keys structure */
+	16,	/* NOTE(review): presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* NOTE(review): presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))
+		&br_aes_x86ni_cbcdec_init,	/* cast from the typed-context signature */
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))
+		&br_aes_x86ni_cbcdec_run
+};
+
+#else
+
+/* see bearssl_block.h; stub used when BR_AES_X86NI is false: always NULL */
+const br_block_cbcdec_class *
+br_aes_x86ni_cbcdec_get_vtable(void)
+{
+	return NULL;
+}
+
+#endif
diff --git a/third_party/bearssl/src/aes_x86ni_cbcenc.c b/third_party/bearssl/src/aes_x86ni_cbcenc.c
new file mode 100644
index 0000000..85feecd
--- /dev/null
+++ b/third_party/bearssl/src/aes_x86ni_cbcenc.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_AES_X86NI
+
+/* Vtable for AES-NI CBC encryption, or NULL when br_aes_x86ni_supported() is false; see bearssl_block.h */
+const br_block_cbcenc_class *
+br_aes_x86ni_cbcenc_get_vtable(void)
+{
+	return br_aes_x86ni_supported() ? &br_aes_x86ni_cbcenc_vtable : NULL;
+}
+
+/* Init ctx: set its vtable, then keep br_aes_x86ni_keysched_enc()'s result as the round count; see bearssl_block.h */
+void
+br_aes_x86ni_cbcenc_init(br_aes_x86ni_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_x86ni_cbcenc_vtable;
+	ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len);
+}
+
+BR_TARGETS_X86_UP
+
+/* CBC-encrypt data in place, 16 bytes per iteration; iv is read, then updated. See bearssl_block.h */
+BR_TARGET("sse2,aes")
+void
+br_aes_x86ni_cbcenc_run(const br_aes_x86ni_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15], ivx;	/* sk: up to 14 rounds + 1 subkeys; ivx: chaining value */
+	unsigned u;
+
+	buf = data;
+	ivx = _mm_loadu_si128(iv);
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {	/* subkey u is read from skni at offset u << 4 */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+	while (len > 0) {	/* NOTE(review): len -= 16 wraps unless len is a multiple of 16 -- confirm contract */
+		__m128i x;
+
+		x = _mm_xor_si128(_mm_loadu_si128((void *)buf), ivx);	/* input block XOR chaining value */
+		x = _mm_xor_si128(x, sk[0]);	/* initial subkey XOR */
+		x = _mm_aesenc_si128(x, sk[1]);
+		x = _mm_aesenc_si128(x, sk[2]);
+		x = _mm_aesenc_si128(x, sk[3]);
+		x = _mm_aesenc_si128(x, sk[4]);
+		x = _mm_aesenc_si128(x, sk[5]);
+		x = _mm_aesenc_si128(x, sk[6]);
+		x = _mm_aesenc_si128(x, sk[7]);
+		x = _mm_aesenc_si128(x, sk[8]);
+		x = _mm_aesenc_si128(x, sk[9]);
+		if (num_rounds == 10) {
+			x = _mm_aesenclast_si128(x, sk[10]);
+		} else if (num_rounds == 12) {
+			x = _mm_aesenc_si128(x, sk[10]);
+			x = _mm_aesenc_si128(x, sk[11]);
+			x = _mm_aesenclast_si128(x, sk[12]);
+		} else {	/* any other round count is handled as 14 rounds */
+			x = _mm_aesenc_si128(x, sk[10]);
+			x = _mm_aesenc_si128(x, sk[11]);
+			x = _mm_aesenc_si128(x, sk[12]);
+			x = _mm_aesenc_si128(x, sk[13]);
+			x = _mm_aesenclast_si128(x, sk[14]);
+		}
+		ivx = x;	/* output block is the next chaining value */
+		_mm_storeu_si128((void *)buf, x);
+		buf += 16;
+		len -= 16;
+	}
+	_mm_storeu_si128(iv, ivx);	/* write the final chaining value back to iv */
+}
+
+BR_TARGETS_X86_DOWN
+
+/* AES-NI CBC-encryption class descriptor; field meanings are declared in bearssl_block.h */
+const br_block_cbcenc_class br_aes_x86ni_cbcenc_vtable = {
+	sizeof(br_aes_x86ni_cbcenc_keys),	/* size of the concrete key context */
+	16,	/* presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))
+		&br_aes_x86ni_cbcenc_init,	/* cast from the keys-typed init to the generic signature */
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))
+		&br_aes_x86ni_cbcenc_run	/* cast from the keys-typed run to the generic signature */
+};
+
+#else
+
+/* Stub for builds where BR_AES_X86NI is false: there is no vtable to offer; see bearssl_block.h */
+const br_block_cbcenc_class *
+br_aes_x86ni_cbcenc_get_vtable(void)
+{
+	return NULL;	/* no AES-NI implementation in this build */
+}
+
+#endif
diff --git a/third_party/bearssl/src/aes_x86ni_ctr.c b/third_party/bearssl/src/aes_x86ni_ctr.c
new file mode 100644
index 0000000..1cddd60
--- /dev/null
+++ b/third_party/bearssl/src/aes_x86ni_ctr.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_AES_X86NI
+
+/* Vtable for AES-NI CTR mode, or NULL when br_aes_x86ni_supported() is false; see bearssl_block.h */
+const br_block_ctr_class *
+br_aes_x86ni_ctr_get_vtable(void)
+{
+	return br_aes_x86ni_supported() ? &br_aes_x86ni_ctr_vtable : NULL;
+}
+
+/* Init ctx: set its vtable, then keep br_aes_x86ni_keysched_enc()'s result as the round count; see bearssl_block.h */
+void
+br_aes_x86ni_ctr_init(br_aes_x86ni_ctr_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_x86ni_ctr_vtable;
+	ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len);
+}
+
+BR_TARGETS_X86_UP
+
+/* CTR mode: XOR keystream into data in place (4 blocks per pass); returns the updated counter cc. See bearssl_block.h */
+BR_TARGET("sse2,sse4.1,aes")
+uint32_t
+br_aes_x86ni_ctr_run(const br_aes_x86ni_ctr_keys *ctx,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned char ivbuf[16];
+	unsigned num_rounds;
+	__m128i sk[15];	/* up to 14 rounds + 1 subkeys */
+	__m128i ivx;
+	unsigned u;
+
+	buf = data;
+	memcpy(ivbuf, iv, 12);	/* only 12 IV bytes are copied; ivbuf[12..15] is replaced by the counter below */
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {	/* subkey u is read from skni at offset u << 4 */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+	ivx = _mm_loadu_si128((void *)ivbuf);	/* NOTE(review): lane 3 is still unset here; always overwritten before use */
+	while (len > 0) {
+		__m128i x0, x1, x2, x3;
+
+		x0 = _mm_insert_epi32(ivx, br_bswap32(cc + 0), 3);	/* lane 3 (bytes 12..15) := br_bswap32(counter) */
+		x1 = _mm_insert_epi32(ivx, br_bswap32(cc + 1), 3);
+		x2 = _mm_insert_epi32(ivx, br_bswap32(cc + 2), 3);
+		x3 = _mm_insert_epi32(ivx, br_bswap32(cc + 3), 3);	/* four blocks are always computed, even for a short tail */
+		x0 = _mm_xor_si128(x0, sk[0]);
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x2 = _mm_xor_si128(x2, sk[0]);
+		x3 = _mm_xor_si128(x3, sk[0]);
+		x0 = _mm_aesenc_si128(x0, sk[1]);
+		x1 = _mm_aesenc_si128(x1, sk[1]);
+		x2 = _mm_aesenc_si128(x2, sk[1]);
+		x3 = _mm_aesenc_si128(x3, sk[1]);
+		x0 = _mm_aesenc_si128(x0, sk[2]);
+		x1 = _mm_aesenc_si128(x1, sk[2]);
+		x2 = _mm_aesenc_si128(x2, sk[2]);
+		x3 = _mm_aesenc_si128(x3, sk[2]);
+		x0 = _mm_aesenc_si128(x0, sk[3]);
+		x1 = _mm_aesenc_si128(x1, sk[3]);
+		x2 = _mm_aesenc_si128(x2, sk[3]);
+		x3 = _mm_aesenc_si128(x3, sk[3]);
+		x0 = _mm_aesenc_si128(x0, sk[4]);
+		x1 = _mm_aesenc_si128(x1, sk[4]);
+		x2 = _mm_aesenc_si128(x2, sk[4]);
+		x3 = _mm_aesenc_si128(x3, sk[4]);
+		x0 = _mm_aesenc_si128(x0, sk[5]);
+		x1 = _mm_aesenc_si128(x1, sk[5]);
+		x2 = _mm_aesenc_si128(x2, sk[5]);
+		x3 = _mm_aesenc_si128(x3, sk[5]);
+		x0 = _mm_aesenc_si128(x0, sk[6]);
+		x1 = _mm_aesenc_si128(x1, sk[6]);
+		x2 = _mm_aesenc_si128(x2, sk[6]);
+		x3 = _mm_aesenc_si128(x3, sk[6]);
+		x0 = _mm_aesenc_si128(x0, sk[7]);
+		x1 = _mm_aesenc_si128(x1, sk[7]);
+		x2 = _mm_aesenc_si128(x2, sk[7]);
+		x3 = _mm_aesenc_si128(x3, sk[7]);
+		x0 = _mm_aesenc_si128(x0, sk[8]);
+		x1 = _mm_aesenc_si128(x1, sk[8]);
+		x2 = _mm_aesenc_si128(x2, sk[8]);
+		x3 = _mm_aesenc_si128(x3, sk[8]);
+		x0 = _mm_aesenc_si128(x0, sk[9]);
+		x1 = _mm_aesenc_si128(x1, sk[9]);
+		x2 = _mm_aesenc_si128(x2, sk[9]);
+		x3 = _mm_aesenc_si128(x3, sk[9]);
+		if (num_rounds == 10) {
+			x0 = _mm_aesenclast_si128(x0, sk[10]);
+			x1 = _mm_aesenclast_si128(x1, sk[10]);
+			x2 = _mm_aesenclast_si128(x2, sk[10]);
+			x3 = _mm_aesenclast_si128(x3, sk[10]);
+		} else if (num_rounds == 12) {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x2 = _mm_aesenc_si128(x2, sk[10]);
+			x3 = _mm_aesenc_si128(x3, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x2 = _mm_aesenc_si128(x2, sk[11]);
+			x3 = _mm_aesenc_si128(x3, sk[11]);
+			x0 = _mm_aesenclast_si128(x0, sk[12]);
+			x1 = _mm_aesenclast_si128(x1, sk[12]);
+			x2 = _mm_aesenclast_si128(x2, sk[12]);
+			x3 = _mm_aesenclast_si128(x3, sk[12]);
+		} else {	/* any other round count is handled as 14 rounds */
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x2 = _mm_aesenc_si128(x2, sk[10]);
+			x3 = _mm_aesenc_si128(x3, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x2 = _mm_aesenc_si128(x2, sk[11]);
+			x3 = _mm_aesenc_si128(x3, sk[11]);
+			x0 = _mm_aesenc_si128(x0, sk[12]);
+			x1 = _mm_aesenc_si128(x1, sk[12]);
+			x2 = _mm_aesenc_si128(x2, sk[12]);
+			x3 = _mm_aesenc_si128(x3, sk[12]);
+			x0 = _mm_aesenc_si128(x0, sk[13]);
+			x1 = _mm_aesenc_si128(x1, sk[13]);
+			x2 = _mm_aesenc_si128(x2, sk[13]);
+			x3 = _mm_aesenc_si128(x3, sk[13]);
+			x0 = _mm_aesenclast_si128(x0, sk[14]);
+			x1 = _mm_aesenclast_si128(x1, sk[14]);
+			x2 = _mm_aesenclast_si128(x2, sk[14]);
+			x3 = _mm_aesenclast_si128(x3, sk[14]);
+		}
+		if (len >= 64) {	/* full 64-byte chunk: XOR keystream directly into the buffer */
+			x0 = _mm_xor_si128(x0,
+				_mm_loadu_si128((void *)(buf +  0)));
+			x1 = _mm_xor_si128(x1,
+				_mm_loadu_si128((void *)(buf + 16)));
+			x2 = _mm_xor_si128(x2,
+				_mm_loadu_si128((void *)(buf + 32)));
+			x3 = _mm_xor_si128(x3,
+				_mm_loadu_si128((void *)(buf + 48)));
+			_mm_storeu_si128((void *)(buf +  0), x0);
+			_mm_storeu_si128((void *)(buf + 16), x1);
+			_mm_storeu_si128((void *)(buf + 32), x2);
+			_mm_storeu_si128((void *)(buf + 48), x3);
+			buf += 64;
+			len -= 64;
+			cc += 4;
+		} else {	/* tail of fewer than 64 bytes: spill keystream to tmp, XOR only len bytes */
+			unsigned char tmp[64];
+
+			_mm_storeu_si128((void *)(tmp +  0), x0);
+			_mm_storeu_si128((void *)(tmp + 16), x1);
+			_mm_storeu_si128((void *)(tmp + 32), x2);
+			_mm_storeu_si128((void *)(tmp + 48), x3);
+			for (u = 0; u < len; u ++) {
+				buf[u] ^= tmp[u];
+			}
+			cc += (uint32_t)len >> 4;	/* counts whole 16-byte blocks only; a partial block does not advance cc */
+			break;
+		}
+	}
+	return cc;
+}
+
+BR_TARGETS_X86_DOWN
+
+/* AES-NI CTR class descriptor; field meanings are declared in bearssl_block.h */
+const br_block_ctr_class br_aes_x86ni_ctr_vtable = {
+	sizeof(br_aes_x86ni_ctr_keys),	/* size of the concrete key context */
+	16,	/* presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_ctr_class **, const void *, size_t))
+		&br_aes_x86ni_ctr_init,	/* cast from the keys-typed init to the generic signature */
+	(uint32_t (*)(const br_block_ctr_class *const *,
+		const void *, uint32_t, void *, size_t))
+		&br_aes_x86ni_ctr_run	/* cast from the keys-typed run to the generic signature */
+};
+
+#else
+
+/* Stub for builds where BR_AES_X86NI is false: there is no vtable to offer; see bearssl_block.h */
+const br_block_ctr_class *
+br_aes_x86ni_ctr_get_vtable(void)
+{
+	return NULL;	/* no AES-NI implementation in this build */
+}
+
+#endif
diff --git a/third_party/bearssl/src/aes_x86ni_ctrcbc.c b/third_party/bearssl/src/aes_x86ni_ctrcbc.c
new file mode 100644
index 0000000..f57fead
--- /dev/null
+++ b/third_party/bearssl/src/aes_x86ni_ctrcbc.c
@@ -0,0 +1,596 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_AES_X86NI
+
+/* Vtable for AES-NI CTR + CBC-MAC, or NULL when br_aes_x86ni_supported() is false; see bearssl_block.h */
+const br_block_ctrcbc_class *
+br_aes_x86ni_ctrcbc_get_vtable(void)
+{
+	return br_aes_x86ni_supported() ? &br_aes_x86ni_ctrcbc_vtable : NULL;
+}
+
+/* Init ctx: set its vtable, then keep br_aes_x86ni_keysched_enc()'s result as the round count; see bearssl_block.h */
+void
+br_aes_x86ni_ctrcbc_init(br_aes_x86ni_ctrcbc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_aes_x86ni_ctrcbc_vtable;
+	ctx->num_rounds = br_aes_x86ni_keysched_enc(ctx->skey.skni, key, len);
+}
+
+BR_TARGETS_X86_UP
+
+/* CTR-only pass: XOR keystream into data in place using the 16-byte counter at ctr, then store the advanced counter. See bearssl_block.h */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_ctr(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15];	/* up to 14 rounds + 1 subkeys */
+	__m128i ivx0, ivx1, ivx2, ivx3;	/* four consecutive counters, kept byte-reversed for 64-bit lane arithmetic */
+	__m128i erev, zero, one, four, notthree;
+	unsigned u;
+
+	buf = data;
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {	/* subkey u is read from skni at offset u << 4 */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+
+	/*
+	 * Vector constants: erev is a byte-reversal shuffle mask; notthree is ~3 in the low 64-bit lane, 0 in the high lane.
+	 */
+	erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
+		8, 9, 10, 11, 12, 13, 14, 15);
+	zero = _mm_setzero_si128();
+	one = _mm_set_epi64x(0, 1);
+	four = _mm_set_epi64x(0, 4);
+	notthree = _mm_sub_epi64(zero, four);
+
+	/*
+	 * Byte-reverse ctr so its low half sits in the low 64-bit lane, then build ctr+1..ctr+3.
+	 * Carry: a low lane that wrapped to 0 compares equal to zero (-1); that is moved to the high lane and subtracted.
+	 */
+	ivx0 = _mm_shuffle_epi8(_mm_loadu_si128((void *)ctr), erev);
+	ivx1 = _mm_add_epi64(ivx0, one);
+	ivx1 = _mm_sub_epi64(ivx1,
+		_mm_slli_si128(_mm_cmpeq_epi64(ivx1, zero), 8));
+	ivx2 = _mm_add_epi64(ivx1, one);
+	ivx2 = _mm_sub_epi64(ivx2,
+		_mm_slli_si128(_mm_cmpeq_epi64(ivx2, zero), 8));
+	ivx3 = _mm_add_epi64(ivx2, one);
+	ivx3 = _mm_sub_epi64(ivx3,
+		_mm_slli_si128(_mm_cmpeq_epi64(ivx3, zero), 8));
+	while (len > 0) {
+		__m128i x0, x1, x2, x3;
+
+		/*
+		 * Load counter values; we need to byteswap them because
+		 * the specification says that they use big-endian.
+		 */
+		x0 = _mm_shuffle_epi8(ivx0, erev);
+		x1 = _mm_shuffle_epi8(ivx1, erev);
+		x2 = _mm_shuffle_epi8(ivx2, erev);
+		x3 = _mm_shuffle_epi8(ivx3, erev);
+
+		x0 = _mm_xor_si128(x0, sk[0]);
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x2 = _mm_xor_si128(x2, sk[0]);
+		x3 = _mm_xor_si128(x3, sk[0]);
+		x0 = _mm_aesenc_si128(x0, sk[1]);
+		x1 = _mm_aesenc_si128(x1, sk[1]);
+		x2 = _mm_aesenc_si128(x2, sk[1]);
+		x3 = _mm_aesenc_si128(x3, sk[1]);
+		x0 = _mm_aesenc_si128(x0, sk[2]);
+		x1 = _mm_aesenc_si128(x1, sk[2]);
+		x2 = _mm_aesenc_si128(x2, sk[2]);
+		x3 = _mm_aesenc_si128(x3, sk[2]);
+		x0 = _mm_aesenc_si128(x0, sk[3]);
+		x1 = _mm_aesenc_si128(x1, sk[3]);
+		x2 = _mm_aesenc_si128(x2, sk[3]);
+		x3 = _mm_aesenc_si128(x3, sk[3]);
+		x0 = _mm_aesenc_si128(x0, sk[4]);
+		x1 = _mm_aesenc_si128(x1, sk[4]);
+		x2 = _mm_aesenc_si128(x2, sk[4]);
+		x3 = _mm_aesenc_si128(x3, sk[4]);
+		x0 = _mm_aesenc_si128(x0, sk[5]);
+		x1 = _mm_aesenc_si128(x1, sk[5]);
+		x2 = _mm_aesenc_si128(x2, sk[5]);
+		x3 = _mm_aesenc_si128(x3, sk[5]);
+		x0 = _mm_aesenc_si128(x0, sk[6]);
+		x1 = _mm_aesenc_si128(x1, sk[6]);
+		x2 = _mm_aesenc_si128(x2, sk[6]);
+		x3 = _mm_aesenc_si128(x3, sk[6]);
+		x0 = _mm_aesenc_si128(x0, sk[7]);
+		x1 = _mm_aesenc_si128(x1, sk[7]);
+		x2 = _mm_aesenc_si128(x2, sk[7]);
+		x3 = _mm_aesenc_si128(x3, sk[7]);
+		x0 = _mm_aesenc_si128(x0, sk[8]);
+		x1 = _mm_aesenc_si128(x1, sk[8]);
+		x2 = _mm_aesenc_si128(x2, sk[8]);
+		x3 = _mm_aesenc_si128(x3, sk[8]);
+		x0 = _mm_aesenc_si128(x0, sk[9]);
+		x1 = _mm_aesenc_si128(x1, sk[9]);
+		x2 = _mm_aesenc_si128(x2, sk[9]);
+		x3 = _mm_aesenc_si128(x3, sk[9]);
+		if (num_rounds == 10) {
+			x0 = _mm_aesenclast_si128(x0, sk[10]);
+			x1 = _mm_aesenclast_si128(x1, sk[10]);
+			x2 = _mm_aesenclast_si128(x2, sk[10]);
+			x3 = _mm_aesenclast_si128(x3, sk[10]);
+		} else if (num_rounds == 12) {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x2 = _mm_aesenc_si128(x2, sk[10]);
+			x3 = _mm_aesenc_si128(x3, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x2 = _mm_aesenc_si128(x2, sk[11]);
+			x3 = _mm_aesenc_si128(x3, sk[11]);
+			x0 = _mm_aesenclast_si128(x0, sk[12]);
+			x1 = _mm_aesenclast_si128(x1, sk[12]);
+			x2 = _mm_aesenclast_si128(x2, sk[12]);
+			x3 = _mm_aesenclast_si128(x3, sk[12]);
+		} else {	/* any other round count is handled as 14 rounds */
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x2 = _mm_aesenc_si128(x2, sk[10]);
+			x3 = _mm_aesenc_si128(x3, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x2 = _mm_aesenc_si128(x2, sk[11]);
+			x3 = _mm_aesenc_si128(x3, sk[11]);
+			x0 = _mm_aesenc_si128(x0, sk[12]);
+			x1 = _mm_aesenc_si128(x1, sk[12]);
+			x2 = _mm_aesenc_si128(x2, sk[12]);
+			x3 = _mm_aesenc_si128(x3, sk[12]);
+			x0 = _mm_aesenc_si128(x0, sk[13]);
+			x1 = _mm_aesenc_si128(x1, sk[13]);
+			x2 = _mm_aesenc_si128(x2, sk[13]);
+			x3 = _mm_aesenc_si128(x3, sk[13]);
+			x0 = _mm_aesenclast_si128(x0, sk[14]);
+			x1 = _mm_aesenclast_si128(x1, sk[14]);
+			x2 = _mm_aesenclast_si128(x2, sk[14]);
+			x3 = _mm_aesenclast_si128(x3, sk[14]);
+		}
+		if (len >= 64) {	/* full 64-byte chunk: XOR keystream directly into the buffer */
+			x0 = _mm_xor_si128(x0,
+				_mm_loadu_si128((void *)(buf +  0)));
+			x1 = _mm_xor_si128(x1,
+				_mm_loadu_si128((void *)(buf + 16)));
+			x2 = _mm_xor_si128(x2,
+				_mm_loadu_si128((void *)(buf + 32)));
+			x3 = _mm_xor_si128(x3,
+				_mm_loadu_si128((void *)(buf + 48)));
+			_mm_storeu_si128((void *)(buf +  0), x0);
+			_mm_storeu_si128((void *)(buf + 16), x1);
+			_mm_storeu_si128((void *)(buf + 32), x2);
+			_mm_storeu_si128((void *)(buf + 48), x3);
+			buf += 64;
+			len -= 64;
+		} else {	/* tail of fewer than 64 bytes: spill keystream to tmp, XOR only len bytes */
+			unsigned char tmp[64];
+
+			_mm_storeu_si128((void *)(tmp +  0), x0);
+			_mm_storeu_si128((void *)(tmp + 16), x1);
+			_mm_storeu_si128((void *)(tmp + 32), x2);
+			_mm_storeu_si128((void *)(tmp + 48), x3);
+			for (u = 0; u < len; u ++) {
+				buf[u] ^= tmp[u];
+			}
+			switch (len) {	/* NOTE(review): any other tail length leaves ivx0 unchanged -- len presumably a multiple of 16; confirm */
+			case 16:
+				ivx0 = ivx1;	/* one block consumed */
+				break;
+			case 32:
+				ivx0 = ivx2;	/* two blocks consumed */
+				break;
+			case 48:
+				ivx0 = ivx3;	/* three blocks consumed */
+				break;
+			}
+			break;
+		}
+
+		/*
+		 * Add 4 to each counter value. For carry propagation
+		 * into the upper 64-bit words, we would need to compare
+		 * the results with 4, but SSE2+ has only _signed_
+		 * comparisons. Instead, we mask out the low two bits,
+		 * and check whether the remaining bits are zero.
+		 */
+		ivx0 = _mm_add_epi64(ivx0, four);
+		ivx1 = _mm_add_epi64(ivx1, four);
+		ivx2 = _mm_add_epi64(ivx2, four);
+		ivx3 = _mm_add_epi64(ivx3, four);
+		ivx0 = _mm_sub_epi64(ivx0,
+			_mm_slli_si128(_mm_cmpeq_epi64(
+				_mm_and_si128(ivx0, notthree), zero), 8));
+		ivx1 = _mm_sub_epi64(ivx1,
+			_mm_slli_si128(_mm_cmpeq_epi64(
+				_mm_and_si128(ivx1, notthree), zero), 8));
+		ivx2 = _mm_sub_epi64(ivx2,
+			_mm_slli_si128(_mm_cmpeq_epi64(
+				_mm_and_si128(ivx2, notthree), zero), 8));
+		ivx3 = _mm_sub_epi64(ivx3,
+			_mm_slli_si128(_mm_cmpeq_epi64(
+				_mm_and_si128(ivx3, notthree), zero), 8));
+	}
+
+	/*
+	 * Write back new counter value. The loop took care to put the
+	 * right counter value in ivx0.
+	 */
+	_mm_storeu_si128((void *)ctr, _mm_shuffle_epi8(ivx0, erev));
+}
+
+/* CBC-MAC pass: fold each 16-byte block of data into the state at cbcmac; data itself is not modified. See bearssl_block.h */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_mac(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *cbcmac, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15], ivx;	/* sk: up to 14 rounds + 1 subkeys; ivx: running MAC state */
+	unsigned u;
+
+	buf = data;
+	ivx = _mm_loadu_si128(cbcmac);
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {	/* subkey u is read from skni at offset u << 4 */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+	while (len > 0) {	/* NOTE(review): len -= 16 wraps unless len is a multiple of 16 -- confirm contract */
+		__m128i x;
+
+		x = _mm_xor_si128(_mm_loadu_si128((void *)buf), ivx);	/* data block XOR running state */
+		x = _mm_xor_si128(x, sk[0]);	/* initial subkey XOR */
+		x = _mm_aesenc_si128(x, sk[1]);
+		x = _mm_aesenc_si128(x, sk[2]);
+		x = _mm_aesenc_si128(x, sk[3]);
+		x = _mm_aesenc_si128(x, sk[4]);
+		x = _mm_aesenc_si128(x, sk[5]);
+		x = _mm_aesenc_si128(x, sk[6]);
+		x = _mm_aesenc_si128(x, sk[7]);
+		x = _mm_aesenc_si128(x, sk[8]);
+		x = _mm_aesenc_si128(x, sk[9]);
+		if (num_rounds == 10) {
+			x = _mm_aesenclast_si128(x, sk[10]);
+		} else if (num_rounds == 12) {
+			x = _mm_aesenc_si128(x, sk[10]);
+			x = _mm_aesenc_si128(x, sk[11]);
+			x = _mm_aesenclast_si128(x, sk[12]);
+		} else {	/* any other round count is handled as 14 rounds */
+			x = _mm_aesenc_si128(x, sk[10]);
+			x = _mm_aesenc_si128(x, sk[11]);
+			x = _mm_aesenc_si128(x, sk[12]);
+			x = _mm_aesenc_si128(x, sk[13]);
+			x = _mm_aesenclast_si128(x, sk[14]);
+		}
+		ivx = x;	/* encrypted block becomes the new state; nothing is written to data */
+		buf += 16;
+		len -= 16;
+	}
+	_mm_storeu_si128(cbcmac, ivx);	/* write the final state back to cbcmac */
+}
+
+/* CTR-encrypt data in place and CBC-MAC the resulting output blocks; ctr and cbcmac are both updated. See bearssl_block.h */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_encrypt(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15];	/* up to 14 rounds + 1 subkeys */
+	__m128i ivx, cmx;	/* ivx: byte-reversed counter; cmx: CBC-MAC state */
+	__m128i erev, zero, one;
+	unsigned u;
+	int first_iter;
+
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {	/* subkey u is read from skni at offset u << 4 */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+
+	/*
+	 * Vector constants; erev is a byte-reversal shuffle mask.
+	 */
+	erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
+		8, 9, 10, 11, 12, 13, 14, 15);
+	zero = _mm_setzero_si128();
+	one = _mm_set_epi64x(0, 1);
+
+	/*
+	 * Decode the counter in big-endian.
+	 */
+	ivx = _mm_shuffle_epi8(_mm_loadu_si128(ctr), erev);
+	cmx = _mm_loadu_si128(cbcmac);
+
+	buf = data;
+	first_iter = 1;
+	while (len > 0) {	/* NOTE(review): len -= 16 wraps unless len is a multiple of 16 -- confirm contract */
+		__m128i dx, x0, x1;
+
+		/*
+		 * Load initial values:
+		 *   dx   input block of data (not yet encrypted)
+		 *   x0   counter (for CTR encryption)
+		 *   x1   current CBC-MAC state (its encryption is unused on the first pass)
+		 */
+		dx = _mm_loadu_si128((void *)buf);
+		x0 = _mm_shuffle_epi8(ivx, erev);
+		x1 = cmx;
+
+		x0 = _mm_xor_si128(x0, sk[0]);
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x0 = _mm_aesenc_si128(x0, sk[1]);
+		x1 = _mm_aesenc_si128(x1, sk[1]);
+		x0 = _mm_aesenc_si128(x0, sk[2]);
+		x1 = _mm_aesenc_si128(x1, sk[2]);
+		x0 = _mm_aesenc_si128(x0, sk[3]);
+		x1 = _mm_aesenc_si128(x1, sk[3]);
+		x0 = _mm_aesenc_si128(x0, sk[4]);
+		x1 = _mm_aesenc_si128(x1, sk[4]);
+		x0 = _mm_aesenc_si128(x0, sk[5]);
+		x1 = _mm_aesenc_si128(x1, sk[5]);
+		x0 = _mm_aesenc_si128(x0, sk[6]);
+		x1 = _mm_aesenc_si128(x1, sk[6]);
+		x0 = _mm_aesenc_si128(x0, sk[7]);
+		x1 = _mm_aesenc_si128(x1, sk[7]);
+		x0 = _mm_aesenc_si128(x0, sk[8]);
+		x1 = _mm_aesenc_si128(x1, sk[8]);
+		x0 = _mm_aesenc_si128(x0, sk[9]);
+		x1 = _mm_aesenc_si128(x1, sk[9]);
+		if (num_rounds == 10) {
+			x0 = _mm_aesenclast_si128(x0, sk[10]);
+			x1 = _mm_aesenclast_si128(x1, sk[10]);
+		} else if (num_rounds == 12) {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x0 = _mm_aesenclast_si128(x0, sk[12]);
+			x1 = _mm_aesenclast_si128(x1, sk[12]);
+		} else {	/* any other round count is handled as 14 rounds */
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x0 = _mm_aesenc_si128(x0, sk[12]);
+			x1 = _mm_aesenc_si128(x1, sk[12]);
+			x0 = _mm_aesenc_si128(x0, sk[13]);
+			x1 = _mm_aesenc_si128(x1, sk[13]);
+			x0 = _mm_aesenclast_si128(x0, sk[14]);
+			x1 = _mm_aesenclast_si128(x1, sk[14]);
+		}
+
+		x0 = _mm_xor_si128(x0, dx);	/* x0 is now the output block */
+		if (first_iter) {	/* first pass: the incoming cbcmac value is XORed as-is with the output block */
+			cmx = _mm_xor_si128(cmx, x0);
+			first_iter = 0;
+		} else {	/* later passes: encrypted previous state XOR this output block */
+			cmx = _mm_xor_si128(x1, x0);
+		}
+		_mm_storeu_si128((void *)buf, x0);
+
+		buf += 16;
+		len -= 16;
+
+		/*
+		 * Increment the counter value.
+		 */
+		ivx = _mm_add_epi64(ivx, one);
+		ivx = _mm_sub_epi64(ivx,
+			_mm_slli_si128(_mm_cmpeq_epi64(ivx, zero), 8));	/* low lane wrapped to 0 => add 1 to high lane */
+
+		/*
+		 * If this was the last iteration, then compute the
+		 * extra block encryption to complete CBC-MAC.
+		 */
+		if (len == 0) {
+			cmx = _mm_xor_si128(cmx, sk[0]);
+			cmx = _mm_aesenc_si128(cmx, sk[1]);
+			cmx = _mm_aesenc_si128(cmx, sk[2]);
+			cmx = _mm_aesenc_si128(cmx, sk[3]);
+			cmx = _mm_aesenc_si128(cmx, sk[4]);
+			cmx = _mm_aesenc_si128(cmx, sk[5]);
+			cmx = _mm_aesenc_si128(cmx, sk[6]);
+			cmx = _mm_aesenc_si128(cmx, sk[7]);
+			cmx = _mm_aesenc_si128(cmx, sk[8]);
+			cmx = _mm_aesenc_si128(cmx, sk[9]);
+			if (num_rounds == 10) {
+				cmx = _mm_aesenclast_si128(cmx, sk[10]);
+			} else if (num_rounds == 12) {
+				cmx = _mm_aesenc_si128(cmx, sk[10]);
+				cmx = _mm_aesenc_si128(cmx, sk[11]);
+				cmx = _mm_aesenclast_si128(cmx, sk[12]);
+			} else {
+				cmx = _mm_aesenc_si128(cmx, sk[10]);
+				cmx = _mm_aesenc_si128(cmx, sk[11]);
+				cmx = _mm_aesenc_si128(cmx, sk[12]);
+				cmx = _mm_aesenc_si128(cmx, sk[13]);
+				cmx = _mm_aesenclast_si128(cmx, sk[14]);
+			}
+			break;
+		}
+	}
+
+	/*
+	 * Write back new counter value and CBC-MAC value.
+	 */
+	_mm_storeu_si128(ctr, _mm_shuffle_epi8(ivx, erev));
+	_mm_storeu_si128(cbcmac, cmx);
+}
+
+/* CBC-MAC the incoming data blocks and CTR-decrypt them in place; ctr and cbcmac are both updated. See bearssl_block.h */
+BR_TARGET("sse2,sse4.1,aes")
+void
+br_aes_x86ni_ctrcbc_decrypt(const br_aes_x86ni_ctrcbc_keys *ctx,
+	void *ctr, void *cbcmac, void *data, size_t len)
+{
+	unsigned char *buf;
+	unsigned num_rounds;
+	__m128i sk[15];	/* up to 14 rounds + 1 subkeys */
+	__m128i ivx, cmx;	/* ivx: byte-reversed counter; cmx: CBC-MAC state */
+	__m128i erev, zero, one;
+	unsigned u;
+
+	num_rounds = ctx->num_rounds;
+	for (u = 0; u <= num_rounds; u ++) {	/* subkey u is read from skni at offset u << 4 */
+		sk[u] = _mm_loadu_si128((void *)(ctx->skey.skni + (u << 4)));
+	}
+
+	/*
+	 * Vector constants; erev is a byte-reversal shuffle mask.
+	 */
+	erev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
+		8, 9, 10, 11, 12, 13, 14, 15);
+	zero = _mm_setzero_si128();
+	one = _mm_set_epi64x(0, 1);
+
+	/*
+	 * Decode the counter in big-endian.
+	 */
+	ivx = _mm_shuffle_epi8(_mm_loadu_si128(ctr), erev);
+	cmx = _mm_loadu_si128(cbcmac);
+
+	buf = data;
+	while (len > 0) {	/* NOTE(review): len -= 16 wraps unless len is a multiple of 16 -- confirm contract */
+		__m128i dx, x0, x1;
+
+		/*
+		 * Load initial values:
+		 *   dx   encrypted block of data
+		 *   x0   counter (for CTR encryption)
+		 *   x1   input for CBC-MAC
+		 */
+		dx = _mm_loadu_si128((void *)buf);
+		x0 = _mm_shuffle_epi8(ivx, erev);
+		x1 = _mm_xor_si128(cmx, dx);	/* the MAC absorbs the block as read, before it is decrypted */
+
+		x0 = _mm_xor_si128(x0, sk[0]);
+		x1 = _mm_xor_si128(x1, sk[0]);
+		x0 = _mm_aesenc_si128(x0, sk[1]);
+		x1 = _mm_aesenc_si128(x1, sk[1]);
+		x0 = _mm_aesenc_si128(x0, sk[2]);
+		x1 = _mm_aesenc_si128(x1, sk[2]);
+		x0 = _mm_aesenc_si128(x0, sk[3]);
+		x1 = _mm_aesenc_si128(x1, sk[3]);
+		x0 = _mm_aesenc_si128(x0, sk[4]);
+		x1 = _mm_aesenc_si128(x1, sk[4]);
+		x0 = _mm_aesenc_si128(x0, sk[5]);
+		x1 = _mm_aesenc_si128(x1, sk[5]);
+		x0 = _mm_aesenc_si128(x0, sk[6]);
+		x1 = _mm_aesenc_si128(x1, sk[6]);
+		x0 = _mm_aesenc_si128(x0, sk[7]);
+		x1 = _mm_aesenc_si128(x1, sk[7]);
+		x0 = _mm_aesenc_si128(x0, sk[8]);
+		x1 = _mm_aesenc_si128(x1, sk[8]);
+		x0 = _mm_aesenc_si128(x0, sk[9]);
+		x1 = _mm_aesenc_si128(x1, sk[9]);
+		if (num_rounds == 10) {
+			x0 = _mm_aesenclast_si128(x0, sk[10]);
+			x1 = _mm_aesenclast_si128(x1, sk[10]);
+		} else if (num_rounds == 12) {
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x0 = _mm_aesenclast_si128(x0, sk[12]);
+			x1 = _mm_aesenclast_si128(x1, sk[12]);
+		} else {	/* any other round count is handled as 14 rounds */
+			x0 = _mm_aesenc_si128(x0, sk[10]);
+			x1 = _mm_aesenc_si128(x1, sk[10]);
+			x0 = _mm_aesenc_si128(x0, sk[11]);
+			x1 = _mm_aesenc_si128(x1, sk[11]);
+			x0 = _mm_aesenc_si128(x0, sk[12]);
+			x1 = _mm_aesenc_si128(x1, sk[12]);
+			x0 = _mm_aesenc_si128(x0, sk[13]);
+			x1 = _mm_aesenc_si128(x1, sk[13]);
+			x0 = _mm_aesenclast_si128(x0, sk[14]);
+			x1 = _mm_aesenclast_si128(x1, sk[14]);
+		}
+		x0 = _mm_xor_si128(x0, dx);	/* keystream XOR input block = output block */
+		cmx = x1;	/* encrypted (state XOR input block) is the new MAC state */
+		_mm_storeu_si128((void *)buf, x0);
+
+		buf += 16;
+		len -= 16;
+
+		/*
+		 * Increment the counter value.
+		 */
+		ivx = _mm_add_epi64(ivx, one);
+		ivx = _mm_sub_epi64(ivx,
+			_mm_slli_si128(_mm_cmpeq_epi64(ivx, zero), 8));	/* low lane wrapped to 0 => add 1 to high lane */
+	}
+
+	/*
+	 * Write back new counter value and CBC-MAC value.
+	 */
+	_mm_storeu_si128(ctr, _mm_shuffle_epi8(ivx, erev));
+	_mm_storeu_si128(cbcmac, cmx);
+}
+
+BR_TARGETS_X86_DOWN
+
+/* AES-NI CTR + CBC-MAC class descriptor; field meanings are declared in bearssl_block.h */
+const br_block_ctrcbc_class br_aes_x86ni_ctrcbc_vtable = {
+	sizeof(br_aes_x86ni_ctrcbc_keys),	/* size of the concrete key context */
+	16,	/* presumably the block size in bytes -- confirm in bearssl_block.h */
+	4,	/* presumably log2 of the block size -- confirm in bearssl_block.h */
+	(void (*)(const br_block_ctrcbc_class **, const void *, size_t))
+		&br_aes_x86ni_ctrcbc_init,	/* each entry point is cast from its keys-typed form to the generic signature */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_x86ni_ctrcbc_encrypt,	/* combined CTR encryption + CBC-MAC */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, void *, size_t))
+		&br_aes_x86ni_ctrcbc_decrypt,	/* combined CBC-MAC + CTR decryption */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, void *, size_t))
+		&br_aes_x86ni_ctrcbc_ctr,	/* CTR only */
+	(void (*)(const br_block_ctrcbc_class *const *,
+		void *, const void *, size_t))
+		&br_aes_x86ni_ctrcbc_mac	/* CBC-MAC only */
+};
+
+#else
+
+/* Stub for builds where BR_AES_X86NI is false: there is no vtable to offer; see bearssl_block.h */
+const br_block_ctrcbc_class *
+br_aes_x86ni_ctrcbc_get_vtable(void)
+{
+	return NULL;	/* no AES-NI implementation in this build */
+}
+
+#endif
diff --git a/third_party/bearssl/src/aesctr_drbg.c b/third_party/bearssl/src/aesctr_drbg.c
new file mode 100644
index 0000000..8dbd501
--- /dev/null
+++ b/third_party/bearssl/src/aesctr_drbg.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Set up the DRBG over the given AES-CTR class and mix in the initial seed (see bearssl_rand.h) */
+void
+br_aesctr_drbg_init(br_aesctr_drbg_context *ctx,
+	const br_block_ctr_class *aesctr,
+	const void *seed, size_t len)
+{
+	/* Provisional all-zero 128-bit key; the update below replaces it. */
+	unsigned char zero_key[16] = { 0 };
+
+	ctx->vtable = &br_aesctr_drbg_vtable;
+	ctx->cc = 0;
+	aesctr->init(&ctx->sk.vtable, zero_key, sizeof zero_key);
+	br_aesctr_drbg_update(ctx, seed, len);
+}
+
+/* Produce len pseudorandom bytes into out[] from the AES-CTR key stream (see bearssl_rand.h) */
+void
+br_aesctr_drbg_generate(br_aesctr_drbg_context *ctx, void *out, size_t len)
+{
+	unsigned char zero_iv[12] = { 0 };
+	unsigned char *dst = out;
+
+	while (len != 0) {
+		size_t chunk;
+
+		/*
+		 * One pass emits at most 65280 bytes. Such a bound keeps
+		 * the counter overflow test below unambiguous, and it is
+		 * still representable when 'size_t' has only 16 bits.
+		 */
+		if (len > 65280) {
+			chunk = 65280;
+		} else {
+			chunk = len;
+		}
+
+		/*
+		 * Shrink the chunk if it would push the block counter
+		 * beyond the configured limit of 32768 blocks.
+		 */
+		if ((uint32_t)(ctx->cc + ((chunk + 15) >> 4)) > 32768) {
+			chunk = (32768 - ctx->cc) << 4;
+			if (chunk > len) {
+				chunk = len;
+			}
+		}
+
+		/*
+		 * CTR over a zeroed buffer leaves the key stream in it;
+		 * the value returned by run() is kept as the new counter.
+		 */
+		memset(dst, 0, chunk);
+		ctx->cc = ctx->sk.vtable->run(&ctx->sk.vtable,
+			zero_iv, ctx->cc, dst, chunk);
+		len -= chunk;
+		dst += chunk;
+
+		/*
+		 * Once 32768 blocks have been produced, the state is
+		 * refreshed (update with no additional seed).
+		 */
+		if (ctx->cc >= 32768) {
+			br_aesctr_drbg_update(ctx, NULL, 0);
+		}
+	}
+}
+
+/* Derive a new AES-128 key from the current key and the optional seed, then reset the counter (see bearssl_rand.h) */
+void
+br_aesctr_drbg_update(br_aesctr_drbg_context *ctx, const void *seed, size_t len)
+{
+	/*
+	 * We use a Hirose construction on AES-256 to make a hash function.
+	 * Function definition:
+	 *  - running state consists in two 16-byte blocks G and H
+	 *  - initial values of G and H are conventional
+	 *  - there is a fixed block-sized constant C
+	 *  - for next data block m:
+	 *      set AES key to H||m
+	 *      G' = E(G) xor G
+	 *      H' = E(G xor C) xor G xor C
+	 *      G <- G', H <- H'
+	 *  - once all blocks have been processed, output is H||G
+	 *
+	 * Constants:
+	 *   G_init = B6 B6 ... B6
+	 *   H_init = 5A 5A ... 5A
+	 *   C      = 01 00 ... 00
+	 *
+	 * With this hash function h(), we compute the new state as
+	 * follows:
+	 *  - produce a state-dependent value s as encryption of an
+	 *    all-one block with AES and the current key
+	 *  - compute the new key as the first 128 bits of h(s||seed)
+	 *
+	 * Original Hirose article:
+	 *    https://www.iacr.org/archive/fse2006/40470213/40470213.pdf
+	 */
+
+	unsigned char s[16], iv[12];
+	unsigned char G[16], H[16];
+	int first;
+
+	/*
+	 * Use an all-one IV and counter to get a fresh output block that
+	 * depends on the current key.
+	 */
+	memset(iv, 0xFF, sizeof iv);
+	memset(s, 0, 16);
+	ctx->sk.vtable->run(&ctx->sk.vtable, iv, 0xFFFFFFFF, s, 16);
+
+	/*
+	 * Set G[] and H[] to conventional start values.
+	 */
+	memset(G, 0xB6, sizeof G);
+	memset(H, 0x5A, sizeof H);
+
+	/*
+	 * Process the concatenation of the current state and the seed
+	 * with the custom hash function.
+	 */
+	first = 1;
+	for (;;) {
+		unsigned char tmp[32];
+		unsigned char newG[16];
+
+		/*
+		 * Assemble new key H||m into tmp[].
+		 */
+		memcpy(tmp, H, 16);
+		if (first) {
+			memcpy(tmp + 16, s, 16);	/* first block m is s */
+			first = 0;
+		} else {
+			size_t clen;
+
+			if (len == 0) {
+				break;
+			}
+			clen = len < 16 ? len : 16;
+			memcpy(tmp + 16, seed, clen);
+			memset(tmp + 16 + clen, 0, 16 - clen);	/* zero-pad a short last seed block */
+			seed = (const unsigned char *)seed + clen;
+			len -= clen;
+		}
+		ctx->sk.vtable->init(&ctx->sk.vtable, tmp, 32);
+
+		/*
+		 * Compute new G and H values; G is split into IV and counter.
+		 */
+		memcpy(iv, G, 12);
+		memcpy(newG, G, 16);
+		ctx->sk.vtable->run(&ctx->sk.vtable, iv,
+			br_dec32be(G + 12), newG, 16);
+		iv[0] ^= 0x01;	/* IV and counter now spell G xor C */
+		memcpy(H, G, 16);
+		H[0] ^= 0x01;	/* buffer holds G xor C before the run */
+		ctx->sk.vtable->run(&ctx->sk.vtable, iv,
+			br_dec32be(G + 12), H, 16);
+		memcpy(G, newG, 16);
+	}
+
+	/*
+	 * Output hash value is H||G. We truncate it to its first 128 bits,
+	 * i.e. H; that's our new AES key.
+	 */
+	ctx->sk.vtable->init(&ctx->sk.vtable, H, 16);
+	ctx->cc = 0;
+}
+
+/* PRNG class table for this DRBG; entry points are cast to the generic br_prng_class signatures (see bearssl_rand.h) */
+const br_prng_class br_aesctr_drbg_vtable = {
+	sizeof(br_aesctr_drbg_context),
+	(void (*)(const br_prng_class **, const void *, const void *, size_t))
+		&br_aesctr_drbg_init,
+	(void (*)(const br_prng_class **, void *, size_t))
+		&br_aesctr_drbg_generate,
+	(void (*)(const br_prng_class **, const void *, size_t))
+		&br_aesctr_drbg_update
+};
diff --git a/third_party/bearssl/src/asn1enc.c b/third_party/bearssl/src/asn1enc.c
new file mode 100644
index 0000000..7a74963
--- /dev/null
+++ b/third_party/bearssl/src/asn1enc.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Skip leading zero bytes of an unsigned integer and compute its encoded value length (see inner.h) */
+br_asn1_uint
+br_asn1_uint_prepare(const void *xdata, size_t xlen)
+{
+	const unsigned char *p = xdata;
+	br_asn1_uint r;
+
+	/* Drop every leading zero byte; this may consume all input. */
+	for (; xlen != 0 && p[0] == 0; xlen --) {
+		p ++;
+	}
+	r.data = p;
+	r.len = xlen;
+	/*
+	 * One extra byte is counted when nothing is left, or when the
+	 * top bit of the first remaining byte is set.
+	 */
+	r.asn1len = xlen + (size_t)(xlen == 0 || p[0] >= 0x80);
+	return r;
+}
+
+/* Encode a DER length into dest, or only measure it when dest is NULL; returns the byte count (see inner.h) */
+size_t
+br_asn1_encode_length(void *dest, size_t len)
+{
+	unsigned char *out = dest;
+	size_t rest;
+	int nbytes, k;
+
+	/* Short form: lengths below 0x80 fit in a single byte. */
+	if (len < 0x80) {
+		if (out != NULL) {
+			out[0] = len;
+		}
+		return 1;
+	}
+	/* Long form: 0x80 + byte count, then the length, big-endian. */
+	for (nbytes = 0, rest = len; rest != 0; rest >>= 8) {
+		nbytes ++;
+	}
+	if (out != NULL) {
+		out[0] = 0x80 + nbytes;
+		for (k = 1; k <= nbytes; k ++) {
+			out[k] = len >> ((nbytes - k) << 3);
+		}
+	}
+	return nbytes + 1;
+}
+
+/* Write pp as a DER element with tag 0x02, or only compute its size when dest is NULL (see inner.h) */
+size_t
+br_asn1_encode_uint(void *dest, br_asn1_uint pp)
+{
+	unsigned char *out = dest, *val;
+	size_t hlen;
+
+	/* Header: tag byte, then the length (merely measured if out is NULL). */
+	hlen = 1 + br_asn1_encode_length(
+		out == NULL ? NULL : out + 1, pp.asn1len);
+	if (out != NULL) {
+		out[0] = 0x02;
+		val = out + hlen;
+		val[0] = 0x00;	/* pad byte; overwritten by the copy when not needed */
+		memcpy(val + pp.asn1len - pp.len, pp.data, pp.len);
+	}
+	return hlen + pp.asn1len;
+}
diff --git a/third_party/bearssl/src/ccm.c b/third_party/bearssl/src/ccm.c
new file mode 100644
index 0000000..68cc913
--- /dev/null
+++ b/third_party/bearssl/src/ccm.c
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Implementation Notes
+ * ====================
+ *
+ * The combined CTR + CBC-MAC functions can only handle full blocks,
+ * so some buffering is necessary.
+ *
+ *  - 'ptr' contains a value from 0 to 15, which is the number of bytes
+ *    accumulated in buf[] that still needs to be processed with the
+ *    current CBC-MAC computation.
+ *
+ *  - When processing the message itself, CTR encryption/decryption is
+ *    also done at the same time. The first 'ptr' bytes of buf[] then
+ *    contains the plaintext bytes, while the last '16 - ptr' bytes of
+ *    buf[] are the remnants of the stream block, to be used against
+ *    the next input bytes, when available. When 'ptr' is 0, the
+ *    contents of buf[] are to be ignored.
+ *
+ *  - The current counter and running CBC-MAC values are kept in 'ctr'
+ *    and 'cbcmac', respectively.
+ */
+
+/* Bind the CCM context to a CTR + CBC-MAC block cipher instance; only the pointer is stored (see bearssl_block.h) */
+void
+br_ccm_init(br_ccm_context *ctx, const br_block_ctrcbc_class **bctx)
+{
+	ctx->bctx = bctx;
+}
+
+/* Begin a new CCM run: returns 1 on success, 0 if a length parameter is out of range (see bearssl_block.h) */
+int
+br_ccm_reset(br_ccm_context *ctx, const void *nonce, size_t nonce_len,
+	uint64_t aad_len, uint64_t data_len, size_t tag_len)
+{
+	unsigned char tmp[16];
+	unsigned u, q;
+
+	if (nonce_len < 7 || nonce_len > 13) {
+		return 0;
+	}
+	if (tag_len < 4 || tag_len > 16 || (tag_len & 1) != 0) {
+		return 0;
+	}
+	q = 15 - (unsigned)nonce_len;	/* bytes left for the length / counter field */
+	ctx->tag_len = tag_len;
+
+	/*
+	 * Block B0 for CBC-MAC: flags, nonce, data length (big-endian).
+	 */
+	tmp[0] = (aad_len > 0 ? 0x40 : 0x00)
+		| (((unsigned)tag_len - 2) << 2)
+		| (q - 1);
+	memcpy(tmp + 1, nonce, nonce_len);
+	for (u = 0; u < q; u ++) {
+		tmp[15 - u] = (unsigned char)data_len;
+		data_len >>= 8;
+	}
+	if (data_len != 0) {
+		/*
+		 * If the data length was not entirely consumed in the
+		 * loop above, then it exceeds the maximum limit of
+		 * q bytes (when encoded).
+		 */
+		return 0;
+	}
+
+	/*
+	 * Start CBC-MAC: zero the running value, then absorb B0.
+	 */
+	memset(ctx->cbcmac, 0, sizeof ctx->cbcmac);
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, tmp, sizeof tmp);
+
+	/*
+	 * Buffer the AAD length header (2, 6 or 10 bytes, or none).
+	 */
+	if ((aad_len >> 32) != 0) {
+		ctx->buf[0] = 0xFF;
+		ctx->buf[1] = 0xFF;
+		br_enc64be(ctx->buf + 2, aad_len);
+		ctx->ptr = 10;
+	} else if (aad_len >= 0xFF00) {
+		ctx->buf[0] = 0xFF;
+		ctx->buf[1] = 0xFE;
+		br_enc32be(ctx->buf + 2, (uint32_t)aad_len);
+		ctx->ptr = 6;
+	} else if (aad_len > 0) {
+		br_enc16be(ctx->buf, (unsigned)aad_len);
+		ctx->ptr = 2;
+	} else {
+		ctx->ptr = 0;
+	}
+
+	/*
+	 * Make initial counter value and compute tag mask.
+	 */
+	ctx->ctr[0] = q - 1;
+	memcpy(ctx->ctr + 1, nonce, nonce_len);
+	memset(ctx->ctr + 1 + nonce_len, 0, q);
+	memset(ctx->tagmask, 0, sizeof ctx->tagmask);	/* CTR over zeros leaves the stream block */
+	(*ctx->bctx)->ctr(ctx->bctx, ctx->ctr,
+		ctx->tagmask, sizeof ctx->tagmask);
+
+	return 1;
+}
+
+/* Feed additional authenticated data into the running CBC-MAC (see bearssl_block.h) */
+void
+br_ccm_aad_inject(br_ccm_context *ctx, const void *data, size_t len)
+{
+	const unsigned char *src = data;
+	size_t have, room, tail;
+
+	/*
+	 * Bytes already waiting in ctx->buf (the AAD length header, or
+	 * the tail of an earlier call) must first reach a full block.
+	 */
+	have = ctx->ptr;
+	if (have != 0) {
+		room = (sizeof ctx->buf) - have;
+		if (len < room) {
+			/*
+			 * Still short of a block: keep buffering.
+			 */
+			memcpy(ctx->buf + have, src, len);
+			ctx->ptr = have + len;
+			return;
+		}
+		memcpy(ctx->buf + have, src, room);
+		(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
+			ctx->buf, sizeof ctx->buf);
+		src += room;
+		len -= room;
+	}
+
+	/*
+	 * Whole blocks are authenticated straight from the caller's
+	 * buffer; 'tail' is what remains after the last full block.
+	 */
+	tail = len & 15;
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
+		src, len - tail);
+
+	/*
+	 * Those trailing 0 to 15 bytes stay in the context buffer.
+	 */
+	memcpy(ctx->buf, src + (len - tail), tail);
+	ctx->ptr = tail;
+}
+
+/* Close the AAD phase so that message data can be processed next (see bearssl_block.h) */
+void
+br_ccm_flip(br_ccm_context *ctx)
+{
+	size_t used = ctx->ptr;
+
+	/*
+	 * Nothing is pending when the AAD ended on a block boundary;
+	 * the counter itself was already set by br_ccm_reset().
+	 */
+	if (used == 0) {
+		return;
+	}
+
+	/*
+	 * Zero-pad the pending AAD bytes and absorb that final block.
+	 */
+	memset(ctx->buf + used, 0, (sizeof ctx->buf) - used);
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, ctx->buf, sizeof ctx->buf);
+	ctx->ptr = 0;
+}
+
+/* Encrypt (encrypt != 0) or decrypt data[] in place while feeding the plaintext to CBC-MAC (see bearssl_block.h) */
+void
+br_ccm_run(br_ccm_context *ctx, int encrypt, void *data, size_t len)
+{
+	unsigned char *dbuf;
+	size_t ptr;
+
+	dbuf = data;
+
+	/*
+	 * Complete a partial block, if any: ctx->buf[] contains
+	 * ctx->ptr plaintext bytes (already reported), and the other
+	 * bytes are CTR stream output.
+	 */
+	ptr = ctx->ptr;
+	if (ptr != 0) {
+		size_t clen;
+		size_t u;
+
+		clen = (sizeof ctx->buf) - ptr;
+		if (clen > len) {
+			clen = len;
+		}
+		if (encrypt) {
+			for (u = 0; u < clen; u ++) {
+				unsigned w, x;
+
+				w = ctx->buf[ptr + u];	/* stream byte */
+				x = dbuf[u];	/* plaintext byte */
+				ctx->buf[ptr + u] = x;
+				dbuf[u] = w ^ x;
+			}
+		} else {
+			for (u = 0; u < clen; u ++) {
+				unsigned w;
+
+				w = ctx->buf[ptr + u] ^ dbuf[u];	/* recovered plaintext byte */
+				dbuf[u] = w;
+				ctx->buf[ptr + u] = w;
+			}
+		}
+		dbuf += clen;
+		len -= clen;
+		ptr += clen;
+		if (ptr < sizeof ctx->buf) {
+			ctx->ptr = ptr;
+			return;
+		}
+		(*ctx->bctx)->mac(ctx->bctx,
+			ctx->cbcmac, ctx->buf, sizeof ctx->buf);
+	}
+
+	/*
+	 * Process all complete blocks. Note that the ctrcbc API is for
+	 * encrypt-then-MAC (CBC-MAC is computed over the encrypted
+	 * blocks) while CCM uses MAC-and-encrypt (CBC-MAC is computed
+	 * over the plaintext blocks). Therefore, we need to use the
+	 * _decryption_ function for encryption, and the encryption
+	 * function for decryption (this works because CTR encryption
+	 * and decryption are identical, so the choice really is about
+	 * computing the CBC-MAC before or after XORing with the CTR
+	 * stream).
+	 */
+	ptr = len & 15;
+	len -= ptr;
+	if (encrypt) {
+		(*ctx->bctx)->decrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
+			dbuf, len);
+	} else {
+		(*ctx->bctx)->encrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
+			dbuf, len);
+	}
+	dbuf += len;
+
+	/*
+	 * If there is some remaining data, then we need to compute an
+	 * extra block of CTR stream.
+	 */
+	if (ptr != 0) {
+		size_t u;
+
+		memset(ctx->buf, 0, sizeof ctx->buf);
+		(*ctx->bctx)->ctr(ctx->bctx, ctx->ctr,
+			ctx->buf, sizeof ctx->buf);
+		if (encrypt) {
+			for (u = 0; u < ptr; u ++) {
+				unsigned w, x;
+
+				w = ctx->buf[u];
+				x = dbuf[u];
+				ctx->buf[u] = x;
+				dbuf[u] = w ^ x;
+			}
+		} else {
+			for (u = 0; u < ptr; u ++) {
+				unsigned w;
+
+				w = ctx->buf[u] ^ dbuf[u];
+				dbuf[u] = w;
+				ctx->buf[u] = w;
+			}
+		}
+	}
+	ctx->ptr = ptr;
+}
+
+/* Finish the CBC-MAC, write the tag into tag[] and return its length in bytes (see bearssl_block.h) */
+size_t
+br_ccm_get_tag(br_ccm_context *ctx, void *tag)
+{
+	size_t pending = ctx->ptr;
+	size_t k, n = ctx->tag_len;
+
+	/*
+	 * Leftover message bytes form a last block, zero-padded before
+	 * it goes through CBC-MAC.
+	 */
+	if (pending != 0) {
+		memset(ctx->buf + pending, 0, (sizeof ctx->buf) - pending);
+		(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
+			ctx->buf, sizeof ctx->buf);
+	}
+
+	/*
+	 * The mask is folded into ctx->cbcmac itself (the stored MAC
+	 * value is modified), and the first n bytes become the tag.
+	 */
+	for (k = 0; k != n; k ++) {
+		ctx->cbcmac[k] ^= ctx->tagmask[k];
+	}
+	memcpy(tag, ctx->cbcmac, n);
+	return n;
+}
+
+/* Compute the tag and compare it with tag[] over all bytes, with no early exit; returns EQ0() of the difference (see bearssl_block.h) */
+uint32_t
+br_ccm_check_tag(br_ccm_context *ctx, const void *tag)
+{
+	unsigned char tmp[16];
+	size_t u, tag_len;
+	uint32_t z;
+
+	tag_len = br_ccm_get_tag(ctx, tmp);
+	z = 0;
+	for (u = 0; u < tag_len; u ++) {
+		z |= tmp[u] ^ ((const unsigned char *)tag)[u];	/* collect every differing bit */
+	}
+	return EQ0(z);	/* presumably 1 when z is zero, 0 otherwise; EQ0 is defined outside this file */
+}
diff --git a/third_party/bearssl/src/ccopy.c b/third_party/bearssl/src/ccopy.c
new file mode 100644
index 0000000..2beace7
--- /dev/null
+++ b/third_party/bearssl/src/ccopy.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Conditional copy: every dst byte is rewritten as MUX(ctl, src byte, dst byte); all len bytes are always read and written (see inner.h) */
+void
+br_ccopy(uint32_t ctl, void *dst, const void *src, size_t len)
+{
+	unsigned char *d;
+	const unsigned char *s;
+
+	d = dst;
+	s = src;
+	while (len -- > 0) {
+		uint32_t x, y;
+
+		x = *s ++;
+		y = *d;
+		*d = MUX(ctl, x, y);	/* presumably x if ctl is 1, y if ctl is 0; MUX is defined outside this file */
+		d ++;
+	}
+}
diff --git a/third_party/bearssl/src/chacha20_ct.c b/third_party/bearssl/src/chacha20_ct.c
new file mode 100644
index 0000000..9961eb1
--- /dev/null
+++ b/third_party/bearssl/src/chacha20_ct.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Portable ChaCha20: XOR the key stream into data[] starting at block counter cc; returns the counter after the last block (see bearssl_block.h) */
+uint32_t
+br_chacha20_ct_run(const void *key,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *buf;
+	uint32_t kw[8], ivw[3];
+	size_t u;
+
+	static const uint32_t CW[] = {
+		0x61707865, 0x3320646e, 0x79622d32, 0x6b206574	/* "expand 32-byte k", little-endian words */
+	};
+
+	buf = data;
+	for (u = 0; u < 8; u ++) {
+		kw[u] = br_dec32le((const unsigned char *)key + (u << 2));
+	}
+	for (u = 0; u < 3; u ++) {
+		ivw[u] = br_dec32le((const unsigned char *)iv + (u << 2));
+	}
+	while (len > 0) {
+		uint32_t state[16];
+		int i;
+		size_t clen;
+		unsigned char tmp[64];
+
+		memcpy(&state[0], CW, sizeof CW);
+		memcpy(&state[4], kw, sizeof kw);
+		state[12] = cc;	/* word 12 holds the block counter */
+		memcpy(&state[13], ivw, sizeof ivw);
+		for (i = 0; i < 10; i ++) {	/* each pass: 4 column then 4 diagonal quarter-rounds */
+
+#define QROUND(a, b, c, d)   do { \
+		state[a] += state[b]; \
+		state[d] ^= state[a]; \
+		state[d] = (state[d] << 16) | (state[d] >> 16); \
+		state[c] += state[d]; \
+		state[b] ^= state[c]; \
+		state[b] = (state[b] << 12) | (state[b] >> 20); \
+		state[a] += state[b]; \
+		state[d] ^= state[a]; \
+		state[d] = (state[d] <<  8) | (state[d] >> 24); \
+		state[c] += state[d]; \
+		state[b] ^= state[c]; \
+		state[b] = (state[b] <<  7) | (state[b] >> 25); \
+	} while (0)
+
+			QROUND( 0,  4,  8, 12);
+			QROUND( 1,  5,  9, 13);
+			QROUND( 2,  6, 10, 14);
+			QROUND( 3,  7, 11, 15);
+			QROUND( 0,  5, 10, 15);
+			QROUND( 1,  6, 11, 12);
+			QROUND( 2,  7,  8, 13);
+			QROUND( 3,  4,  9, 14);
+
+#undef QROUND
+
+		}
+		for (u = 0; u < 4; u ++) {	/* add the initial state back, word by word */
+			br_enc32le(&tmp[u << 2], state[u] + CW[u]);
+		}
+		for (u = 4; u < 12; u ++) {
+			br_enc32le(&tmp[u << 2], state[u] + kw[u - 4]);
+		}
+		br_enc32le(&tmp[48], state[12] + cc);
+		for (u = 13; u < 16; u ++) {
+			br_enc32le(&tmp[u << 2], state[u] + ivw[u - 13]);
+		}
+
+		clen = len < 64 ? len : 64;	/* the last chunk may be shorter than a block */
+		for (u = 0; u < clen; u ++) {
+			buf[u] ^= tmp[u];
+		}
+		buf += clen;
+		len -= clen;
+		cc ++;
+	}
+	return cc;
+}
diff --git a/third_party/bearssl/src/chacha20_sse2.c b/third_party/bearssl/src/chacha20_sse2.c
new file mode 100644
index 0000000..92b4a4a
--- /dev/null
+++ b/third_party/bearssl/src/chacha20_sse2.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_SSE2
+
+/*
+ * This file contains a ChaCha20 implementation that leverages SSE2
+ * opcodes for better performance.
+ */
+
+/* Return the SSE2 ChaCha20 function if it is usable on this CPU, 0 otherwise (see bearssl_block.h) */
+br_chacha20_run
+br_chacha20_sse2_get(void)
+{
+#if !BR_amd64
+
+	/*
+	 * Outside of 64-bit mode, SSE2 may be missing, so CPUID is
+	 * queried: the feature is reported by bit 26 of EDX, which is
+	 * the 0x04000000 mask passed below. Without that bit there is
+	 * no implementation to hand out.
+	 */
+	if (!br_cpuid(0, 0, 0, 0x04000000)) {
+		return 0;
+	}
+
+#endif
+
+	/*
+	 * This point is reached when the CPUID test succeeded, or
+	 * directly in 64-bit mode, where SSE2 opcodes are part of the
+	 * ABI and therefore should be automatically available.
+	 */
+	return &br_chacha20_sse2_run;
+}
+
+BR_TARGETS_X86_UP
+
+/* SSE2 ChaCha20: XOR the key stream into data[] starting at block counter cc; returns the counter word of the final state (see bearssl_block.h) */
+BR_TARGET("sse2")
+uint32_t
+br_chacha20_sse2_run(const void *key,
+	const void *iv, uint32_t cc, void *data, size_t len)
+{
+	unsigned char *buf;
+	uint32_t ivtmp[4];
+	__m128i kw0, kw1;
+	__m128i iw, cw;
+	__m128i one;
+
+	static const uint32_t CW[] = {
+		0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
+	};
+
+	buf = data;
+	kw0 = _mm_loadu_si128(key);
+	kw1 = _mm_loadu_si128((const void *)((const unsigned char *)key + 16));
+	ivtmp[0] = cc;	/* counter word first, then the 12-byte IV */
+	memcpy(ivtmp + 1, iv, 12);
+	iw = _mm_loadu_si128((const void *)ivtmp);
+	cw = _mm_loadu_si128((const void *)CW);
+	one = _mm_set_epi32(0, 0, 0, 1);	/* adds 1 to the lowest 32-bit lane only */
+
+	while (len > 0) {
+		/*
+		 * sj contains state words 4*j to 4*j+3.
+		 */
+		__m128i s0, s1, s2, s3;
+		int i;
+
+		s0 = cw;
+		s1 = kw0;
+		s2 = kw1;
+		s3 = iw;
+		for (i = 0; i < 10; i ++) {
+			/*
+			 * Even round is straightforward application on
+			 * the state words.
+			 */
+			s0 = _mm_add_epi32(s0, s1);
+			s3 = _mm_xor_si128(s3, s0);
+			s3 = _mm_or_si128(
+				_mm_slli_epi32(s3, 16),
+				_mm_srli_epi32(s3, 16));
+
+			s2 = _mm_add_epi32(s2, s3);
+			s1 = _mm_xor_si128(s1, s2);
+			s1 = _mm_or_si128(
+				_mm_slli_epi32(s1, 12),
+				_mm_srli_epi32(s1, 20));
+
+			s0 = _mm_add_epi32(s0, s1);
+			s3 = _mm_xor_si128(s3, s0);
+			s3 = _mm_or_si128(
+				_mm_slli_epi32(s3, 8),
+				_mm_srli_epi32(s3, 24));
+
+			s2 = _mm_add_epi32(s2, s3);
+			s1 = _mm_xor_si128(s1, s2);
+			s1 = _mm_or_si128(
+				_mm_slli_epi32(s1, 7),
+				_mm_srli_epi32(s1, 25));
+
+			/*
+			 * For the odd round, we must rotate some state
+			 * words so that the computations apply on the
+			 * right combinations of words.
+			 */
+			s1 = _mm_shuffle_epi32(s1, 0x39);
+			s2 = _mm_shuffle_epi32(s2, 0x4E);
+			s3 = _mm_shuffle_epi32(s3, 0x93);
+
+			s0 = _mm_add_epi32(s0, s1);
+			s3 = _mm_xor_si128(s3, s0);
+			s3 = _mm_or_si128(
+				_mm_slli_epi32(s3, 16),
+				_mm_srli_epi32(s3, 16));
+
+			s2 = _mm_add_epi32(s2, s3);
+			s1 = _mm_xor_si128(s1, s2);
+			s1 = _mm_or_si128(
+				_mm_slli_epi32(s1, 12),
+				_mm_srli_epi32(s1, 20));
+
+			s0 = _mm_add_epi32(s0, s1);
+			s3 = _mm_xor_si128(s3, s0);
+			s3 = _mm_or_si128(
+				_mm_slli_epi32(s3, 8),
+				_mm_srli_epi32(s3, 24));
+
+			s2 = _mm_add_epi32(s2, s3);
+			s1 = _mm_xor_si128(s1, s2);
+			s1 = _mm_or_si128(
+				_mm_slli_epi32(s1, 7),
+				_mm_srli_epi32(s1, 25));
+
+			/*
+			 * After the odd round, we rotate back the values
+			 * to undo the rotate at the start of the odd round.
+			 */
+			s1 = _mm_shuffle_epi32(s1, 0x93);
+			s2 = _mm_shuffle_epi32(s2, 0x4E);
+			s3 = _mm_shuffle_epi32(s3, 0x39);
+		}
+
+		/*
+		 * Addition with the initial state.
+		 */
+		s0 = _mm_add_epi32(s0, cw);
+		s1 = _mm_add_epi32(s1, kw0);
+		s2 = _mm_add_epi32(s2, kw1);
+		s3 = _mm_add_epi32(s3, iw);
+
+		/*
+		 * Increment block counter.
+		 */
+		iw = _mm_add_epi32(iw, one);
+
+		/*
+		 * XOR final state with the data (short tail goes via tmp[]).
+		 */
+		if (len < 64) {
+			unsigned char tmp[64];
+			size_t u;
+
+			_mm_storeu_si128((void *)(tmp +  0), s0);
+			_mm_storeu_si128((void *)(tmp + 16), s1);
+			_mm_storeu_si128((void *)(tmp + 32), s2);
+			_mm_storeu_si128((void *)(tmp + 48), s3);
+			for (u = 0; u < len; u ++) {
+				buf[u] ^= tmp[u];
+			}
+			break;
+		} else {
+			__m128i b0, b1, b2, b3;
+
+			b0 = _mm_loadu_si128((const void *)(buf +  0));
+			b1 = _mm_loadu_si128((const void *)(buf + 16));
+			b2 = _mm_loadu_si128((const void *)(buf + 32));
+			b3 = _mm_loadu_si128((const void *)(buf + 48));
+			b0 = _mm_xor_si128(b0, s0);
+			b1 = _mm_xor_si128(b1, s1);
+			b2 = _mm_xor_si128(b2, s2);
+			b3 = _mm_xor_si128(b3, s3);
+			_mm_storeu_si128((void *)(buf +  0), b0);
+			_mm_storeu_si128((void *)(buf + 16), b1);
+			_mm_storeu_si128((void *)(buf + 32), b2);
+			_mm_storeu_si128((void *)(buf + 48), b3);
+			buf += 64;
+			len -= 64;
+		}
+	}
+
+	/*
+	 * _mm_extract_epi32() requires SSE4.1. We prefer to stick to
+	 * raw SSE2, thus we use _mm_extract_epi16().
+	 */
+	return (uint32_t)_mm_extract_epi16(iw, 0)
+		| ((uint32_t)_mm_extract_epi16(iw, 1) << 16);
+}
+
+BR_TARGETS_X86_DOWN
+
+#else
+
+/* Variant used when BR_SSE2 is false: no SSE2 implementation exists, so 0 is returned (see bearssl_block.h) */
+br_chacha20_run
+br_chacha20_sse2_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/config.h b/third_party/bearssl/src/config.h
new file mode 100644
index 0000000..edf5d25
--- /dev/null
+++ b/third_party/bearssl/src/config.h
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef CONFIG_H__
+#define CONFIG_H__
+
+/*
+ * This file contains compile-time flags that can override the
+ * autodetection performed in relevant files. Each flag is a macro; it
+ * deactivates the feature if defined to 0, activates it if defined to a
+ * non-zero integer (normally 1). If the macro is not defined, then
+ * autodetection applies.
+ */
+ 
+/* When compiling with NXDK, the x86 intrinsics seem to be incomplete compared to what aes_x86ni expects: force these features off and undefine _WIN32. */
+#ifdef NXDK
+#define BR_AES_X86NI 0
+#define BR_ENABLE_INTRINSICS 0
+#define BR_SSE2 0
+#define BR_RDRAND 0
+#undef _WIN32
+#endif
+
+/*
+ * When BR_64 is enabled, 64-bit integer types are assumed to be
+ * efficient (i.e. the architecture has 64-bit registers and can
+ * do 64-bit operations as fast as 32-bit operations).
+ *
+#define BR_64   1
+ */
+
+/*
+ * When BR_LOMUL is enabled, then multiplications of 32-bit values whose
+ * result are truncated to the low 32 bits are assumed to be
+ * substantially more efficient than 32-bit multiplications that yield
+ * 64-bit results. This is typically the case on low-end ARM Cortex M
+ * systems (M0, M0+, M1, and arguably M3 and M4 as well).
+ *
+#define BR_LOMUL   1
+ */
+
+/*
+ * When BR_SLOW_MUL is enabled, multiplications are assumed to be
+ * substantially slow with regards to other integer operations, thus
+ * making it worth to make more operations for a given task if it allows
+ * using less multiplications.
+ *
+#define BR_SLOW_MUL   1
+ */
+
+/*
+ * When BR_SLOW_MUL15 is enabled, short multiplications (on 15-bit words)
+ * are assumed to be substantially slow with regards to other integer
+ * operations, thus making it worth to make more integer operations if
+ * it allows using less multiplications.
+ *
+#define BR_SLOW_MUL15   1
+ */
+
+/*
+ * When BR_CT_MUL31 is enabled, multiplications of 31-bit values (used
+ * in the "i31" big integer implementation) use an alternate implementation
+ * which is slower and larger than the normal multiplication, but should
+ * ensure constant-time multiplications even on architectures where the
+ * multiplication opcode takes a variable number of cycles to complete.
+ *
+#define BR_CT_MUL31   1
+ */
+
+/*
+ * When BR_CT_MUL15 is enabled, multiplications of 15-bit values (held
+ * in 32-bit words) use an alternate implementation which is slower and
+ * larger than the normal multiplication, but should ensure
+ * constant-time multiplications on most/all architectures where the
+ * basic multiplication is not constant-time.
+#define BR_CT_MUL15   1
+ */
+
+/*
+ * When BR_NO_ARITH_SHIFT is enabled, arithmetic right shifts (with sign
+ * extension) are performed with a sequence of operations which is bigger
+ * and slower than a simple right shift on a signed value. This avoids
+ * relying on an implementation-defined behaviour. However, most if not
+ * all C compilers use sign extension for right shifts on signed values,
+ * so this alternate macro is disabled by default.
+#define BR_NO_ARITH_SHIFT   1
+ */
+
+/*
+ * When BR_RDRAND is enabled, the SSL engine will use the RDRAND opcode
+ * to automatically obtain quality randomness for seeding its internal
+ * PRNG. Since that opcode is present only in recent x86 CPU, its
+ * support is dynamically tested; if the current CPU does not support
+ * it, then another random source will be used, such as /dev/urandom or
+ * CryptGenRandom().
+ *
+#define BR_RDRAND   1
+ */
+
+/*
+ * When BR_USE_GETENTROPY is enabled, the SSL engine will use the
+ * getentropy() function to obtain quality randomness for seeding its
+ * internal PRNG. On Linux and FreeBSD, getentropy() is implemented by
+ * the standard library with the system call getrandom(); on OpenBSD,
+ * getentropy() is the system call, and there is no getrandom() wrapper,
+ * hence the use of the getentropy() function for maximum portability.
+ *
+ * If the getentropy() call fails, and BR_USE_URANDOM is not explicitly
+ * disabled, then /dev/urandom will be used as a fallback mechanism. On
+ * FreeBSD and OpenBSD, this does not change much, since /dev/urandom
+ * will block if not enough entropy has been obtained since last boot.
+ * On Linux, /dev/urandom might not block, which can be troublesome in
+ * early boot stages, which is why getentropy() is preferred.
+ *
+#define BR_USE_GETENTROPY   1
+ */
+
+/*
+ * When BR_USE_URANDOM is enabled, the SSL engine will use /dev/urandom
+ * to automatically obtain quality randomness for seeding its internal
+ * PRNG.
+ *
+#define BR_USE_URANDOM   1
+ */
+
+/*
+ * When BR_USE_WIN32_RAND is enabled, the SSL engine will use the Win32
+ * (CryptoAPI) functions (CryptAcquireContext(), CryptGenRandom()...) to
+ * automatically obtain quality randomness for seeding its internal PRNG.
+ *
+ * Note: if both BR_USE_URANDOM and BR_USE_WIN32_RAND are defined, the
+ * former takes precedence.
+ *
+#define BR_USE_WIN32_RAND   1
+ */
+
+/*
+ * When BR_ARMEL_CORTEXM_GCC is enabled, some operations are replaced with
+ * inline assembly which is shorter and/or faster. This should be used
+ * only when all of the following are true:
+ *   - target architecture is ARM in Thumb mode
+ *   - target endianness is little-endian
+ *   - compiler is GCC (or GCC-compatible for inline assembly syntax)
+ *
+ * This is meant for the low-end cores (Cortex M0, M0+, M1, M3).
+ * Note: if BR_LOMUL is not explicitly enabled or disabled, then
+ * enabling BR_ARMEL_CORTEXM_GCC also enables BR_LOMUL.
+ *
+#define BR_ARMEL_CORTEXM_GCC   1
+ */
+
+/*
+ * When BR_AES_X86NI is enabled, the AES implementation using the x86 "NI"
+ * instructions (dedicated AES opcodes) will be compiled. If this is not
+ * enabled explicitly, then that AES implementation will be compiled only
+ * if a compatible compiler is detected. If set explicitly to 0, the
+ * implementation will not be compiled at all.
+ *
+#define BR_AES_X86NI   1
+ */
+
+/*
+ * When BR_SSE2 is enabled, SSE2 intrinsics will be used for some
+ * algorithm implementations that use them (e.g. chacha20_sse2). If this
+ * is not enabled explicitly, then support for SSE2 intrinsics will be
+ * automatically detected. If set explicitly to 0, then SSE2 code will
+ * not be compiled at all.
+ *
+#define BR_SSE2   1
+ */
+
+/*
+ * When BR_POWER8 is enabled, the AES implementation using the POWER ISA
+ * 2.07 opcodes (available on POWER8 processors and later) is compiled.
+ * If this is not enabled explicitly, then that implementation will be
+ * compiled only if a compatible compiler is detected, _and_ the target
+ * architecture is POWER8 or later.
+ *
+#define BR_POWER8   1
+ */
+
+/*
+ * When BR_INT128 is enabled, then code using the 'unsigned __int64'
+ * and 'unsigned __int128' types will be used to leverage 64x64->128
+ * unsigned multiplications. This should work with GCC and compatible
+ * compilers on 64-bit architectures.
+ *
+#define BR_INT128   1
+ */
+
+/*
+ * When BR_UMUL128 is enabled, then code using the '_umul128()' and
+ * '_addcarry_u64()' intrinsics will be used to implement 64x64->128
+ * unsigned multiplications. This should work on Visual C on x64 systems.
+ *
+#define BR_UMUL128   1
+ */
+
+/*
+ * When BR_LE_UNALIGNED is enabled, then the current architecture is
+ * assumed to use little-endian encoding for integers, and to tolerate
+ * unaligned accesses with no or minimal time penalty.
+ *
+#define BR_LE_UNALIGNED   1
+ */
+
+/*
+ * When BR_BE_UNALIGNED is enabled, then the current architecture is
+ * assumed to use big-endian encoding for integers, and to tolerate
+ * unaligned accesses with no or minimal time penalty.
+ *
+#define BR_BE_UNALIGNED   1
+ */
+
+#endif
diff --git a/third_party/bearssl/src/dec16be.c b/third_party/bearssl/src/dec16be.c
new file mode 100644
index 0000000..4f3f7f4
--- /dev/null
+++ b/third_party/bearssl/src/dec16be.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; stores br_dec16be() of each successive 2-byte group of src into v[0..num-1] */
+void
+br_range_dec16be(uint16_t *v, size_t num, const void *src)
+{
+	const unsigned char *buf;
+
+	buf = src;
+	while (num -- > 0) {
+		*v ++ = br_dec16be(buf);
+		buf += 2;
+	}
+}
diff --git a/third_party/bearssl/src/dec16le.c b/third_party/bearssl/src/dec16le.c
new file mode 100644
index 0000000..84d8536
--- /dev/null
+++ b/third_party/bearssl/src/dec16le.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; stores br_dec16le() of each successive 2-byte group of src into v[0..num-1] */
+void
+br_range_dec16le(uint16_t *v, size_t num, const void *src)
+{
+	const unsigned char *buf;
+
+	buf = src;
+	while (num -- > 0) {
+		*v ++ = br_dec16le(buf);
+		buf += 2;
+	}
+}
diff --git a/third_party/bearssl/src/dec32be.c b/third_party/bearssl/src/dec32be.c
new file mode 100644
index 0000000..5a8fc59
--- /dev/null
+++ b/third_party/bearssl/src/dec32be.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; stores br_dec32be() of each successive 4-byte group of src into v[0..num-1] */
+void
+br_range_dec32be(uint32_t *v, size_t num, const void *src)
+{
+	const unsigned char *buf;
+
+	buf = src;
+	while (num -- > 0) {
+		*v ++ = br_dec32be(buf);
+		buf += 4;
+	}
+}
diff --git a/third_party/bearssl/src/dec32le.c b/third_party/bearssl/src/dec32le.c
new file mode 100644
index 0000000..ed36e71
--- /dev/null
+++ b/third_party/bearssl/src/dec32le.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; stores br_dec32le() of each successive 4-byte group of src into v[0..num-1] */
+void
+br_range_dec32le(uint32_t *v, size_t num, const void *src)
+{
+	const unsigned char *buf;
+
+	buf = src;
+	while (num -- > 0) {
+		*v ++ = br_dec32le(buf);
+		buf += 4;
+	}
+}
diff --git a/third_party/bearssl/src/dec64be.c b/third_party/bearssl/src/dec64be.c
new file mode 100644
index 0000000..0c40a76
--- /dev/null
+++ b/third_party/bearssl/src/dec64be.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; stores br_dec64be() of each successive 8-byte group of src into v[0..num-1] */
+void
+br_range_dec64be(uint64_t *v, size_t num, const void *src)
+{
+	const unsigned char *buf;
+
+	buf = src;
+	while (num -- > 0) {
+		*v ++ = br_dec64be(buf);
+		buf += 8;
+	}
+}
diff --git a/third_party/bearssl/src/dec64le.c b/third_party/bearssl/src/dec64le.c
new file mode 100644
index 0000000..cbd02c2
--- /dev/null
+++ b/third_party/bearssl/src/dec64le.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; stores br_dec64le() of each successive 8-byte group of src into v[0..num-1] */
+void
+br_range_dec64le(uint64_t *v, size_t num, const void *src)
+{
+	const unsigned char *buf;
+
+	buf = src;
+	while (num -- > 0) {
+		*v ++ = br_dec64le(buf);
+		buf += 8;
+	}
+}
diff --git a/third_party/bearssl/src/des_ct.c b/third_party/bearssl/src/des_ct.c
new file mode 100644
index 0000000..581c0ab
--- /dev/null
+++ b/third_party/bearssl/src/des_ct.c
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * During key schedule, we need to apply bit extraction PC-2 then permute
+ * things into our bitslice representation. PC-2 extracts 48 bits out
+ * of two 28-bit words (kl and kr), and we store these bits into two
+ * 32-bit words sk0 and sk1. Table entries are consumed from the most
+ * significant output bit downwards (see the loop in keysched_unit()):
+ *  -- bit 31-x of sk0 comes from bit QL0[x] of kl
+ *  -- bit 15-x of sk0 comes from bit QR0[x] of kr
+ *  -- bit 31-x of sk1 comes from bit QL1[x] of kl
+ *  -- bit 15-x of sk1 comes from bit QR1[x] of kr
+ */
+
+static const unsigned char QL0[] = {
+	17,  4, 27, 23, 13, 22,  7, 18,
+	16, 24,  2, 20,  1,  8, 15, 26
+};
+
+static const unsigned char QR0[] = {
+	25, 19,  9,  1,  5, 11, 23,  8,
+	17,  0, 22,  3,  6, 20, 27, 24
+};
+
+static const unsigned char QL1[] = {
+	28, 28, 14, 11, 28, 28, 25,  0,
+	28, 28,  5,  9, 28, 28, 12, 21
+};
+
+static const unsigned char QR1[] = {
+	28, 28, 15,  4, 28, 28, 26, 16,
+	28, 28, 12,  7, 28, 28, 10, 14
+};
+
+/*
+ * 32-bit left rotation; the C compiler is expected to turn it into the
+ * local rotation opcode. n must be in 1..31 (a shift by 32 is undefined).
+ */
+static inline uint32_t
+rotl(uint32_t x, int n)
+{
+	return (x << n) | (x >> (32 - n));
+}
+
+/*
+ * Compute key schedule for 8 key bytes: skey[0..31] gets 16 (sk0, sk1) pairs.
+ */
+static void
+keysched_unit(uint32_t *skey, const void *key)
+{
+	int i;
+
+	br_des_keysched_unit(skey, key);
+
+	/*
+	 * Apply PC-2 + bitslicing, in place, to each (kl, kr) pair of skey[].
+	 */
+	for (i = 0; i < 16; i ++) {
+		uint32_t kl, kr, sk0, sk1;
+		int j;
+
+		kl = skey[(i << 1) + 0];
+		kr = skey[(i << 1) + 1];
+		sk0 = 0;
+		sk1 = 0;
+		for (j = 0; j < 16; j ++) {
+			sk0 <<= 1;
+			sk1 <<= 1;
+			sk0 |= ((kl >> QL0[j]) & (uint32_t)1) << 16;
+			sk0 |= (kr >> QR0[j]) & (uint32_t)1;
+			sk1 |= ((kl >> QL1[j]) & (uint32_t)1) << 16;
+			sk1 |= (kr >> QR1[j]) & (uint32_t)1;
+		}
+
+		skey[(i << 1) + 0] = sk0;
+		skey[(i << 1) + 1] = sk1;
+	}
+
+#if 0
+		/*
+		 * Speed-optimized version for PC-2 + bitslicing.
+		 * (Unused. Kept for reference only.)
+		 */
+		sk0 = kl & (uint32_t)0x00100000;
+		sk0 |= (kl & (uint32_t)0x08008000) << 2;
+		sk0 |= (kl & (uint32_t)0x00400000) << 4;
+		sk0 |= (kl & (uint32_t)0x00800000) << 5;
+		sk0 |= (kl & (uint32_t)0x00040000) << 6;
+		sk0 |= (kl & (uint32_t)0x00010000) << 7;
+		sk0 |= (kl & (uint32_t)0x00000100) << 10;
+		sk0 |= (kl & (uint32_t)0x00022000) << 14;
+		sk0 |= (kl & (uint32_t)0x00000082) << 18;
+		sk0 |= (kl & (uint32_t)0x00000004) << 19;
+		sk0 |= (kl & (uint32_t)0x04000000) >> 10;
+		sk0 |= (kl & (uint32_t)0x00000010) << 26;
+		sk0 |= (kl & (uint32_t)0x01000000) >> 2;
+
+		sk0 |= kr & (uint32_t)0x00000100;
+		sk0 |= (kr & (uint32_t)0x00000008) << 1;
+		sk0 |= (kr & (uint32_t)0x00000200) << 4;
+		sk0 |= rotl(kr & (uint32_t)0x08000021, 6);
+		sk0 |= (kr & (uint32_t)0x01000000) >> 24;
+		sk0 |= (kr & (uint32_t)0x00000002) << 11;
+		sk0 |= (kr & (uint32_t)0x00100000) >> 18;
+		sk0 |= (kr & (uint32_t)0x00400000) >> 17;
+		sk0 |= (kr & (uint32_t)0x00800000) >> 14;
+		sk0 |= (kr & (uint32_t)0x02020000) >> 10;
+		sk0 |= (kr & (uint32_t)0x00080000) >> 5;
+		sk0 |= (kr & (uint32_t)0x00000040) >> 3;
+		sk0 |= (kr & (uint32_t)0x00000800) >> 1;
+
+		sk1 = kl & (uint32_t)0x02000000;
+		sk1 |= (kl & (uint32_t)0x00001000) << 5;
+		sk1 |= (kl & (uint32_t)0x00000200) << 11;
+		sk1 |= (kl & (uint32_t)0x00004000) << 15;
+		sk1 |= (kl & (uint32_t)0x00000020) << 16;
+		sk1 |= (kl & (uint32_t)0x00000800) << 17;
+		sk1 |= (kl & (uint32_t)0x00000001) << 24;
+		sk1 |= (kl & (uint32_t)0x00200000) >> 5;
+
+		sk1 |= (kr & (uint32_t)0x00000010) << 8;
+		sk1 |= (kr & (uint32_t)0x04000000) >> 17;
+		sk1 |= (kr & (uint32_t)0x00004000) >> 14;
+		sk1 |= (kr & (uint32_t)0x00000400) >> 9;
+		sk1 |= (kr & (uint32_t)0x00010000) >> 8;
+		sk1 |= (kr & (uint32_t)0x00001000) >> 7;
+		sk1 |= (kr & (uint32_t)0x00000080) >> 3;
+		sk1 |= (kr & (uint32_t)0x00008000) >> 2;
+#endif
+}
+
+/* see inner.h; returns 1 for key_len 8, else 3; lengths other than 8 and 16 are read as 24 bytes */
+unsigned
+br_des_ct_keysched(uint32_t *skey, const void *key, size_t key_len)
+{
+	switch (key_len) {
+	case 8:
+		keysched_unit(skey, key);
+		return 1;
+	case 16:
+		keysched_unit(skey, key);
+		keysched_unit(skey + 32, (const unsigned char *)key + 8);
+		br_des_rev_skey(skey + 32);
+		memcpy(skey + 64, skey, 32 * sizeof *skey);
+		return 3;
+	default:
+		keysched_unit(skey, key);
+		keysched_unit(skey + 32, (const unsigned char *)key + 8);
+		br_des_rev_skey(skey + 32);
+		keysched_unit(skey + 64, (const unsigned char *)key + 16);
+		return 3;
+	}
+}
+
+/*
+ * DES confusion function. This function performs expansion E (32 to
+ * 48 bits), XOR with subkey (six words sk[0..5]), S-boxes, and permutation P.
+ */
+static inline uint32_t
+Fconf(uint32_t r0, const uint32_t *sk)
+{
+	/*
+	 * Each 6->4 S-box is virtually turned into four 6->1 boxes; we
+	 * thus end up with 32 boxes that we call "T-boxes" here. We will
+	 * evaluate them with bitslice code.
+	 *
+	 * Each T-box is a circuit of multiplexers (sort of) and thus
+	 * takes 70 inputs: the 6 actual T-box inputs, and 64 constants
+	 * that describe the T-box output for all combinations of the
+	 * 6 inputs. With this model, all T-boxes are identical (with
+	 * distinct inputs) and thus can be executed in parallel with
+	 * bitslice code.
+	 *
+	 * T-boxes are numbered from 0 to 31, in least-to-most
+	 * significant order. Thus, S-box S1 corresponds to T-boxes 31,
+	 * 30, 29 and 28, in that order. T-box 'n' is computed with the
+	 * bits at rank 'n' in the 32-bit words.
+	 *
+	 * Words x0 to x5 contain the T-box inputs 0 to 5.
+	 */
+	uint32_t x0, x1, x2, x3, x4, x5, z0;
+	uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9;
+	uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
+	uint32_t y20, y21, y22, y23, y24, y25, y26, y27, y28, y29;
+	uint32_t y30;
+
+	/*
+	 * Spread input bits over the 6 input words x* (each bit fills a nibble).
+	 */
+	x1 = r0 & (uint32_t)0x11111111;
+	x2 = (r0 >> 1) & (uint32_t)0x11111111;
+	x3 = (r0 >> 2) & (uint32_t)0x11111111;
+	x4 = (r0 >> 3) & (uint32_t)0x11111111;
+	x1 = (x1 << 4) - x1;
+	x2 = (x2 << 4) - x2;
+	x3 = (x3 << 4) - x3;
+	x4 = (x4 << 4) - x4;
+	x0 = (x4 << 4) | (x4 >> 28);
+	x5 = (x1 >> 4) | (x1 << 28);
+
+	/*
+	 * XOR with the subkey for this round.
+	 */
+	x0 ^= sk[0];
+	x1 ^= sk[1];
+	x2 ^= sk[2];
+	x3 ^= sk[3];
+	x4 ^= sk[4];
+	x5 ^= sk[5];
+
+	/*
+	 * The T-boxes are done in parallel, since they all use a
+	 * "tree of multiplexers". We use "fake multiplexers":
+	 *
+	 *   y = a ^ (x & b)
+	 *
+	 * computes y as either 'a' (if x == 0) or 'a ^ b' (if x == 1).
+	 */
+	y0 = (uint32_t)0xEFA72C4D ^ (x0 & (uint32_t)0xEC7AC69C);
+	y1 = (uint32_t)0xAEAAEDFF ^ (x0 & (uint32_t)0x500FB821);
+	y2 = (uint32_t)0x37396665 ^ (x0 & (uint32_t)0x40EFA809);
+	y3 = (uint32_t)0x68D7B833 ^ (x0 & (uint32_t)0xA5EC0B28);
+	y4 = (uint32_t)0xC9C755BB ^ (x0 & (uint32_t)0x252CF820);
+	y5 = (uint32_t)0x73FC3606 ^ (x0 & (uint32_t)0x40205801);
+	y6 = (uint32_t)0xA2A0A918 ^ (x0 & (uint32_t)0xE220F929);
+	y7 = (uint32_t)0x8222BD90 ^ (x0 & (uint32_t)0x44A3F9E1);
+	y8 = (uint32_t)0xD6B6AC77 ^ (x0 & (uint32_t)0x794F104A);
+	y9 = (uint32_t)0x3069300C ^ (x0 & (uint32_t)0x026F320B);
+	y10 = (uint32_t)0x6CE0D5CC ^ (x0 & (uint32_t)0x7640B01A);
+	y11 = (uint32_t)0x59A9A22D ^ (x0 & (uint32_t)0x238F1572);
+	y12 = (uint32_t)0xAC6D0BD4 ^ (x0 & (uint32_t)0x7A63C083);
+	y13 = (uint32_t)0x21C83200 ^ (x0 & (uint32_t)0x11CCA000);
+	y14 = (uint32_t)0xA0E62188 ^ (x0 & (uint32_t)0x202F69AA);
+	/* y15 = (uint32_t)0x00000000 ^ (x0 & (uint32_t)0x00000000); */
+	y16 = (uint32_t)0xAF7D655A ^ (x0 & (uint32_t)0x51B33BE9);
+	y17 = (uint32_t)0xF0168AA3 ^ (x0 & (uint32_t)0x3B0FE8AE);
+	y18 = (uint32_t)0x90AA30C6 ^ (x0 & (uint32_t)0x90BF8816);
+	y19 = (uint32_t)0x5AB2750A ^ (x0 & (uint32_t)0x09E34F9B);
+	y20 = (uint32_t)0x5391BE65 ^ (x0 & (uint32_t)0x0103BE88);
+	y21 = (uint32_t)0x93372BAF ^ (x0 & (uint32_t)0x49AC8E25);
+	y22 = (uint32_t)0xF288210C ^ (x0 & (uint32_t)0x922C313D);
+	y23 = (uint32_t)0x920AF5C0 ^ (x0 & (uint32_t)0x70EF31B0);
+	y24 = (uint32_t)0x63D312C0 ^ (x0 & (uint32_t)0x6A707100);
+	y25 = (uint32_t)0x537B3006 ^ (x0 & (uint32_t)0xB97C9011);
+	y26 = (uint32_t)0xA2EFB0A5 ^ (x0 & (uint32_t)0xA320C959);
+	y27 = (uint32_t)0xBC8F96A5 ^ (x0 & (uint32_t)0x6EA0AB4A);
+	y28 = (uint32_t)0xFAD176A5 ^ (x0 & (uint32_t)0x6953DDF8);
+	y29 = (uint32_t)0x665A14A3 ^ (x0 & (uint32_t)0xF74F3E2B);
+	y30 = (uint32_t)0xF2EFF0CC ^ (x0 & (uint32_t)0xF0306CAD);
+	/* y31 = (uint32_t)0x00000000 ^ (x0 & (uint32_t)0x00000000); */
+
+	y0 = y0 ^ (x1 & y1);
+	y1 = y2 ^ (x1 & y3);
+	y2 = y4 ^ (x1 & y5);
+	y3 = y6 ^ (x1 & y7);
+	y4 = y8 ^ (x1 & y9);
+	y5 = y10 ^ (x1 & y11);
+	y6 = y12 ^ (x1 & y13);
+	y7 = y14; /* was: y14 ^ (x1 & y15) */
+	y8 = y16 ^ (x1 & y17);
+	y9 = y18 ^ (x1 & y19);
+	y10 = y20 ^ (x1 & y21);
+	y11 = y22 ^ (x1 & y23);
+	y12 = y24 ^ (x1 & y25);
+	y13 = y26 ^ (x1 & y27);
+	y14 = y28 ^ (x1 & y29);
+	y15 = y30; /* was: y30 ^ (x1 & y31) */
+
+	y0 = y0 ^ (x2 & y1);
+	y1 = y2 ^ (x2 & y3);
+	y2 = y4 ^ (x2 & y5);
+	y3 = y6 ^ (x2 & y7);
+	y4 = y8 ^ (x2 & y9);
+	y5 = y10 ^ (x2 & y11);
+	y6 = y12 ^ (x2 & y13);
+	y7 = y14 ^ (x2 & y15);
+
+	y0 = y0 ^ (x3 & y1);
+	y1 = y2 ^ (x3 & y3);
+	y2 = y4 ^ (x3 & y5);
+	y3 = y6 ^ (x3 & y7);
+
+	y0 = y0 ^ (x4 & y1);
+	y1 = y2 ^ (x4 & y3);
+
+	y0 = y0 ^ (x5 & y1);
+
+	/*
+	 * The P permutation:
+	 * -- Each bit move is converted into a mask + left rotation.
+	 * -- Rotations that use the same movement are coalesced together.
+	 * -- Left and right shifts are used as alternatives to a rotation
+	 * where appropriate (this will help architectures that do not have
+	 * a rotation opcode).
+	 */
+	z0 = (y0 & (uint32_t)0x00000004) << 3;
+	z0 |= (y0 & (uint32_t)0x00004000) << 4;
+	z0 |= rotl(y0 & 0x12020120, 5);
+	z0 |= (y0 & (uint32_t)0x00100000) << 6;
+	z0 |= (y0 & (uint32_t)0x00008000) << 9;
+	z0 |= (y0 & (uint32_t)0x04000000) >> 22;
+	z0 |= (y0 & (uint32_t)0x00000001) << 11;
+	z0 |= rotl(y0 & 0x20000200, 12);
+	z0 |= (y0 & (uint32_t)0x00200000) >> 19;
+	z0 |= (y0 & (uint32_t)0x00000040) << 14;
+	z0 |= (y0 & (uint32_t)0x00010000) << 15;
+	z0 |= (y0 & (uint32_t)0x00000002) << 16;
+	z0 |= rotl(y0 & 0x40801800, 17);
+	z0 |= (y0 & (uint32_t)0x00080000) >> 13;
+	z0 |= (y0 & (uint32_t)0x00000010) << 21;
+	z0 |= (y0 & (uint32_t)0x01000000) >> 10;
+	z0 |= rotl(y0 & 0x88000008, 24);
+	z0 |= (y0 & (uint32_t)0x00000480) >> 7;
+	z0 |= (y0 & (uint32_t)0x00442000) >> 6;
+	return z0;
+}
+
+/*
+ * Process one block through 16 successive rounds (6 words of sk_exp per
+ * round, 96 in total), omitting the swap in the final round.
+ */
+static void
+process_block_unit(uint32_t *pl, uint32_t *pr, const uint32_t *sk_exp)
+{
+	int i;
+	uint32_t l, r;
+
+	l = *pl;
+	r = *pr;
+	for (i = 0; i < 16; i ++) {
+		uint32_t t;
+
+		t = l ^ Fconf(r, sk_exp);
+		l = r;
+		r = t;
+		sk_exp += 6;
+	}
+	*pl = r;
+	*pr = l;
+}
+
+/* see inner.h; the 8-byte block is transformed in place, each iteration using 96 words of sk_exp */
+void
+br_des_ct_process_block(unsigned num_rounds,
+	const uint32_t *sk_exp, void *block)
+{
+	unsigned char *buf;
+	uint32_t l, r;
+
+	buf = block;
+	l = br_dec32be(buf);
+	r = br_dec32be(buf + 4);
+	br_des_do_IP(&l, &r);
+	while (num_rounds -- > 0) {
+		process_block_unit(&l, &r, sk_exp);
+		sk_exp += 96;
+	}
+	br_des_do_invIP(&l, &r);
+	br_enc32be(buf, l);
+	br_enc32be(buf + 4, r);
+}
+
+/* see inner.h; every two skey words expand into six sk_exp words, for 16 * num_rounds pairs */
+void
+br_des_ct_skey_expand(uint32_t *sk_exp,
+	unsigned num_rounds, const uint32_t *skey)
+{
+	num_rounds <<= 4;
+	while (num_rounds -- > 0) {
+		uint32_t v, w0, w1, w2, w3;
+
+		v = *skey ++;
+		w0 = v & 0x11111111;
+		w1 = (v >> 1) & 0x11111111;
+		w2 = (v >> 2) & 0x11111111;
+		w3 = (v >> 3) & 0x11111111;
+		*sk_exp ++ = (w0 << 4) - w0;
+		*sk_exp ++ = (w1 << 4) - w1;
+		*sk_exp ++ = (w2 << 4) - w2;
+		*sk_exp ++ = (w3 << 4) - w3;
+		v = *skey ++;
+		w0 = v & 0x11111111;
+		w1 = (v >> 1) & 0x11111111;
+		*sk_exp ++ = (w0 << 4) - w0;
+		*sk_exp ++ = (w1 << 4) - w1;
+	}
+}
diff --git a/third_party/bearssl/src/des_ct_cbcdec.c b/third_party/bearssl/src/des_ct_cbcdec.c
new file mode 100644
index 0000000..d208a3d
--- /dev/null
+++ b/third_party/bearssl/src/des_ct_cbcdec.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h */
+void
+br_des_ct_cbcdec_init(br_des_ct_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	uint32_t *lo, *hi;
+
+	ctx->vtable = &br_des_ct_cbcdec_vtable;
+	ctx->num_rounds = br_des_ct_keysched(ctx->skey, key, len);
+	if (len == 8) {
+		br_des_rev_skey(ctx->skey);
+		return;
+	}
+	/* Reverse the order of the 48 two-word entries of the schedule. */
+	for (lo = ctx->skey, hi = ctx->skey + 94; lo < hi; lo += 2, hi -= 2) {
+		uint32_t t0 = lo[0];
+		uint32_t t1 = lo[1];
+
+		lo[0] = hi[0];
+		lo[1] = hi[1];
+		hi[0] = t0;
+		hi[1] = t1;
+	}
+}
+
+/* see bearssl_block.h */
+void
+br_des_ct_cbcdec_run(const br_des_ct_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *chain = iv;
+	unsigned char *cur = data;
+	uint32_t sk_exp[288];
+
+	/* Expand the key schedule once for the whole call. */
+	br_des_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	for (; len > 0; len -= 8, cur += 8) {
+		unsigned char saved[8];
+		size_t k;
+
+		/* Keep the ciphertext: it is the chaining value of the next block. */
+		memcpy(saved, cur, sizeof saved);
+		br_des_ct_process_block(ctx->num_rounds, sk_exp, cur);
+		/* XOR the previous chaining value into the decrypted block. */
+		for (k = 0; k < sizeof saved; k ++) {
+			cur[k] ^= chain[k];
+		}
+		memcpy(chain, saved, sizeof saved);
+	}
+}
+
+/* see bearssl_block.h; positional initialiser, field names are declared there */
+const br_block_cbcdec_class br_des_ct_cbcdec_vtable = {
+	sizeof(br_des_ct_cbcdec_keys),	/* size of the key context structure */
+	8,	/* presumably the block size in bytes -- confirm against bearssl_block.h */
+	3,	/* presumably log2 of the block size -- confirm against bearssl_block.h */
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))	/* cast to the generic init signature */
+		&br_des_ct_cbcdec_init,
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))	/* cast to the generic run signature */
+		&br_des_ct_cbcdec_run
+};
diff --git a/third_party/bearssl/src/des_ct_cbcenc.c b/third_party/bearssl/src/des_ct_cbcenc.c
new file mode 100644
index 0000000..4b3610e
--- /dev/null
+++ b/third_party/bearssl/src/des_ct_cbcenc.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h: binds the vtable and computes the key schedule */
+void
+br_des_ct_cbcenc_init(br_des_ct_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_des_ct_cbcenc_vtable;
+	ctx->num_rounds = br_des_ct_keysched(ctx->skey, key, len);	/* fills ctx->skey; result is later passed to the block function */
+}
+
+/* see bearssl_block.h */
+void
+br_des_ct_cbcenc_run(const br_des_ct_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *chain = iv;
+	unsigned char *cur = data;
+	uint32_t sk_exp[288];
+
+	/* Expand the key schedule once for the whole call. */
+	br_des_ct_skey_expand(sk_exp, ctx->num_rounds, ctx->skey);
+	for (; len > 0; len -= 8, cur += 8) {
+		size_t k;
+
+		/* CBC: mix the chaining value in before encrypting. */
+		for (k = 0; k < 8; k ++) {
+			cur[k] ^= chain[k];
+		}
+		br_des_ct_process_block(ctx->num_rounds, sk_exp, cur);
+		/* The fresh ciphertext block becomes the next chaining value. */
+		memcpy(chain, cur, 8);
+	}
+}
+
+/* see bearssl_block.h; positional initialiser, field names are declared there */
+const br_block_cbcenc_class br_des_ct_cbcenc_vtable = {
+	sizeof(br_des_ct_cbcenc_keys),	/* size of the key context structure */
+	8,	/* presumably the block size in bytes -- confirm against bearssl_block.h */
+	3,	/* presumably log2 of the block size -- confirm against bearssl_block.h */
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))	/* cast to the generic init signature */
+		&br_des_ct_cbcenc_init,
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))	/* cast to the generic run signature */
+		&br_des_ct_cbcenc_run
+};
diff --git a/third_party/bearssl/src/des_support.c b/third_party/bearssl/src/des_support.c
new file mode 100644
index 0000000..37f6db3
--- /dev/null
+++ b/third_party/bearssl/src/des_support.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: permutes the 64 bits held in (*xl, *xr) in place */
+void
+br_des_do_IP(uint32_t *xl, uint32_t *xr)
+{
+	/*
+	 * Permutation algorithm is initially from Richard Outerbridge;
+	 * implementation here is adapted from Crypto++ "des.cpp" file
+	 * (which is in public domain).
+	 */
+	uint32_t l, r, t;
+
+	l = *xl;
+	r = *xr;
+	t = ((l >>  4) ^ r) & (uint32_t)0x0F0F0F0F;	/* masked bits where (l >> 4) and r differ */
+	r ^= t;	/* flipping them on both sides exchanges the two bit groups */
+	l ^= t <<  4;
+	t = ((l >> 16) ^ r) & (uint32_t)0x0000FFFF;	/* exchange high half of l with low half of r */
+	r ^= t;
+	l ^= t << 16;
+	t = ((r >>  2) ^ l) & (uint32_t)0x33333333;	/* same exchange pattern, roles of l and r reversed */
+	l ^= t;
+	r ^= t <<  2;
+	t = ((r >>  8) ^ l) & (uint32_t)0x00FF00FF;	/* exchange at byte granularity */
+	l ^= t;
+	r ^= t <<  8;
+	t = ((l >>  1) ^ r) & (uint32_t)0x55555555;	/* exchange at single-bit granularity */
+	r ^= t;
+	l ^= t <<  1;
+	*xl = l;
+	*xr = r;
+}
+
+/* see inner.h: undoes br_des_do_IP() on (*xl, *xr), in place */
+void
+br_des_do_invIP(uint32_t *xl, uint32_t *xr)
+{
+	/*
+	 * See br_des_do_IP(). The same five exchange steps are applied
+	 */	/* ...in the opposite order; each step is its own inverse. */
+	uint32_t l, r, t;
+
+	l = *xl;
+	r = *xr;
+	t = ((l >>  1) ^ r) & 0x55555555;	/* last step of br_des_do_IP() */
+	r ^= t;
+	l ^= t <<  1;
+	t = ((r >>  8) ^ l) & 0x00FF00FF;	/* fourth step */
+	l ^= t;
+	r ^= t <<  8;
+	t = ((r >>  2) ^ l) & 0x33333333;	/* third step */
+	l ^= t;
+	r ^= t <<  2;
+	t = ((l >> 16) ^ r) & 0x0000FFFF;	/* second step */
+	r ^= t;
+	l ^= t << 16;
+	t = ((l >>  4) ^ r) & 0x0F0F0F0F;	/* first step */
+	r ^= t;
+	l ^= t <<  4;
+	*xl = l;
+	*xr = r;
+}
+
+/* see inner.h: fills skey[0..31] with rotated 28-bit key words (PC-2 not applied) */
+void
+br_des_keysched_unit(uint32_t *skey, const void *key)
+{
+	uint32_t xl, xr, kl, kr;
+	int i;
+
+	xl = br_dec32be(key);	/* the key is read as two 32-bit words, 8 bytes in total */
+	xr = br_dec32be((const unsigned char *)key + 4);
+
+	/*
+	 * Permutation PC-1 is quite similar to the IP permutation.
+	 * Definition of IP (in FIPS 46-3 notations) is:
+	 *   58 50 42 34 26 18 10 2
+	 *   60 52 44 36 28 20 12 4
+	 *   62 54 46 38 30 22 14 6
+	 *   64 56 48 40 32 24 16 8
+	 *   57 49 41 33 25 17  9 1
+	 *   59 51 43 35 27 19 11 3
+	 *   61 53 45 37 29 21 13 5
+	 *   63 55 47 39 31 23 15 7
+	 *
+	 * Definition of PC-1 is:
+	 *   57 49 41 33 25 17  9 1
+	 *   58 50 42 34 26 18 10 2
+	 *   59 51 43 35 27 19 11 3
+	 *   60 52 44 36
+	 *   63 55 47 39 31 23 15 7
+	 *   62 54 46 38 30 22 14 6
+	 *   61 53 45 37 29 21 13 5
+	 *   28 20 12  4
+	 */
+	br_des_do_IP(&xl, &xr);
+	kl = ((xr & (uint32_t)0xFF000000) >> 4)	/* kl bits 20..27 */
+		| ((xl & (uint32_t)0xFF000000) >> 12)	/* kl bits 12..19 */
+		| ((xr & (uint32_t)0x00FF0000) >> 12)	/* kl bits 4..11 */
+		| ((xl & (uint32_t)0x00FF0000) >> 20);	/* kl bits 0..3 (low four source bits fall off) */
+	kr = ((xr & (uint32_t)0x000000FF) << 20)	/* kr bits 20..27 */
+		| ((xl & (uint32_t)0x0000FF00) << 4)	/* kr bits 12..19 */
+		| ((xr & (uint32_t)0x0000FF00) >> 4)	/* kr bits 4..11 */
+		| ((xl & (uint32_t)0x000F0000) >> 16);	/* kr bits 0..3 */
+
+	/*
+	 * For each round, rotate the two 28-bit words kl and kr.
+	 * The extraction of the 48-bit subkey (PC-2) is not done yet.
+	 */
+	for (i = 0; i < 16; i ++) {
+		if ((1 << i) & 0x8103) {	/* i = 0, 1, 8, 15: rotate left by one bit */
+			kl = (kl << 1) | (kl >> 27);
+			kr = (kr << 1) | (kr >> 27);
+		} else {	/* every other round: rotate left by two bits */
+			kl = (kl << 2) | (kl >> 26);
+			kr = (kr << 2) | (kr >> 26);
+		}
+		kl &= (uint32_t)0x0FFFFFFF;	/* drop what the shift pushed above bit 27 */
+		kr &= (uint32_t)0x0FFFFFFF;
+		skey[(i << 1) + 0] = kl;	/* round i stores its pair at skey[2i] and skey[2i + 1] */
+		skey[(i << 1) + 1] = kr;
+	}
+}
+
+/* see inner.h; reverses the order of the 16 two-word entries in skey[0..31] */
+void
+br_des_rev_skey(uint32_t *skey)
+{
+	uint32_t *lo, *hi;
+
+	for (lo = skey, hi = skey + 30; lo < hi; lo += 2, hi -= 2) {
+		uint32_t t0, t1;
+
+		t0 = lo[0];
+		t1 = lo[1];
+		lo[0] = hi[0];
+		lo[1] = hi[1];
+		hi[0] = t0;
+		hi[1] = t1;
+	}
+}
diff --git a/third_party/bearssl/src/des_tab.c b/third_party/bearssl/src/des_tab.c
new file mode 100644
index 0000000..3f8e4f9
--- /dev/null
+++ b/third_party/bearssl/src/des_tab.c
@@ -0,0 +1,310 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * PC2left[x] tells where bit x goes when applying PC-2. 'x' is a bit
+ * position in the left rotated key word. Both positions are in normal
+ * order (rightmost bit is 0); target 24 lies above the bits Fconf() reads.
+ */
+static const unsigned char PC2left[] = {	/* 28 entries, one per source bit */
+	16,  3,  7, 24, 20, 11, 24,
+	13,  2, 10, 24, 22,  5, 15,
+	23,  1,  9, 21, 12, 24,  6,
+	 4, 14, 18,  8, 17,  0, 19
+};
+
+/*
+ * Same layout as PC2left[x], but for the right rotated key word.
+ */
+static const unsigned char PC2right[] = {	/* 28 entries; target 24 lies above the bits Fconf() reads */
+	 8, 18, 24,  6, 22, 15,  3,
+	10, 12, 19,  5, 14, 11, 24,
+	 4, 23, 16,  9, 24, 20,  2,
+	24,  7, 13,  0, 21, 17,  1
+};
+
+/* S-box tables S1..S8: 64 words each; Fconf() takes one word per table
+ * (6-bit index) and ORs the eight together. NOTE(review): this comment
+ * used to say "merged with PC-1"; entries look P-permuted -- confirm. */
+static const uint32_t S1[] = {	/* index: ((r1 >> 11) ^ (skl >> 18)) & 0x3F */
+	0x00808200, 0x00000000, 0x00008000, 0x00808202,
+	0x00808002, 0x00008202, 0x00000002, 0x00008000,
+	0x00000200, 0x00808200, 0x00808202, 0x00000200,
+	0x00800202, 0x00808002, 0x00800000, 0x00000002,
+	0x00000202, 0x00800200, 0x00800200, 0x00008200,
+	0x00008200, 0x00808000, 0x00808000, 0x00800202,
+	0x00008002, 0x00800002, 0x00800002, 0x00008002,
+	0x00000000, 0x00000202, 0x00008202, 0x00800000,
+	0x00008000, 0x00808202, 0x00000002, 0x00808000,
+	0x00808200, 0x00800000, 0x00800000, 0x00000200,
+	0x00808002, 0x00008000, 0x00008200, 0x00800002,
+	0x00000200, 0x00000002, 0x00800202, 0x00008202,
+	0x00808202, 0x00008002, 0x00808000, 0x00800202,
+	0x00800002, 0x00000202, 0x00008202, 0x00808200,
+	0x00000202, 0x00800200, 0x00800200, 0x00000000,
+	0x00008002, 0x00008200, 0x00000000, 0x00808002
+};
+
+static const uint32_t S2[] = {	/* 64 words; index in Fconf(): ((r0 >> 23) ^ (skl >> 12)) & 0x3F */
+	0x40084010, 0x40004000, 0x00004000, 0x00084010,
+	0x00080000, 0x00000010, 0x40080010, 0x40004010,
+	0x40000010, 0x40084010, 0x40084000, 0x40000000,
+	0x40004000, 0x00080000, 0x00000010, 0x40080010,
+	0x00084000, 0x00080010, 0x40004010, 0x00000000,
+	0x40000000, 0x00004000, 0x00084010, 0x40080000,
+	0x00080010, 0x40000010, 0x00000000, 0x00084000,
+	0x00004010, 0x40084000, 0x40080000, 0x00004010,
+	0x00000000, 0x00084010, 0x40080010, 0x00080000,
+	0x40004010, 0x40080000, 0x40084000, 0x00004000,
+	0x40080000, 0x40004000, 0x00000010, 0x40084010,
+	0x00084010, 0x00000010, 0x00004000, 0x40000000,
+	0x00004010, 0x40084000, 0x00080000, 0x40000010,
+	0x00080010, 0x40004010, 0x40000010, 0x00080010,
+	0x00084000, 0x00000000, 0x40004000, 0x00004010,
+	0x40000000, 0x40080010, 0x40084010, 0x00084000
+};
+
+static const uint32_t S3[] = {	/* 64 words; index in Fconf(): ((r0 >> 19) ^ (skl >>  6)) & 0x3F */
+	0x00000104, 0x04010100, 0x00000000, 0x04010004,
+	0x04000100, 0x00000000, 0x00010104, 0x04000100,
+	0x00010004, 0x04000004, 0x04000004, 0x00010000,
+	0x04010104, 0x00010004, 0x04010000, 0x00000104,
+	0x04000000, 0x00000004, 0x04010100, 0x00000100,
+	0x00010100, 0x04010000, 0x04010004, 0x00010104,
+	0x04000104, 0x00010100, 0x00010000, 0x04000104,
+	0x00000004, 0x04010104, 0x00000100, 0x04000000,
+	0x04010100, 0x04000000, 0x00010004, 0x00000104,
+	0x00010000, 0x04010100, 0x04000100, 0x00000000,
+	0x00000100, 0x00010004, 0x04010104, 0x04000100,
+	0x04000004, 0x00000100, 0x00000000, 0x04010004,
+	0x04000104, 0x00010000, 0x04000000, 0x04010104,
+	0x00000004, 0x00010104, 0x00010100, 0x04000004,
+	0x04010000, 0x04000104, 0x00000104, 0x04010000,
+	0x00010104, 0x00000004, 0x04010004, 0x00010100
+};
+
+static const uint32_t S4[] = {	/* 64 words; index in Fconf(): ((r0 >> 15) ^ skl) & 0x3F */
+	0x80401000, 0x80001040, 0x80001040, 0x00000040,
+	0x00401040, 0x80400040, 0x80400000, 0x80001000,
+	0x00000000, 0x00401000, 0x00401000, 0x80401040,
+	0x80000040, 0x00000000, 0x00400040, 0x80400000,
+	0x80000000, 0x00001000, 0x00400000, 0x80401000,
+	0x00000040, 0x00400000, 0x80001000, 0x00001040,
+	0x80400040, 0x80000000, 0x00001040, 0x00400040,
+	0x00001000, 0x00401040, 0x80401040, 0x80000040,
+	0x00400040, 0x80400000, 0x00401000, 0x80401040,
+	0x80000040, 0x00000000, 0x00000000, 0x00401000,
+	0x00001040, 0x00400040, 0x80400040, 0x80000000,
+	0x80401000, 0x80001040, 0x80001040, 0x00000040,
+	0x80401040, 0x80000040, 0x80000000, 0x00001000,
+	0x80400000, 0x80001000, 0x00401040, 0x80400040,
+	0x80001000, 0x00001040, 0x00400000, 0x80401000,
+	0x00000040, 0x00400000, 0x00001000, 0x00401040
+};
+
+static const uint32_t S5[] = {	/* 64 words; index in Fconf(): ((r0 >> 11) ^ (skr >> 18)) & 0x3F */
+	0x00000080, 0x01040080, 0x01040000, 0x21000080,
+	0x00040000, 0x00000080, 0x20000000, 0x01040000,
+	0x20040080, 0x00040000, 0x01000080, 0x20040080,
+	0x21000080, 0x21040000, 0x00040080, 0x20000000,
+	0x01000000, 0x20040000, 0x20040000, 0x00000000,
+	0x20000080, 0x21040080, 0x21040080, 0x01000080,
+	0x21040000, 0x20000080, 0x00000000, 0x21000000,
+	0x01040080, 0x01000000, 0x21000000, 0x00040080,
+	0x00040000, 0x21000080, 0x00000080, 0x01000000,
+	0x20000000, 0x01040000, 0x21000080, 0x20040080,
+	0x01000080, 0x20000000, 0x21040000, 0x01040080,
+	0x20040080, 0x00000080, 0x01000000, 0x21040000,
+	0x21040080, 0x00040080, 0x21000000, 0x21040080,
+	0x01040000, 0x00000000, 0x20040000, 0x21000000,
+	0x00040080, 0x01000080, 0x20000080, 0x00040000,
+	0x00000000, 0x20040000, 0x01040080, 0x20000080
+};
+
+static const uint32_t S6[] = {	/* 64 words; index in Fconf(): ((r0 >>  7) ^ (skr >> 12)) & 0x3F */
+	0x10000008, 0x10200000, 0x00002000, 0x10202008,
+	0x10200000, 0x00000008, 0x10202008, 0x00200000,
+	0x10002000, 0x00202008, 0x00200000, 0x10000008,
+	0x00200008, 0x10002000, 0x10000000, 0x00002008,
+	0x00000000, 0x00200008, 0x10002008, 0x00002000,
+	0x00202000, 0x10002008, 0x00000008, 0x10200008,
+	0x10200008, 0x00000000, 0x00202008, 0x10202000,
+	0x00002008, 0x00202000, 0x10202000, 0x10000000,
+	0x10002000, 0x00000008, 0x10200008, 0x00202000,
+	0x10202008, 0x00200000, 0x00002008, 0x10000008,
+	0x00200000, 0x10002000, 0x10000000, 0x00002008,
+	0x10000008, 0x10202008, 0x00202000, 0x10200000,
+	0x00202008, 0x10202000, 0x00000000, 0x10200008,
+	0x00000008, 0x00002000, 0x10200000, 0x00202008,
+	0x00002000, 0x00200008, 0x10002008, 0x00000000,
+	0x10202000, 0x10000000, 0x00200008, 0x10002008
+};
+
+static const uint32_t S7[] = {	/* 64 words; index in Fconf(): ((r0 >>  3) ^ (skr >>  6)) & 0x3F */
+	0x00100000, 0x02100001, 0x02000401, 0x00000000,
+	0x00000400, 0x02000401, 0x00100401, 0x02100400,
+	0x02100401, 0x00100000, 0x00000000, 0x02000001,
+	0x00000001, 0x02000000, 0x02100001, 0x00000401,
+	0x02000400, 0x00100401, 0x00100001, 0x02000400,
+	0x02000001, 0x02100000, 0x02100400, 0x00100001,
+	0x02100000, 0x00000400, 0x00000401, 0x02100401,
+	0x00100400, 0x00000001, 0x02000000, 0x00100400,
+	0x02000000, 0x00100400, 0x00100000, 0x02000401,
+	0x02000401, 0x02100001, 0x02100001, 0x00000001,
+	0x00100001, 0x02000000, 0x02000400, 0x00100000,
+	0x02100400, 0x00000401, 0x00100401, 0x02100400,
+	0x00000401, 0x02000001, 0x02100401, 0x02100000,
+	0x00100400, 0x00000000, 0x00000001, 0x02100401,
+	0x00000000, 0x00100401, 0x02100000, 0x00000400,
+	0x02000001, 0x02000400, 0x00000400, 0x00100001
+};
+
+static const uint32_t S8[] = {	/* 64 words; index in Fconf(): ((r1 >> 15) ^ skr) & 0x3F */
+	0x08000820, 0x00000800, 0x00020000, 0x08020820,
+	0x08000000, 0x08000820, 0x00000020, 0x08000000,
+	0x00020020, 0x08020000, 0x08020820, 0x00020800,
+	0x08020800, 0x00020820, 0x00000800, 0x00000020,
+	0x08020000, 0x08000020, 0x08000800, 0x00000820,
+	0x00020800, 0x00020020, 0x08020020, 0x08020800,
+	0x00000820, 0x00000000, 0x00000000, 0x08020020,
+	0x08000020, 0x08000800, 0x00020820, 0x00020000,
+	0x00020820, 0x00020000, 0x08020800, 0x00000800,
+	0x00000020, 0x08020020, 0x00000800, 0x00020820,
+	0x08000800, 0x00000020, 0x08000020, 0x08020000,
+	0x08020020, 0x08000000, 0x00020000, 0x08000820,
+	0x00000000, 0x08020820, 0x00020020, 0x08000020,
+	0x08020000, 0x08000800, 0x08000820, 0x00000000,
+	0x08020820, 0x00020800, 0x00020800, 0x00000820,
+	0x00000820, 0x00020020, 0x08000000, 0x08020800
+};
+
+static inline uint32_t	/* round function: OR of one word from each of S1..S8 */
+Fconf(uint32_t r0, uint32_t skl, uint32_t skr)	/* r0: data half; skl, skr: the round's two subkey words */
+{
+	uint32_t r1;
+
+	r1 = (r0 << 16) | (r0 >> 16);	/* r0 rotated by 16, for the two windows that wrap around bit 31/0 */
+	return	/* every index is a 6-bit data window XORed with 6 subkey bits */
+		  S1[((r1 >> 11) ^ (skl >> 18)) & 0x3F]
+		| S2[((r0 >> 23) ^ (skl >> 12)) & 0x3F]
+		| S3[((r0 >> 19) ^ (skl >>  6)) & 0x3F]
+		| S4[((r0 >> 15) ^ (skl      )) & 0x3F]
+		| S5[((r0 >> 11) ^ (skr >> 18)) & 0x3F]
+		| S6[((r0 >>  7) ^ (skr >> 12)) & 0x3F]
+		| S7[((r0 >>  3) ^ (skr >>  6)) & 0x3F]
+		| S8[((r1 >> 15) ^ (skr      )) & 0x3F];
+}
+
+static void
+process_block_unit(uint32_t *pl, uint32_t *pr, const uint32_t *skey)
+{
+	const uint32_t *rk;
+	uint32_t left, right;
+
+	left = *pl;
+	right = *pr;
+	/* 16 rounds, two subkey words per round; halves stored back exchanged. */
+	for (rk = skey; rk != skey + 32; rk += 2) {
+		uint32_t next = left ^ Fconf(right, rk[0], rk[1]);
+
+		left = right;
+		right = next;
+	}
+	*pl = right;
+	*pr = left;
+}
+
+/* see inner.h */
+void
+br_des_tab_process_block(unsigned num_rounds, const uint32_t *skey, void *block)
+{
+	unsigned char *bytes = block;
+	uint32_t left = br_dec32be(bytes);
+	uint32_t right = br_dec32be(bytes + 4);
+	unsigned pass;
+
+	/* IP, then num_rounds passes of 16 rounds each, then inverse IP. */
+	br_des_do_IP(&left, &right);
+	for (pass = num_rounds; pass != 0; pass --, skey += 32) {
+		/* Each pass reads its own 32-word slice of skey. */
+		process_block_unit(&left, &right, skey);
+	}
+	br_des_do_invIP(&left, &right);
+	br_enc32be(bytes, left);
+	br_enc32be(bytes + 4, right);
+}
+
+static void	/* builds the 16 two-word subkeys of one 8-byte key into skey[0..31] */
+keysched_unit(uint32_t *skey, const void *key)
+{
+	int i;
+
+	br_des_keysched_unit(skey, key);	/* leaves the rotated 28-bit word pairs in skey */
+
+	/*
+	 * Apply PC-2 to get the 48-bit subkeys.
+	 */
+	for (i = 0; i < 16; i ++) {
+		uint32_t xl, xr, ul, ur;
+		int j;
+
+		xl = skey[(i << 1) + 0];
+		xr = skey[(i << 1) + 1];
+		ul = 0;
+		ur = 0;
+		for (j = 0; j < 28; j ++) {	/* source bit j goes to the position given by the tables */
+			ul |= (xl & 1) << PC2left[j];
+			ur |= (xr & 1) << PC2right[j];
+			xl >>= 1;
+			xr >>= 1;
+		}
+		skey[(i << 1) + 0] = ul;	/* the permuted words overwrite the rotated ones in place */
+		skey[(i << 1) + 1] = ur;
+	}
+}
+
+/* see inner.h: fills skey and returns how many 32-word schedules it holds (1 or 3) */
+unsigned
+br_des_tab_keysched(uint32_t *skey, const void *key, size_t key_len)
+{
+	switch (key_len) {
+	case 8:	/* one 8-byte key: a single schedule */
+		keysched_unit(skey, key);
+		return 1;
+	case 16:	/* two keys: the third schedule is a copy of the first */
+		keysched_unit(skey, key);
+		keysched_unit(skey + 32, (const unsigned char *)key + 8);
+		br_des_rev_skey(skey + 32);	/* the middle schedule is stored in reverse order */
+		memcpy(skey + 64, skey, 32 * sizeof *skey);
+		return 3;
+	default:	/* every other length is handled as three keys: 24 bytes of key are read */
+		keysched_unit(skey, key);
+		keysched_unit(skey + 32, (const unsigned char *)key + 8);
+		br_des_rev_skey(skey + 32);	/* the middle schedule is stored in reverse order */
+		keysched_unit(skey + 64, (const unsigned char *)key + 16);
+		return 3;
+	}
+}
diff --git a/third_party/bearssl/src/des_tab_cbcdec.c b/third_party/bearssl/src/des_tab_cbcdec.c
new file mode 100644
index 0000000..e7eabe9
--- /dev/null
+++ b/third_party/bearssl/src/des_tab_cbcdec.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h */
+void
+br_des_tab_cbcdec_init(br_des_tab_cbcdec_keys *ctx,
+	const void *key, size_t len)
+{
+	uint32_t *lo, *hi;
+
+	ctx->vtable = &br_des_tab_cbcdec_vtable;
+	ctx->num_rounds = br_des_tab_keysched(ctx->skey, key, len);
+	if (len == 8) {
+		br_des_rev_skey(ctx->skey);
+		return;
+	}
+	/* Reverse the order of the 48 two-word entries of the schedule. */
+	for (lo = ctx->skey, hi = ctx->skey + 94; lo < hi; lo += 2, hi -= 2) {
+		uint32_t t0 = lo[0];
+		uint32_t t1 = lo[1];
+
+		lo[0] = hi[0];
+		lo[1] = hi[1];
+		hi[0] = t0;
+		hi[1] = t1;
+	}
+}
+
+/* see bearssl_block.h */
+void
+br_des_tab_cbcdec_run(const br_des_tab_cbcdec_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *chain = iv;
+	unsigned char *cur = data;
+
+	for (; len > 0; len -= 8, cur += 8) {
+		unsigned char saved[8];
+		size_t k;
+
+		/* Keep the ciphertext: it is the chaining value of the next block. */
+		memcpy(saved, cur, sizeof saved);
+		br_des_tab_process_block(ctx->num_rounds, ctx->skey, cur);
+		/* XOR the previous chaining value into the decrypted block. */
+		for (k = 0; k < sizeof saved; k ++) {
+			cur[k] ^= chain[k];
+		}
+		/* Hand the saved ciphertext over to the next iteration. */
+		memcpy(chain, saved, sizeof saved);
+	}
+}
+
+/* see bearssl_block.h; positional initialiser, field names are declared there */
+const br_block_cbcdec_class br_des_tab_cbcdec_vtable = {
+	sizeof(br_des_tab_cbcdec_keys),	/* size of the key context structure */
+	8,	/* presumably the block size in bytes -- confirm against bearssl_block.h */
+	3,	/* presumably log2 of the block size -- confirm against bearssl_block.h */
+	(void (*)(const br_block_cbcdec_class **, const void *, size_t))	/* cast to the generic init signature */
+		&br_des_tab_cbcdec_init,
+	(void (*)(const br_block_cbcdec_class *const *, void *, void *, size_t))	/* cast to the generic run signature */
+		&br_des_tab_cbcdec_run
+};
diff --git a/third_party/bearssl/src/des_tab_cbcenc.c b/third_party/bearssl/src/des_tab_cbcenc.c
new file mode 100644
index 0000000..3a45ba3
--- /dev/null
+++ b/third_party/bearssl/src/des_tab_cbcenc.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_block.h: binds the vtable and computes the key schedule */
+void
+br_des_tab_cbcenc_init(br_des_tab_cbcenc_keys *ctx,
+	const void *key, size_t len)
+{
+	ctx->vtable = &br_des_tab_cbcenc_vtable;
+	ctx->num_rounds = br_des_tab_keysched(ctx->skey, key, len);	/* fills ctx->skey; returns 1 or 3 */
+}
+
+/* see bearssl_block.h */
+void
+br_des_tab_cbcenc_run(const br_des_tab_cbcenc_keys *ctx,
+	void *iv, void *data, size_t len)
+{
+	unsigned char *chain = iv;
+	unsigned char *cur = data;
+
+	/* The data is consumed eight bytes at a time. */
+	for (; len > 0; len -= 8, cur += 8) {
+		size_t k;
+
+		/* CBC: mix the chaining value in before encrypting. */
+		for (k = 0; k < 8; k ++) {
+			cur[k] ^= chain[k];
+		}
+		br_des_tab_process_block(ctx->num_rounds, ctx->skey, cur);
+		/* The fresh ciphertext block becomes the next chaining value. */
+		memcpy(chain, cur, 8);
+	}
+}
+
+/* see bearssl_block.h; positional initialiser, field names are declared there */
+const br_block_cbcenc_class br_des_tab_cbcenc_vtable = {
+	sizeof(br_des_tab_cbcenc_keys),	/* size of the key context structure */
+	8,	/* presumably the block size in bytes -- confirm against bearssl_block.h */
+	3,	/* presumably log2 of the block size -- confirm against bearssl_block.h */
+	(void (*)(const br_block_cbcenc_class **, const void *, size_t))	/* cast to the generic init signature */
+		&br_des_tab_cbcenc_init,
+	(void (*)(const br_block_cbcenc_class *const *, void *, void *, size_t))	/* cast to the generic run signature */
+		&br_des_tab_cbcenc_run
+};
diff --git a/third_party/bearssl/src/dig_oid.c b/third_party/bearssl/src/dig_oid.c
new file mode 100644
index 0000000..cd9692c
--- /dev/null
+++ b/third_party/bearssl/src/dig_oid.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * This file contains the encoded OIDs for the standard hash functions.
+ * Such OIDs appear in, for instance, the PKCS#1 v1.5 padding for RSA
+ * signatures.
+ */
+
+static const unsigned char md5_OID[] = {	/* content bytes of OID 1.2.840.113549.2.5 */
+	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x02, 0x05
+};
+
+static const unsigned char sha1_OID[] = {	/* content bytes of OID 1.3.14.3.2.26 */
+	0x2B, 0x0E, 0x03, 0x02, 0x1A
+};
+
+static const unsigned char sha224_OID[] = {	/* content bytes of OID 2.16.840.1.101.3.4.2.4 */
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04
+};
+
+static const unsigned char sha256_OID[] = {	/* content bytes of OID 2.16.840.1.101.3.4.2.1 */
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01
+};
+
+static const unsigned char sha384_OID[] = {	/* content bytes of OID 2.16.840.1.101.3.4.2.2 */
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02
+};
+
+static const unsigned char sha512_OID[] = {	/* content bytes of OID 2.16.840.1.101.3.4.2.3 */
+	0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03
+};
+
+/* see inner.h */
+const unsigned char *
+br_digest_OID(int digest_id, size_t *len)
+{
+	/* One row per supported hash: identifier, encoded OID, OID length. */
+	static const struct {
+		int id;
+		const unsigned char *oid;
+		size_t oid_len;
+	} known[] = {
+		{ br_md5_ID, md5_OID, sizeof md5_OID },
+		{ br_sha1_ID, sha1_OID, sizeof sha1_OID },
+		{ br_sha224_ID, sha224_OID, sizeof sha224_OID },
+		{ br_sha256_ID, sha256_OID, sizeof sha256_OID },
+		{ br_sha384_ID, sha384_OID, sizeof sha384_OID },
+		{ br_sha512_ID, sha512_OID, sizeof sha512_OID }
+	};
+	size_t u;
+
+	for (u = 0; u < (sizeof known) / (sizeof known[0]); u ++) {
+		if (known[u].id == digest_id) {
+			*len = known[u].oid_len;
+			return known[u].oid;
+		}
+	}
+	*len = 0;
+	return NULL;
+}
diff --git a/third_party/bearssl/src/dig_size.c b/third_party/bearssl/src/dig_size.c
new file mode 100644
index 0000000..4625d2c
--- /dev/null
+++ b/third_party/bearssl/src/dig_size.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h */
+size_t
+br_digest_size_by_ID(int digest_id)
+{
+	const struct { int id; size_t size; } sizes[] = {
+		{ br_md5sha1_ID, br_md5_SIZE + br_sha1_SIZE },
+		{ br_md5_ID, br_md5_SIZE },
+		{ br_sha1_ID, br_sha1_SIZE },
+		{ br_sha224_ID, br_sha224_SIZE },
+		{ br_sha256_ID, br_sha256_SIZE },
+		{ br_sha384_ID, br_sha384_SIZE },
+		{ br_sha512_ID, br_sha512_SIZE }
+	};
+	size_t u;
+
+	/* Linear scan of the table; identifiers are distinct. */
+	for (u = 0; u < (sizeof sizes) / (sizeof sizes[0]); u ++) {
+		if (sizes[u].id == digest_id) {
+			return sizes[u].size;
+		}
+	}
+	/* Unknown identifier: report a size of zero (no abort). */
+	return 0;
+}
diff --git a/third_party/bearssl/src/eax.c b/third_party/bearssl/src/eax.c
new file mode 100644
index 0000000..bcc704a
--- /dev/null
+++ b/third_party/bearssl/src/eax.c
@@ -0,0 +1,525 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Implementation Notes
+ * ====================
+ *
+ * The combined CTR + CBC-MAC functions can only handle full blocks,
+ * so some buffering is necessary. Moreover, EAX has a special padding
+ * rule for CBC-MAC, which implies that we cannot compute the MAC over
+ * the last received full block until we know whether we are at the
+ * end of the data or not.
+ *
+ *  - 'ptr' is the number of bytes accumulated in buf[] that still need
+ *    to be processed with the current OMAC computation. It can go up
+ *    to 16: a complete block cannot be processed until it is known
+ *    whether it is the last block or not. It is normally at least 1,
+ *    since OMAC^t input is at least one block long; it is 0 only when
+ *    the state was loaded from a capture (first block already done).
+ *
+ *  - When processing the message itself, CTR encryption/decryption is
+ *    also done at the same time. The first 'ptr' bytes of buf[] then
+ *    contain the encrypted bytes, while the last '16 - ptr' bytes of
+ *    buf[] are the remnants of the stream block, to be used against
+ *    the next input bytes, when available.
+ *
+ *  - The current counter and running CBC-MAC values are kept in 'ctr'
+ *    and 'cbcmac', respectively.
+ *
+ *  - The derived keys for padding are kept in L2 and L4 (double and
+ *    quadruple of Enc_K(0^n), in GF(2^128), respectively).
+ */
+
+/*
+ * Start an OMAC computation; the first block is the big-endian
+ * representation of the provided value ('val' must fit on one byte).
+ * We make it a delayed block because it may also be the last one.
+ */
+static void
+omac_start(br_eax_context *ctx, unsigned val)
+{
+	memset(ctx->cbcmac, 0, sizeof ctx->cbcmac);	/* fresh CBC-MAC state */
+	memset(ctx->buf, 0, sizeof ctx->buf);
+	ctx->buf[15] = val;	/* last byte of the otherwise zero block */
+	ctx->ptr = 16;		/* full block kept pending in buf[] */
+}
+
+/*
+ * Double a 16-byte value (src[0] holds the most significant bits) in
+ * GF(2^128) with modulus X^128+X^7+X^2+X+1; the result goes to dst[].
+ */
+static void
+double_gf128(unsigned char *dst, const unsigned char *src)
+{
+	unsigned cc;
+	int i;
+
+	cc = 0x87 & -((unsigned)src[0] >> 7);	/* 0x87 if top bit set, else 0 */
+	for (i = 15; i >= 0; i --) {
+		unsigned z;
+
+		z = (src[i] << 1) ^ cc;
+		cc = z >> 8;	/* bit shifted out, fed into byte i-1 */
+		dst[i] = (unsigned char)z;
+	}
+}
+
+/*
+ * Apply padding to the last block, currently in ctx->buf (with
+ * ctx->ptr bytes), and run the final CBC-MAC step into ctx->cbcmac.
+ */
+static void
+do_pad(br_eax_context *ctx)
+{
+	unsigned char *pad;
+	size_t ptr, u;
+
+	ptr = ctx->ptr;
+	if (ptr == 16) {
+		pad = ctx->L2;	/* complete block: no padding bytes added */
+	} else {
+		ctx->buf[ptr ++] = 0x80;	/* incomplete: 0x80 then zeros */
+		memset(ctx->buf + ptr, 0x00, 16 - ptr);
+		pad = ctx->L4;
+	}
+	for (u = 0; u < sizeof ctx->buf; u ++) {
+		ctx->buf[u] ^= pad[u];
+	}
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, ctx->buf, sizeof ctx->buf);
+}
+
+/*
+ * Apply CBC-MAC on the provided data, with buffering management.
+ *
+ * Upon entry, two situations are acceptable:
+ *
+ *   ctx->ptr == 0: there is no data to process in ctx->buf
+ *   ctx->ptr == 16: there is a full block of unprocessed data in ctx->buf
+ *
+ * Upon exit, ctx->ptr may be zero only if it was already zero on entry,
+ * and len == 0. In all other situations, ctx->ptr will be non-zero on
+ * exit (and may have value 16).
+ */
+static void
+do_cbcmac_chunk(br_eax_context *ctx, const void *data, size_t len)
+{
+	size_t ptr;
+
+	if (len == 0) {
+		return;
+	}
+	ptr = len & (size_t)15;	/* size of the trailing piece to hold back */
+	if (ptr == 0) {
+		len -= 16;	/* whole blocks only: hold back the last one */
+		ptr = 16;
+	} else {
+		len -= ptr;
+	}
+	if (ctx->ptr == 16) {	/* pending block is no longer the last one */
+		(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
+			ctx->buf, sizeof ctx->buf);
+	}
+	(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac, data, len);
+	memcpy(ctx->buf, (const unsigned char *)data + len, ptr);
+	ctx->ptr = ptr;
+}
+
+/* see bearssl_aead.h: bind ctx to a CTR+CBC-MAC cipher, derive L2 and L4. */
+void
+br_eax_init(br_eax_context *ctx, const br_block_ctrcbc_class **bctx)
+{
+	unsigned char tmp[16], iv[16];
+
+	ctx->vtable = &br_eax_vtable;
+	ctx->bctx = bctx;
+
+	/*
+	 * Encrypt a whole-zero block (CTR call, all-zero IV) for L2 and L4.
+	 */
+	memset(tmp, 0, sizeof tmp);
+	memset(iv, 0, sizeof iv);
+	(*bctx)->ctr(bctx, iv, tmp, sizeof tmp);
+	double_gf128(ctx->L2, tmp);	/* L2 = double of the encrypted block */
+	double_gf128(ctx->L4, ctx->L2);	/* L4 = double of L2 */
+}
+
+/* see bearssl_aead.h: precompute the first-block CBC-MAC states into st. */
+void
+br_eax_capture(const br_eax_context *ctx, br_eax_state *st)
+{
+	/*
+	 * We capture the three OMAC* states _after_ processing the
+	 * initial block (assuming that nonce, message and AAD are
+	 * all non-empty).
+	 */
+	int i;
+
+	memset(st->st, 0, sizeof st->st);
+	for (i = 0; i < 3; i ++) {
+		unsigned char tmp[16];
+
+		memset(tmp, 0, sizeof tmp);
+		tmp[15] = (unsigned char)i;	/* same block as omac_start(i) */
+		(*ctx->bctx)->mac(ctx->bctx, st->st[i], tmp, sizeof tmp);
+	}
+}
+
+/* see bearssl_aead.h: start a new message with the given nonce. */
+void
+br_eax_reset(br_eax_context *ctx, const void *nonce, size_t len)
+{
+	/*
+	 * Process nonce with OMAC^0.
+	 */
+	omac_start(ctx, 0);
+	do_cbcmac_chunk(ctx, nonce, len);
+	do_pad(ctx);
+	memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac);
+
+	/*
+	 * Start OMAC^1 for the AAD ("header" in the EAX specification).
+	 */
+	omac_start(ctx, 1);
+
+	/*
+	 * We use ctx->head[0] as temporary flag to mark that we are
+	 * using a "normal" reset(); br_eax_flip() reads it back.
+	 */
+	ctx->head[0] = 0;
+}
+
+/* see bearssl_aead.h: reset from a captured state; AAD is injected next. */
+void
+br_eax_reset_pre_aad(br_eax_context *ctx, const br_eax_state *st,
+	const void *nonce, size_t len)
+{
+	if (len == 0) {
+		omac_start(ctx, 0);	/* empty nonce: captured state unusable */
+	} else {
+		memcpy(ctx->cbcmac, st->st[0], sizeof ctx->cbcmac);
+		ctx->ptr = 0;	/* first OMAC^0 block is already in the state */
+		do_cbcmac_chunk(ctx, nonce, len);
+	}
+	do_pad(ctx);
+	memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac);
+
+	memcpy(ctx->cbcmac, st->st[1], sizeof ctx->cbcmac);
+	ctx->ptr = 0;
+
+	memcpy(ctx->ctr, st->st[2], sizeof ctx->ctr);
+
+	/*
+	 * We use ctx->head[0] as a flag to indicate that we use
+	 * a recorded state, with ctx->ctr containing the preprocessed
+	 * first block for OMAC^2.
+	 */
+	ctx->head[0] = 1;
+}
+
+/* see bearssl_aead.h: reset from a state that already covers the AAD. */
+void
+br_eax_reset_post_aad(br_eax_context *ctx, const br_eax_state *st,
+	const void *nonce, size_t len)
+{
+	if (len == 0) {
+		omac_start(ctx, 0);	/* empty nonce: captured state unusable */
+	} else {
+		memcpy(ctx->cbcmac, st->st[0], sizeof ctx->cbcmac);
+		ctx->ptr = 0;	/* first OMAC^0 block is already in the state */
+		do_cbcmac_chunk(ctx, nonce, len);
+	}
+	do_pad(ctx);
+	memcpy(ctx->nonce, ctx->cbcmac, sizeof ctx->cbcmac);
+	memcpy(ctx->ctr, ctx->nonce, sizeof ctx->nonce);	/* initial counter */
+
+	memcpy(ctx->head, st->st[1], sizeof ctx->head);	/* taken as the AAD MAC */
+
+	memcpy(ctx->cbcmac, st->st[2], sizeof ctx->cbcmac);
+	ctx->ptr = 0;
+}
+
+/* see bearssl_aead.h: feed 'len' bytes of AAD; may be called repeatedly. */
+void
+br_eax_aad_inject(br_eax_context *ctx, const void *data, size_t len)
+{
+	size_t ptr;
+
+	ptr = ctx->ptr;
+
+	/*
+	 * If there is a partial block, first complete it.
+	 */
+	if (ptr < 16) {
+		size_t clen;
+
+		clen = 16 - ptr;
+		if (len <= clen) {
+			memcpy(ctx->buf + ptr, data, len);
+			ctx->ptr = ptr + len;
+			return;
+		}
+		memcpy(ctx->buf + ptr, data, clen);
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+		ctx->ptr = 16;	/* else do_cbcmac_chunk() would drop buf[] */
+	}
+	/*
+	 * buf[] now holds a full block and ctx->ptr == 16, as required by
+	 * do_cbcmac_chunk(); that block is MACed only if 'len' is not zero.
+	 */
+	do_cbcmac_chunk(ctx, data, len);
+}
+
+/* see bearssl_aead.h: end of AAD; switch to processing the message. */
+void
+br_eax_flip(br_eax_context *ctx)
+{
+	int from_capture;
+
+	/*
+	 * ctx->head[0] may be non-zero if the context was reset with
+	 * a pre-AAD captured state. In that case, ctx->ctr[] contains
+	 * the state for OMAC^2 _after_ processing the first block.
+	 */
+	from_capture = ctx->head[0];	/* read before head[] is overwritten */
+
+	/*
+	 * Complete the OMAC computation on the AAD.
+	 */
+	do_pad(ctx);
+	memcpy(ctx->head, ctx->cbcmac, sizeof ctx->cbcmac);
+
+	/*
+	 * Start OMAC^2 for the encrypted data.
+	 * If the context was initialized from a captured state, then
+	 * the OMAC^2 value is in the ctr[] array.
+	 */
+	if (from_capture) {
+		memcpy(ctx->cbcmac, ctx->ctr, sizeof ctx->cbcmac);
+		ctx->ptr = 0;	/* no pending block in buf[] */
+	} else {
+		omac_start(ctx, 2);
+	}
+
+	/*
+	 * Initial counter value for CTR is the processed nonce.
+	 */
+	memcpy(ctx->ctr, ctx->nonce, sizeof ctx->nonce);
+}
+
+/* see bearssl_aead.h: encrypt (or decrypt) data in place, updating OMAC^2. */
+void
+br_eax_run(br_eax_context *ctx, int encrypt, void *data, size_t len)
+{
+	unsigned char *dbuf;
+	size_t ptr;
+
+	/*
+	 * Ensure that there is actual data to process.
+	 */
+	if (len == 0) {
+		return;
+	}
+
+	dbuf = data;
+	ptr = ctx->ptr;
+
+	/*
+	 * We may have ptr == 0 here if we initialized from a captured
+	 * state. In that case, there is no partially consumed block
+	 * or unprocessed data.
+	 */
+	if (ptr != 0 && ptr != 16) {
+		/*
+		 * We have a partially consumed block.
+		 */
+		size_t u, clen;
+
+		clen = 16 - ptr;
+		if (len <= clen) {
+			clen = len;
+		}
+		if (encrypt) {
+			for (u = 0; u < clen; u ++) {
+				ctx->buf[ptr + u] ^= dbuf[u];
+			}
+			memcpy(dbuf, ctx->buf + ptr, clen);
+		} else {
+			for (u = 0; u < clen; u ++) {
+				unsigned dx, sx;
+
+				sx = ctx->buf[ptr + u];	/* stream byte */
+				dx = dbuf[u];		/* input byte */
+				ctx->buf[ptr + u] = dx;	/* kept in buf[] for the MAC */
+				dbuf[u] = sx ^ dx;
+			}
+		}
+
+		if (len <= clen) {
+			ctx->ptr = ptr + clen;
+			return;
+		}
+		dbuf += clen;
+		len -= clen;
+	}
+
+	/*
+	 * We now have a complete encrypted block in buf[] that must still
+	 * be processed with OMAC, and this is not the final block.
+	 * Exception: when ptr == 0, no block has been produced yet.
+	 */
+	if (ptr != 0) {
+		(*ctx->bctx)->mac(ctx->bctx, ctx->cbcmac,
+			ctx->buf, sizeof ctx->buf);
+	}
+
+	/*
+	 * Do CTR encryption or decryption and CBC-MAC for all full blocks
+	 * except the last.
+	 */
+	ptr = len & (size_t)15;
+	if (ptr == 0) {
+		len -= 16;
+		ptr = 16;
+	} else {
+		len -= ptr;
+	}
+	if (encrypt) {
+		(*ctx->bctx)->encrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
+			dbuf, len);
+	} else {
+		(*ctx->bctx)->decrypt(ctx->bctx, ctx->ctr, ctx->cbcmac,
+			dbuf, len);
+	}
+	dbuf += len;
+
+	/*
+	 * Compute next block of CTR stream, and use it to finish
+	 * encrypting or decrypting the data.
+	 */
+	memset(ctx->buf, 0, sizeof ctx->buf);
+	(*ctx->bctx)->ctr(ctx->bctx, ctx->ctr, ctx->buf, sizeof ctx->buf);
+	if (encrypt) {
+		size_t u;
+
+		for (u = 0; u < ptr; u ++) {
+			ctx->buf[u] ^= dbuf[u];
+		}
+		memcpy(dbuf, ctx->buf, ptr);
+	} else {
+		size_t u;
+
+		for (u = 0; u < ptr; u ++) {
+			unsigned dx, sx;
+
+			sx = ctx->buf[u];	/* stream byte */
+			dx = dbuf[u];		/* input byte */
+			ctx->buf[u] = dx;	/* kept in buf[] for the MAC */
+			dbuf[u] = sx ^ dx;
+		}
+	}
+	ctx->ptr = ptr;	/* bytes of buf[] still to be MACed (1 to 16) */
+}
+
+/*
+ * Complete tag computation. The final tag is written in ctx->cbcmac.
+ */
+static void
+do_final(br_eax_context *ctx)
+{
+	size_t u;
+
+	do_pad(ctx);	/* finish the OMAC still in progress in ctx->cbcmac */
+
+	/*
+	 * Authentication tag is the XOR of the three OMAC outputs for
+	 * the nonce, AAD and encrypted data.
+	 */
+	for (u = 0; u < 16; u ++) {
+		ctx->cbcmac[u] ^= ctx->nonce[u] ^ ctx->head[u];
+	}
+}
+
+/* see bearssl_aead.h: finalize and write the full 16-byte tag to 'tag'. */
+void
+br_eax_get_tag(br_eax_context *ctx, void *tag)
+{
+	do_final(ctx);
+	memcpy(tag, ctx->cbcmac, sizeof ctx->cbcmac);
+}
+
+/* see bearssl_aead.h: as br_eax_get_tag(), first 'len' bytes only. */
+void
+br_eax_get_tag_trunc(br_eax_context *ctx, void *tag, size_t len)
+{
+	do_final(ctx);
+	memcpy(tag, ctx->cbcmac, len);	/* no check here that len <= 16 */
+}
+
+/* see bearssl_aead.h: compare the computed tag with 'len' bytes of 'tag'. */
+uint32_t
+br_eax_check_tag_trunc(br_eax_context *ctx, const void *tag, size_t len)
+{
+	unsigned char tmp[16];
+	size_t u;
+	int x;
+
+	br_eax_get_tag(ctx, tmp);
+	x = 0;
+	for (u = 0; u < len; u ++) {	/* OR of all differences, no early exit */
+		x |= tmp[u] ^ ((const unsigned char *)tag)[u];
+	}
+	return EQ0(x);	/* presumably 1 when x == 0 (tags match), else 0 */
+}
+
+/* see bearssl_aead.h: check the full 16-byte tag. */
+uint32_t
+br_eax_check_tag(br_eax_context *ctx, const void *tag)
+{
+	return br_eax_check_tag_trunc(ctx, tag, 16);
+}
+
+/* see bearssl_aead.h: EAX functions, cast to the generic AEAD types. */
+const br_aead_class br_eax_vtable = {
+	16,	/* presumably the tag size in bytes -- confirm in br_aead_class */
+	(void (*)(const br_aead_class **, const void *, size_t))
+		&br_eax_reset,
+	(void (*)(const br_aead_class **, const void *, size_t))
+		&br_eax_aad_inject,
+	(void (*)(const br_aead_class **))
+		&br_eax_flip,
+	(void (*)(const br_aead_class **, int, void *, size_t))
+		&br_eax_run,
+	(void (*)(const br_aead_class **, void *))
+		&br_eax_get_tag,
+	(uint32_t (*)(const br_aead_class **, const void *))
+		&br_eax_check_tag,
+	(void (*)(const br_aead_class **, void *, size_t))
+		&br_eax_get_tag_trunc,
+	(uint32_t (*)(const br_aead_class **, const void *, size_t))
+		&br_eax_check_tag_trunc
+};
diff --git a/third_party/bearssl/src/ec_all_m15.c b/third_party/bearssl/src/ec_all_m15.c
new file mode 100644
index 0000000..bb550e1
--- /dev/null
+++ b/third_party/bearssl/src/ec_all_m15.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	switch (curve) {	/* forward to the implementation for this curve */
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.generator(curve, len);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.generator(curve, len);
+	default:		/* every other curve ID */
+		return br_ec_prime_i15.generator(curve, len);
+	}
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	switch (curve) {	/* forward to the implementation for this curve */
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.order(curve, len);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.order(curve, len);
+	default:		/* every other curve ID */
+		return br_ec_prime_i15.order(curve, len);
+	}
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	switch (curve) {	/* forward to the implementation for this curve */
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.xoff(curve, len);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.xoff(curve, len);
+	default:		/* every other curve ID */
+		return br_ec_prime_i15.xoff(curve, len);
+	}
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	switch (curve) {	/* forward to the implementation for this curve */
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.mul(G, Glen, kb, kblen, curve);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.mul(G, Glen, kb, kblen, curve);
+	default:		/* every other curve ID */
+		return br_ec_prime_i15.mul(G, Glen, kb, kblen, curve);
+	}
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	switch (curve) {	/* forward to the implementation for this curve */
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.mulgen(R, x, xlen, curve);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.mulgen(R, x, xlen, curve);
+	default:		/* every other curve ID */
+		return br_ec_prime_i15.mulgen(R, x, xlen, curve);
+	}
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	switch (curve) {	/* forward to the implementation for this curve */
+	case BR_EC_secp256r1:
+		return br_ec_p256_m15.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+	case BR_EC_curve25519:
+		return br_ec_c25519_m15.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+	default:		/* every other curve ID */
+		return br_ec_prime_i15.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+	}
+}
+
+/* see bearssl_ec.h: aggregate implementation built from the m15/i15 code. */
+const br_ec_impl br_ec_all_m15 = {
+	(uint32_t)0x23800000,	/* bits 23, 24, 25, 29; presumably curve IDs */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
diff --git a/third_party/bearssl/src/ec_all_m31.c b/third_party/bearssl/src/ec_all_m31.c
new file mode 100644
index 0000000..8fd8c3c
--- /dev/null
+++ b/third_party/bearssl/src/ec_all_m31.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	switch (curve) {	/* forward to the implementation for this curve */
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_p256_m64.generator(curve, len);
+#else
+		return br_ec_p256_m31.generator(curve, len);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_c25519_m64.generator(curve, len);
+#else
+		return br_ec_c25519_m31.generator(curve, len);
+#endif
+	default:		/* every other curve ID */
+		return br_ec_prime_i31.generator(curve, len);
+	}
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	switch (curve) {	/* forward to the implementation for this curve */
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_p256_m64.order(curve, len);
+#else
+		return br_ec_p256_m31.order(curve, len);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_c25519_m64.order(curve, len);
+#else
+		return br_ec_c25519_m31.order(curve, len);
+#endif
+	default:		/* every other curve ID */
+		return br_ec_prime_i31.order(curve, len);
+	}
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	switch (curve) {	/* forward to the implementation for this curve */
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_p256_m64.xoff(curve, len);
+#else
+		return br_ec_p256_m31.xoff(curve, len);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_c25519_m64.xoff(curve, len);
+#else
+		return br_ec_c25519_m31.xoff(curve, len);
+#endif
+	default:		/* every other curve ID */
+		return br_ec_prime_i31.xoff(curve, len);
+	}
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	switch (curve) {	/* forward to the implementation for this curve */
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_p256_m64.mul(G, Glen, kb, kblen, curve);
+#else
+		return br_ec_p256_m31.mul(G, Glen, kb, kblen, curve);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_c25519_m64.mul(G, Glen, kb, kblen, curve);
+#else
+		return br_ec_c25519_m31.mul(G, Glen, kb, kblen, curve);
+#endif
+	default:		/* every other curve ID */
+		return br_ec_prime_i31.mul(G, Glen, kb, kblen, curve);
+	}
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	switch (curve) {	/* forward to the implementation for this curve */
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_p256_m64.mulgen(R, x, xlen, curve);
+#else
+		return br_ec_p256_m31.mulgen(R, x, xlen, curve);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_c25519_m64.mulgen(R, x, xlen, curve);
+#else
+		return br_ec_c25519_m31.mulgen(R, x, xlen, curve);
+#endif
+	default:		/* every other curve ID */
+		return br_ec_prime_i31.mulgen(R, x, xlen, curve);
+	}
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	switch (curve) {	/* forward to the implementation for this curve */
+	case BR_EC_secp256r1:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_p256_m64.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+#else
+		return br_ec_p256_m31.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+#endif
+	case BR_EC_curve25519:
+#if BR_INT128 || BR_UMUL128
+		return br_ec_c25519_m64.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+#else
+		return br_ec_c25519_m31.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+#endif
+	default:		/* every other curve ID */
+		return br_ec_prime_i31.muladd(A, B, len,
+			x, xlen, y, ylen, curve);
+	}
+}
+
+/* see bearssl_ec.h: aggregate implementation built from m31/m64/i31 code. */
+const br_ec_impl br_ec_all_m31 = {
+	(uint32_t)0x23800000,	/* bits 23, 24, 25, 29; presumably curve IDs */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
diff --git a/third_party/bearssl/src/ec_c25519_i15.c b/third_party/bearssl/src/ec_c25519_i15.c
new file mode 100644
index 0000000..8fadcf4
--- /dev/null
+++ b/third_party/bearssl/src/ec_c25519_i15.c
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Parameters for the field (15-bit words; word 0 is a header word):
+ *   - field modulus p = 2^255-19
+ *   - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p)
+ */
+
+static const uint16_t C255_P[] = {
+	0x0110,
+	0x7FED, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF
+};
+
+#define P0I   0x4A1B	/* P0I * C255_P[1] = -1 mod 2^15 */
+
+static const uint16_t C255_R2[] = {
+	0x0110,
+	0x0169, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000
+};	/* 0x169 = 361 = 19^2, with R = 2^255 = 19 mod p */
+
+/* obsolete
+#include <stdio.h>
+#include <stdlib.h>
+static void
+print_int_mont(const char *name, const uint16_t *x)
+{
+	uint16_t y[18];
+	unsigned char tmp[32];
+	size_t u;
+
+	printf("%s = ", name);
+	memcpy(y, x, sizeof y);
+	br_i15_from_monty(y, C255_P, P0I);
+	br_i15_encode(tmp, sizeof tmp, y);
+	for (u = 0; u < sizeof tmp; u ++) {
+		printf("%02X", tmp[u]);
+	}
+	printf("\n");
+}
+*/
+
+static const uint16_t C255_A24[] = {	/* 121665 * 19: 121665 times R mod p */
+	0x0110,
+	0x45D3, 0x0046, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000
+};
+
+static const unsigned char GEN[] = {	/* returned by api_generator() */
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+static const unsigned char ORDER[] = {	/* returned by api_order() */
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	(void)curve;	/* only one curve here: the ID is not examined */
+	*len = 32;	/* size of GEN[] in bytes */
+	return GEN;
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	(void)curve;	/* only one curve here: the ID is not examined */
+	*len = 32;	/* size of ORDER[] in bytes */
+	return ORDER;
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	(void)curve;	/* only one curve here: the ID is not examined */
+	*len = 32;	/* reported length: the whole 32-byte encoding */
+	return 0;	/* reported offset: start of the encoding */
+}
+
+static void
+cswap(uint16_t *a, uint16_t *b, uint32_t ctl)
+{
+	int i;
+
+	ctl = -ctl;	/* callers pass 0 or 1: 1 becomes an all-ones mask */
+	for (i = 0; i < 18; i ++) {	/* all 18 words, header included */
+		uint32_t aw, bw, tw;
+
+		aw = a[i];
+		bw = b[i];
+		tw = ctl & (aw ^ bw);	/* zero when no swap is requested */
+		a[i] = aw ^ tw;
+		b[i] = bw ^ tw;
+	}
+}
+
+static void
+c255_add(uint16_t *d, const uint16_t *a, const uint16_t *b)
+{
+	uint32_t ctl;
+	uint16_t t[18];
+	/* d = a + b mod p; computed in t[] since callers pass d == a. */
+	memcpy(t, a, sizeof t);
+	ctl = br_i15_add(t, b, 1);	/* presumably the carry out of t += b */
+	ctl |= NOT(br_i15_sub(t, C255_P, 0));	/* presumably: t >= p */
+	br_i15_sub(t, C255_P, ctl);	/* subtract p when ctl is set */
+	memcpy(d, t, sizeof t);
+}
+
+static void
+c255_sub(uint16_t *d, const uint16_t *a, const uint16_t *b)
+{
+	uint16_t t[18];
+	/* d = a - b mod p: p is added back if t -= b (presumably) borrowed. */
+	memcpy(t, a, sizeof t);
+	br_i15_add(t, C255_P, br_i15_sub(t, b, 1));
+	memcpy(d, t, sizeof t);
+}
+
+static void
+c255_mul(uint16_t *d, const uint16_t *a, const uint16_t *b)
+{
+	uint16_t t[18];
+	/* Montgomery product modulo p; t[] is used since callers pass d == a. */
+	br_i15_montymul(t, a, b, C255_P, P0I);
+	memcpy(d, t, sizeof t);
+}
+
+static void
+byteswap(unsigned char *G)
+{
+	int i;
+
+	for (i = 0; i < 16; i ++) {	/* reverse the 32 bytes of G in place */
+		unsigned char t;
+
+		t = G[i];
+		G[i] = G[31 - i];
+		G[31 - i] = t;
+	}
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+#define ILEN   (18 * sizeof(uint16_t))
+
+	/*
+	 * The a[] and b[] arrays have an extra word to allow for
+	 * decoding without using br_i15_decode_reduce().
+	 */
+	uint16_t x1[18], x2[18], x3[18], z2[18], z3[18];
+	uint16_t a[19], aa[18], b[19], bb[18];
+	uint16_t c[18], d[18], e[18], da[18], cb[18];
+	unsigned char k[32];
+	uint32_t swap;
+	int i;
+
+	(void)curve;
+
+	/*
+	 * Points are encoded over exactly 32 bytes. Multipliers must fit
+	 * in 32 bytes as well.
+	 * RFC 7748 mandates that the high bit of the last point byte must
+	 * be ignored/cleared.
+	 */
+	if (Glen != 32 || kblen > 32) {
+		return 0;	/* G is left untouched in that case */
+	}
+	G[31] &= 0x7F;
+
+	/*
+	 * Byteswap the point encoding, because it uses little-endian, and
+	 * the generic decoding routine uses big-endian.
+	 */
+	byteswap(G);
+
+	/*
+	 * Decode the point ('u' coordinate). This should be reduced
+	 * modulo p, but we prefer to avoid the dependency on
+	 * br_i15_decode_reduce(). Instead, we use br_i15_decode_mod()
+	 * with a synthetic modulus of value 2^255 (this must work
+	 * since G was truncated to 255 bits), then use a conditional
+	 * subtraction. We use br_i15_decode_mod() and not
+	 * br_i15_decode(), because the ec_prime_i15 implementation uses
+	 * the former but not the latter.
+	 *    br_i15_decode_reduce(a, G, 32, C255_P);
+	 */
+	br_i15_zero(b, 0x111);
+	b[18] = 1;	/* the extra word: b is the synthetic modulus 2^255 */
+	br_i15_decode_mod(a, G, 32, b);
+	a[0] = 0x110;	/* same header word as C255_P */
+	br_i15_sub(a, C255_P, NOT(br_i15_sub(a, C255_P, 0)));
+
+	/*
+	 * Initialise variables x1, x2, z2, x3 and z3. We set all of them
+	 * into Montgomery representation.
+	 */
+	br_i15_montymul(x1, a, C255_R2, C255_P, P0I);
+	memcpy(x3, x1, ILEN);
+	br_i15_zero(z2, C255_P[0]);
+	memcpy(x2, z2, ILEN);
+	x2[1] = 19;	/* 19 = 2^255 mod p: the value 1, Montgomery form */
+	memcpy(z3, x2, ILEN);
+
+	memset(k, 0, (sizeof k) - kblen);
+	memcpy(k + (sizeof k) - kblen, kb, kblen);	/* right-aligned copy */
+	k[31] &= 0xF8;	/* clear the three lowest bits of the multiplier */
+	k[0] &= 0x7F;	/* clear bit 255 */
+	k[0] |= 0x40;	/* set bit 254 */
+
+	/* obsolete
+	print_int_mont("x1", x1);
+	*/
+
+	swap = 0;
+	for (i = 254; i >= 0; i --) {
+		uint32_t kt;
+
+		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;	/* bit i of k */
+		swap ^= kt;
+		cswap(x2, x3, swap);
+		cswap(z2, z3, swap);
+		swap = kt;
+
+		/* obsolete
+		print_int_mont("x2", x2);
+		print_int_mont("z2", z2);
+		print_int_mont("x3", x3);
+		print_int_mont("z3", z3);
+		*/
+
+		c255_add(a, x2, z2);
+		c255_mul(aa, a, a);
+		c255_sub(b, x2, z2);
+		c255_mul(bb, b, b);
+		c255_sub(e, aa, bb);
+		c255_add(c, x3, z3);
+		c255_sub(d, x3, z3);
+		c255_mul(da, d, a);
+		c255_mul(cb, c, b);
+
+		/* obsolete
+		print_int_mont("a ", a);
+		print_int_mont("aa", aa);
+		print_int_mont("b ", b);
+		print_int_mont("bb", bb);
+		print_int_mont("e ", e);
+		print_int_mont("c ", c);
+		print_int_mont("d ", d);
+		print_int_mont("da", da);
+		print_int_mont("cb", cb);
+		*/
+
+		c255_add(x3, da, cb);
+		c255_mul(x3, x3, x3);
+		c255_sub(z3, da, cb);
+		c255_mul(z3, z3, z3);
+		c255_mul(z3, z3, x1);
+		c255_mul(x2, aa, bb);
+		c255_mul(z2, C255_A24, e);
+		c255_add(z2, z2, aa);
+		c255_mul(z2, e, z2);
+
+		/* obsolete
+		print_int_mont("x2", x2);
+		print_int_mont("z2", z2);
+		print_int_mont("x3", x3);
+		print_int_mont("z3", z3);
+		*/
+	}
+	cswap(x2, x3, swap);
+	cswap(z2, z3, swap);
+
+	/*
+	 * Inverse z2 with a modular exponentiation. This is a simple
+	 * square-and-multiply algorithm; we mutualise most non-squarings
+	 * since the exponent contains almost only ones.
+	 */
+	memcpy(a, z2, ILEN);
+	for (i = 0; i < 15; i ++) {	/* ends with a = z2^(2^16-1) */
+		c255_mul(a, a, a);
+		c255_mul(a, a, z2);
+	}
+	memcpy(b, a, ILEN);
+	for (i = 0; i < 14; i ++) {	/* ends with b = z2^(2^240-1) */
+		int j;
+
+		for (j = 0; j < 16; j ++) {
+			c255_mul(b, b, b);
+		}
+		c255_mul(b, b, a);
+	}
+	for (i = 14; i >= 0; i --) {	/* low 15 bits: exponent 2^255-21 */
+		c255_mul(b, b, b);
+		if ((0xFFEB >> i) & 1) {
+			c255_mul(b, z2, b);
+		}
+	}
+	c255_mul(b, x2, b);	/* x2 times the inverse of z2 */
+
+	/*
+	 * To avoid a dependency on br_i15_from_monty(), we use a
+	 * Montgomery multiplication with 1.
+	 *    memcpy(x2, b, ILEN);
+	 *    br_i15_from_monty(x2, C255_P, P0I);
+	 */
+	br_i15_zero(a, C255_P[0]);
+	a[1] = 1;
+	br_i15_montymul(x2, a, b, C255_P, P0I);
+
+	br_i15_encode(G, 32, x2);
+	byteswap(G);	/* back to the little-endian encoding */
+	return 1;
+
+#undef ILEN
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	const unsigned char *G;
+	size_t Glen;
+
+	G = api_generator(curve, &Glen);
+	memcpy(R, G, Glen);	/* api_mul() works in place on R */
+	api_mul(R, Glen, x, xlen, curve);	/* NOTE(review): status ignored */
+	return Glen;	/* also returned if xlen > 32 left R equal to G */
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/*
+	 * We don't implement this method, since it is used for ECDSA
+	 * only, and there is no ECDSA over Curve25519 (which instead
+	 * uses EdDSA). The call always returns 0 and writes nothing.
+	 */
+	(void)A;
+	(void)B;
+	(void)len;
+	(void)x;
+	(void)xlen;
+	(void)y;
+	(void)ylen;
+	(void)curve;
+	return 0;
+}
+
+/* see bearssl_ec.h: Curve25519 implementation based on the i15 code. */
+const br_ec_impl br_ec_c25519_i15 = {
+	(uint32_t)0x20000000,	/* bit 29 only; presumably a curve ID bit */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
diff --git a/third_party/bearssl/src/ec_c25519_i31.c b/third_party/bearssl/src/ec_c25519_i31.c
new file mode 100644
index 0000000..f8ffc2c
--- /dev/null
+++ b/third_party/bearssl/src/ec_c25519_i31.c
@@ -0,0 +1,390 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Parameters for the field:
+ *   - field modulus p = 2^255-19
+ *   - R^2 mod p (R = 2^(31k) for the smallest k such that R >= p)
+ */
+
+static const uint32_t C255_P[] = {	/* p = 2^255-19 as 31-bit words, least significant first */
+	0x00000107,	/* header word; R2, A24 and the field elements below reuse it (presumably the encoded bit length -- see inner.h) */
+	0x7FFFFFED, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0000007F
+};
+
+#define P0I   0x286BCA1B	/* P0I * C255_P[1] = -1 mod 2^31; passed to br_i31_montymul() */
+
+static const uint32_t C255_R2[] = {	/* R = 2^279 = 19 * 2^24 mod p, so R^2 mod p = 361 * 2^48 */
+	0x00000107,
+	0x00000000, 0x02D20000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000
+};
+
+static const uint32_t C255_A24[] = {	/* 121665 * R mod p: the constant multiplied into z2 by api_mul() */
+	0x00000107,
+	0x53000000, 0x0000468B, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000
+};
+
+/* obsolete
+#include <stdio.h>
+#include <stdlib.h>
+static void
+print_int_mont(const char *name, const uint32_t *x)
+{
+	uint32_t y[10];
+	unsigned char tmp[32];
+	size_t u;
+
+	printf("%s = ", name);
+	memcpy(y, x, sizeof y);
+	br_i31_from_monty(y, C255_P, P0I);
+	br_i31_encode(tmp, sizeof tmp, y);
+	for (u = 0; u < sizeof tmp; u ++) {
+		printf("%02X", tmp[u]);
+	}
+	printf("\n");
+}
+*/
+
+static const unsigned char GEN[] = {	/* generator, u = 9; little-endian like the points handled by api_mul() */
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+static const unsigned char ORDER[] = {	/* 2^255-1 if read as big-endian. NOTE(review): not the prime subgroup order; presumably a bound on multipliers -- confirm in bearssl_ec.h */
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	*len = sizeof GEN;	/* GEN[] holds a 32-byte point encoding */
+	(void)curve;		/* only one curve is handled here */
+	return GEN;
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	*len = sizeof ORDER;	/* ORDER[] is 32 bytes long */
+	(void)curve;		/* only one curve is handled here */
+	return ORDER;
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	*len = 32;	/* reported length: the full 32-byte encoding */
+	(void)curve;	/* only one curve is handled here */
+	return 0;	/* reported offset: first byte */
+}
+
+static void
+cswap(uint32_t *a, uint32_t *b, uint32_t ctl)
+{
+	int i;
+	/* Branch-free conditional swap of a[] and b[]; ctl is 1 (swap) or 0. */
+	ctl = -ctl;	/* 1 -> 0xFFFFFFFF, 0 -> 0 */
+	for (i = 0; i < 10; i ++) {	/* all 10 words of each operand */
+		uint32_t aw, bw, tw;
+
+		aw = a[i];
+		bw = b[i];
+		tw = ctl & (aw ^ bw);	/* a XOR b when swapping, else 0 */
+		a[i] = aw ^ tw;
+		b[i] = bw ^ tw;
+	}
+}
+
+static void
+c255_add(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t ctl;
+	uint32_t t[10];
+	/* d = a + b mod p; the sum is built in t[], so d may alias a or b. */
+	memcpy(t, a, sizeof t);
+	ctl = br_i31_add(t, b, 1);	/* t += b; ctl presumably receives the carry */
+	ctl |= NOT(br_i31_sub(t, C255_P, 0));	/* ctl=0 call: presumably compare only (no borrow means t >= p) */
+	br_i31_sub(t, C255_P, ctl);	/* subtract p when flagged */
+	memcpy(d, t, sizeof t);
+}
+
+static void
+c255_sub(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t t[10];
+	/* d = a - b mod p; the difference is built in t[], so d may alias a or b. */
+	memcpy(t, a, sizeof t);
+	br_i31_add(t, C255_P, br_i31_sub(t, b, 1));	/* t -= b, then add p back if that (presumably) borrowed */
+	memcpy(d, t, sizeof t);
+}
+
+static void
+c255_mul(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t t[10];
+	/* d = Montgomery product of a and b modulo p; d may alias a or b. */
+	br_i31_montymul(t, a, b, C255_P, P0I);
+	memcpy(d, t, sizeof t);	/* copied out last, which is what makes aliasing safe */
+}
+
+static void
+byteswap(unsigned char *G)
+{
+	unsigned char *lo, *hi;
+	/* Reverse the 32-byte buffer in place, walking inwards from both ends. */
+	for (lo = G, hi = G + 31; lo < hi; lo ++, hi --) {
+		unsigned char tmp;
+
+		tmp = *lo;
+		*lo = *hi;
+		*hi = tmp;
+	}
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	uint32_t x1[10], x2[10], x3[10], z2[10], z3[10];
+	uint32_t a[10], aa[10], b[10], bb[10];
+	uint32_t c[10], d[10], e[10], da[10], cb[10];
+	unsigned char k[32];
+	uint32_t swap;
+	int i;
+
+	(void)curve;
+
+	/*
+	 * Points are encoded over exactly 32 bytes; multipliers must fit in
+	 * 32 bytes as well. Otherwise 0 is returned and G is left untouched
+	 * (on success G is overwritten with the result and 1 is returned).
+	 * RFC 7748: the high bit of the last point byte is ignored/cleared.
+	 */
+	if (Glen != 32 || kblen > 32) {
+		return 0;
+	}
+	G[31] &= 0x7F;
+
+	/*
+	 * Byteswap the point encoding, because it uses little-endian, and
+	 * the generic decoding routine uses big-endian.
+	 */
+	byteswap(G);
+
+	/*
+	 * Decode the point ('u' coordinate). This should be reduced
+	 * modulo p, but we prefer to avoid the dependency on
+	 * br_i31_decode_reduce(). Instead, we use br_i31_decode_mod()
+	 * with a synthetic modulus of value 2^255 (this must work
+	 * since G was truncated to 255 bits), then use a conditional
+	 * subtraction. We use br_i31_decode_mod() and not
+	 * br_i31_decode(), because the ec_prime_i31 implementation uses
+	 * the former but not the latter.
+	 *    br_i31_decode_reduce(a, G, 32, C255_P);
+	 */
+	br_i31_zero(b, 0x108);
+	b[9] = 0x0080;	/* bit 7 of the 9th value word: 8*31 + 7 = 255 */
+	br_i31_decode_mod(a, G, 32, b);
+	a[0] = 0x107;	/* switch to the header word used with C255_P */
+	br_i31_sub(a, C255_P, NOT(br_i31_sub(a, C255_P, 0)));
+
+	/*
+	 * Initialise variables x1, x2, z2, x3 and z3. We set all of them
+	 * into Montgomery representation.
+	 */
+	br_i31_montymul(x1, a, C255_R2, C255_P, P0I);
+	memcpy(x3, x1, sizeof x1);
+	br_i31_zero(z2, C255_P[0]);
+	memcpy(x2, z2, sizeof z2);
+	x2[1] = 0x13000000;	/* 19 * 2^24 = 2^279 mod p, i.e. 1 in Montgomery representation */
+	memcpy(z3, x2, sizeof x2);
+
+	/*
+	 * kb[] is in big-endian notation, but possibly shorter than k[].
+	 */
+	memset(k, 0, (sizeof k) - kblen);
+	memcpy(k + (sizeof k) - kblen, kb, kblen);
+	k[31] &= 0xF8;	/* clear the three least significant bits */
+	k[0] &= 0x7F;	/* clear bit 255 */
+	k[0] |= 0x40;	/* set bit 254 */
+
+	/* obsolete
+	print_int_mont("x1", x1);
+	*/
+
+	swap = 0;
+	for (i = 254; i >= 0; i --) {
+		uint32_t kt;
+
+		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;	/* bit i of the multiplier */
+		swap ^= kt;
+		cswap(x2, x3, swap);
+		cswap(z2, z3, swap);
+		swap = kt;	/* compared against the next bit on the next pass */
+
+		/* obsolete
+		print_int_mont("x2", x2);
+		print_int_mont("z2", z2);
+		print_int_mont("x3", x3);
+		print_int_mont("z3", z3);
+		*/
+
+		c255_add(a, x2, z2);
+		c255_mul(aa, a, a);
+		c255_sub(b, x2, z2);
+		c255_mul(bb, b, b);
+		c255_sub(e, aa, bb);
+		c255_add(c, x3, z3);
+		c255_sub(d, x3, z3);
+		c255_mul(da, d, a);
+		c255_mul(cb, c, b);
+
+		/* obsolete
+		print_int_mont("a ", a);
+		print_int_mont("aa", aa);
+		print_int_mont("b ", b);
+		print_int_mont("bb", bb);
+		print_int_mont("e ", e);
+		print_int_mont("c ", c);
+		print_int_mont("d ", d);
+		print_int_mont("da", da);
+		print_int_mont("cb", cb);
+		*/
+
+		c255_add(x3, da, cb);
+		c255_mul(x3, x3, x3);	/* x3 = (da + cb)^2 */
+		c255_sub(z3, da, cb);
+		c255_mul(z3, z3, z3);
+		c255_mul(z3, z3, x1);	/* z3 = x1 * (da - cb)^2 */
+		c255_mul(x2, aa, bb);
+		c255_mul(z2, C255_A24, e);
+		c255_add(z2, z2, aa);
+		c255_mul(z2, e, z2);	/* z2 = e * (aa + A24 * e) */
+
+		/* obsolete
+		print_int_mont("x2", x2);
+		print_int_mont("z2", z2);
+		print_int_mont("x3", x3);
+		print_int_mont("z3", z3);
+		*/
+	}
+	cswap(x2, x3, swap);	/* apply the swap still pending after the last bit */
+	cswap(z2, z3, swap);
+
+	/*
+	 * Invert z2 with a modular exponentiation (exponent p-2 = 2^255-21).
+	 * This is a simple square-and-multiply algorithm; most non-squarings
+	 * are shared (16 bits at a time), since the exponent is almost all ones.
+	 */
+	memcpy(a, z2, sizeof z2);
+	for (i = 0; i < 15; i ++) {
+		c255_mul(a, a, a);
+		c255_mul(a, a, z2);
+	}
+	memcpy(b, a, sizeof a);	/* a = z2^(2^16-1) at this point */
+	for (i = 0; i < 14; i ++) {
+		int j;
+
+		for (j = 0; j < 16; j ++) {
+			c255_mul(b, b, b);
+		}
+		c255_mul(b, b, a);
+	}
+	for (i = 14; i >= 0; i --) {	/* low 15 exponent bits: 0x7FEB */
+		c255_mul(b, b, b);
+		if ((0xFFEB >> i) & 1) {
+			c255_mul(b, z2, b);
+		}
+	}
+	c255_mul(b, x2, b);	/* b = x2 / z2, still in Montgomery representation */
+
+	/*
+	 * To avoid a dependency on br_i31_from_monty(), we use
+	 * a Montgomery multiplication with 1.
+	 *    memcpy(x2, b, sizeof b);
+	 *    br_i31_from_monty(x2, C255_P, P0I);
+	 */
+	br_i31_zero(a, C255_P[0]);
+	a[1] = 1;	/* a = plain (non-Montgomery) integer 1 */
+	br_i31_montymul(x2, a, b, C255_P, P0I);
+
+	br_i31_encode(G, 32, x2);
+	byteswap(G);	/* back to the little-endian point encoding */
+	return 1;
+}
+
+/* R = x * generator; returns the point length (api_mul status is dropped). */
+static size_t
+api_mulgen(unsigned char *R, const unsigned char *x, size_t xlen, int curve)
+{
+	const unsigned char *gen;
+	size_t gen_len;
+
+	gen = api_generator(curve, &gen_len);
+	memcpy(R, gen, gen_len);	/* R starts out as the generator */
+	(void)api_mul(R, gen_len, x, xlen, curve);	/* multiplied in place */
+	return gen_len;
+}
+
+/*
+ * Multiply-add entry point of the curve API; not supported here.
+ */
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/*
+	 * This operation is needed by ECDSA only. Signatures over
+	 * Curve25519 use EdDSA, not ECDSA, so nothing is computed:
+	 * every argument is ignored and 0 is returned.
+	 */
+	(void)A, (void)B, (void)len;
+	(void)x, (void)xlen;
+	(void)y, (void)ylen;
+	(void)curve;
+
+	return 0;
+}
+
+/* see bearssl_ec.h; members are given in declaration order of br_ec_impl */
+const br_ec_impl br_ec_c25519_i31 = {
+	(uint32_t)0x20000000,	/* only bit 29 set; presumably the supported-curves mask -- confirm in bearssl_ec.h */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd	/* stub: always returns 0 */
+};
diff --git a/third_party/bearssl/src/ec_c25519_m15.c b/third_party/bearssl/src/ec_c25519_m15.c
new file mode 100644
index 0000000..deff55b
--- /dev/null
+++ b/third_party/bearssl/src/ec_c25519_m15.c
@@ -0,0 +1,1478 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* obsolete
+#include <stdio.h>
+#include <stdlib.h>
+static void
+print_int(const char *name, const uint32_t *x)
+{
+	size_t u;
+	unsigned char tmp[36];
+
+	printf("%s = ", name);
+	for (u = 0; u < 20; u ++) {
+		if (x[u] > 0x1FFF) {
+			printf("INVALID:");
+			for (u = 0; u < 20; u ++) {
+				printf(" %04X", x[u]);
+			}
+			printf("\n");
+			return;
+		}
+	}
+	memset(tmp, 0, sizeof tmp);
+	for (u = 0; u < 20; u ++) {
+		uint32_t w;
+		int j, k;
+
+		w = x[u];
+		j = 13 * (int)u;
+		k = j & 7;
+		if (k != 0) {
+			w <<= k;
+			j -= k;
+		}
+		k = j >> 3;
+		tmp[35 - k] |= (unsigned char)w;
+		tmp[34 - k] |= (unsigned char)(w >> 8);
+		tmp[33 - k] |= (unsigned char)(w >> 16);
+		tmp[32 - k] |= (unsigned char)(w >> 24);
+	}
+	for (u = 4; u < 36; u ++) {
+		printf("%02X", tmp[u]);
+	}
+	printf("\n");
+}
+*/
+
+/*
+ * ARSH(x, n) is an arithmetic (sign-extending) right shift on 32 bits.
+ * If BR_NO_ARITH_SHIFT is undefined, or defined to 0, then we _assume_
+ * that right-shifting a negative signed integer copies the sign bit (x
+ * must then be an lvalue, as it is read through an int32_t pointer).
+ * That is "implementation-defined behaviour": not undefined, but each
+ * compiler chooses and documents it. GCC defines the shift as arithmetic
+ * and we expect the others to agree, since the underlying CPUs offer such
+ * an opcode. Otherwise, the shift is emulated with unsigned operations.
+ */
+#if BR_NO_ARITH_SHIFT
+#define ARSH(x, n)   (((uint32_t)(x) >> (n)) \
+                    | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
+#else
+#define ARSH(x, n)   ((*(int32_t *)&(x)) >> (n))
+#endif
+
+/*
+ * Convert an integer from unsigned little-endian encoding to a sequence of
+ * 13-bit words in little-endian order. The final "partial" word is
+ * returned.
+ */
+static uint32_t
+le8_to_le13(uint32_t *dst, const unsigned char *src, size_t len)
+{
+	uint32_t bits = 0;	/* pending bits, least significant first */
+	int nbits = 0;		/* number of pending bits */
+	size_t u;
+
+	for (u = 0; u < len; u ++) {
+		bits |= (uint32_t)src[u] << nbits;
+		nbits += 8;
+		/* Flush one 13-bit word as soon as enough bits are queued. */
+		if (nbits >= 13) {
+			*dst ++ = bits & 0x1FFF;
+			bits >>= 13;
+			nbits -= 13;
+		}
+	}
+	return bits;
+}
+
+/*
+ * Convert an integer (13-bit words, little-endian) to unsigned
+ * little-endian encoding. The total encoding length is provided; all
+ * the destination bytes will be filled.
+ */
+static void
+le13_to_le8(unsigned char *dst, size_t len, const uint32_t *src)
+{
+	uint32_t bits = 0;	/* pending bits, least significant first */
+	int nbits = 0;		/* number of pending bits */
+	size_t u;
+
+	/* Pull in the next 13-bit word whenever less than a byte is queued. */
+	for (u = 0; u < len; u ++) {
+		if (nbits < 8) {
+			bits |= (*src ++) << nbits;
+			nbits += 13;
+		}
+		dst[u] = (unsigned char)bits;
+		bits >>= 8;
+		nbits -= 8;
+	}
+}
+
+/*
+ * Normalise an array of words to a strict 13 bits per word. Returned
+ * value is the resulting carry. The source (w) and destination (d)
+ * arrays may be identical, but shall not overlap partially.
+ */
+static inline uint32_t
+norm13(uint32_t *d, const uint32_t *w, size_t len)
+{
+	size_t u;
+	uint32_t cc;
+
+	cc = 0;
+	for (u = 0; u < len; u ++) {
+		int32_t z;	/* signed, and an lvalue as ARSH() may require */
+
+		z = w[u] + cc;	/* unsigned sum, then converted to signed */
+		d[u] = z & 0x1FFF;	/* keep the low 13 bits */
+		cc = ARSH(z, 13);	/* sign-extended carry into the next word */
+	}
+	return cc;
+}
+
+/*
+ * mul20() multiplies two 260-bit integers together. Each word must fit
+ * on 13 bits; source operands use 20 words, destination operand
+ * receives 40 words. All overlaps allowed.
+ *
+ * square20() computes the square of a 260-bit integer. Each word must
+ * fit on 13 bits; source operand uses 20 words, destination operand
+ * receives 40 words. All overlaps allowed.
+ */
+
+#if BR_SLOW_MUL15
+
+static void
+mul20(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	/*
+	 * Two-level Karatsuba: turns a 20x20 multiplication into
+	 * nine 5x5 multiplications. We use 13-bit words but do not
+	 * propagate carries immediately, so words may expand:
+	 *
+	 *  - First Karatsuba decomposition turns the 20x20 mul on
+	 *    13-bit words into three 10x10 muls, two on 13-bit words
+	 *    and one on 14-bit words.
+	 *
+	 *  - Second Karatsuba decomposition further splits these into:
+	 *
+	 *     * four 5x5 muls on 13-bit words
+	 *     * four 5x5 muls on 14-bit words
+	 *     * one 5x5 mul on 15-bit words
+	 *
+	 * Highest word value is 8191, 16382 or 32764, for 13-bit, 14-bit
+	 * or 15-bit words, respectively.
+	 */
+	uint32_t u[45], v[45], w[90];	/* nine 5-word operand slots each; nine 10-word products */
+	uint32_t cc;
+	int i;
+
+#define ZADD(dw, d_off, s1w, s1_off, s2w, s2_off)   do { \
+		(dw)[5 * (d_off) + 0] = (s1w)[5 * (s1_off) + 0] \
+			+ (s2w)[5 * (s2_off) + 0]; \
+		(dw)[5 * (d_off) + 1] = (s1w)[5 * (s1_off) + 1] \
+			+ (s2w)[5 * (s2_off) + 1]; \
+		(dw)[5 * (d_off) + 2] = (s1w)[5 * (s1_off) + 2] \
+			+ (s2w)[5 * (s2_off) + 2]; \
+		(dw)[5 * (d_off) + 3] = (s1w)[5 * (s1_off) + 3] \
+			+ (s2w)[5 * (s2_off) + 3]; \
+		(dw)[5 * (d_off) + 4] = (s1w)[5 * (s1_off) + 4] \
+			+ (s2w)[5 * (s2_off) + 4]; \
+	} while (0)
+
+#define ZADDT(dw, d_off, sw, s_off)   do { \
+		(dw)[5 * (d_off) + 0] += (sw)[5 * (s_off) + 0]; \
+		(dw)[5 * (d_off) + 1] += (sw)[5 * (s_off) + 1]; \
+		(dw)[5 * (d_off) + 2] += (sw)[5 * (s_off) + 2]; \
+		(dw)[5 * (d_off) + 3] += (sw)[5 * (s_off) + 3]; \
+		(dw)[5 * (d_off) + 4] += (sw)[5 * (s_off) + 4]; \
+	} while (0)
+
+#define ZSUB2F(dw, d_off, s1w, s1_off, s2w, s2_off)   do { \
+		(dw)[5 * (d_off) + 0] -= (s1w)[5 * (s1_off) + 0] \
+			+ (s2w)[5 * (s2_off) + 0]; \
+		(dw)[5 * (d_off) + 1] -= (s1w)[5 * (s1_off) + 1] \
+			+ (s2w)[5 * (s2_off) + 1]; \
+		(dw)[5 * (d_off) + 2] -= (s1w)[5 * (s1_off) + 2] \
+			+ (s2w)[5 * (s2_off) + 2]; \
+		(dw)[5 * (d_off) + 3] -= (s1w)[5 * (s1_off) + 3] \
+			+ (s2w)[5 * (s2_off) + 3]; \
+		(dw)[5 * (d_off) + 4] -= (s1w)[5 * (s1_off) + 4] \
+			+ (s2w)[5 * (s2_off) + 4]; \
+	} while (0)
+
+#define CPR1(w, cprcc)   do { \
+		uint32_t cprz = (w) + cprcc; \
+		(w) = cprz & 0x1FFF; \
+		cprcc = cprz >> 13; \
+	} while (0)
+
+#define CPR(dw, d_off)   do { \
+		uint32_t cprcc; \
+		cprcc = 0; \
+		CPR1((dw)[(d_off) + 0], cprcc); \
+		CPR1((dw)[(d_off) + 1], cprcc); \
+		CPR1((dw)[(d_off) + 2], cprcc); \
+		CPR1((dw)[(d_off) + 3], cprcc); \
+		CPR1((dw)[(d_off) + 4], cprcc); \
+		CPR1((dw)[(d_off) + 5], cprcc); \
+		CPR1((dw)[(d_off) + 6], cprcc); \
+		CPR1((dw)[(d_off) + 7], cprcc); \
+		CPR1((dw)[(d_off) + 8], cprcc); \
+		(dw)[(d_off) + 9] = cprcc; \
+	} while (0)
+
+	memcpy(u, a, 20 * sizeof *a);	/* slots 0..3: the four 5-word quarters of a */
+	ZADD(u, 4, a, 0, a, 1);
+	ZADD(u, 5, a, 2, a, 3);
+	ZADD(u, 6, a, 0, a, 2);
+	ZADD(u, 7, a, 1, a, 3);
+	ZADD(u, 8, u, 6, u, 7);	/* slot 8: sum of all four quarters */
+
+	memcpy(v, b, 20 * sizeof *b);	/* same layout for b */
+	ZADD(v, 4, b, 0, b, 1);
+	ZADD(v, 5, b, 2, b, 3);
+	ZADD(v, 6, b, 0, b, 2);
+	ZADD(v, 7, b, 1, b, 3);
+	ZADD(v, 8, v, 6, v, 7);
+
+	/*
+	 * Do the first eight 5x5 muls. Source words are at most 16382
+	 * each, so we can add product results together "as is" in 32-bit
+	 * words.
+	 */
+	for (i = 0; i < 40; i += 5) {
+		w[(i << 1) + 0] = MUL15(u[i + 0], v[i + 0]);
+		w[(i << 1) + 1] = MUL15(u[i + 0], v[i + 1])
+			+ MUL15(u[i + 1], v[i + 0]);
+		w[(i << 1) + 2] = MUL15(u[i + 0], v[i + 2])
+			+ MUL15(u[i + 1], v[i + 1])
+			+ MUL15(u[i + 2], v[i + 0]);
+		w[(i << 1) + 3] = MUL15(u[i + 0], v[i + 3])
+			+ MUL15(u[i + 1], v[i + 2])
+			+ MUL15(u[i + 2], v[i + 1])
+			+ MUL15(u[i + 3], v[i + 0]);
+		w[(i << 1) + 4] = MUL15(u[i + 0], v[i + 4])
+			+ MUL15(u[i + 1], v[i + 3])
+			+ MUL15(u[i + 2], v[i + 2])
+			+ MUL15(u[i + 3], v[i + 1])
+			+ MUL15(u[i + 4], v[i + 0]);
+		w[(i << 1) + 5] = MUL15(u[i + 1], v[i + 4])
+			+ MUL15(u[i + 2], v[i + 3])
+			+ MUL15(u[i + 3], v[i + 2])
+			+ MUL15(u[i + 4], v[i + 1]);
+		w[(i << 1) + 6] = MUL15(u[i + 2], v[i + 4])
+			+ MUL15(u[i + 3], v[i + 3])
+			+ MUL15(u[i + 4], v[i + 2]);
+		w[(i << 1) + 7] = MUL15(u[i + 3], v[i + 4])
+			+ MUL15(u[i + 4], v[i + 3]);
+		w[(i << 1) + 8] = MUL15(u[i + 4], v[i + 4]);
+		w[(i << 1) + 9] = 0;
+	}
+
+	/*
+	 * For the 9th multiplication, source words are up to 32764,
+	 * so we must do some carry propagation. If we add up to
+	 * 4 products and the carry is no more than 524224, then the
+	 * result fits in 32 bits, and the next carry will be no more
+	 * than 524224 (because 4*(32764^2)+524224 < 8192*524225).
+	 *
+	 * We thus just skip one of the products in the middle word,
+	 * then do a carry propagation (this reduces words to 13 bits
+	 * each, except possibly the last, which may use up to 17 bits
+	 * or so), then add the missing product.
+	 */
+	w[80 + 0] = MUL15(u[40 + 0], v[40 + 0]);
+	w[80 + 1] = MUL15(u[40 + 0], v[40 + 1])
+		+ MUL15(u[40 + 1], v[40 + 0]);
+	w[80 + 2] = MUL15(u[40 + 0], v[40 + 2])
+		+ MUL15(u[40 + 1], v[40 + 1])
+		+ MUL15(u[40 + 2], v[40 + 0]);
+	w[80 + 3] = MUL15(u[40 + 0], v[40 + 3])
+		+ MUL15(u[40 + 1], v[40 + 2])
+		+ MUL15(u[40 + 2], v[40 + 1])
+		+ MUL15(u[40 + 3], v[40 + 0]);
+	w[80 + 4] = MUL15(u[40 + 0], v[40 + 4])
+		+ MUL15(u[40 + 1], v[40 + 3])
+		+ MUL15(u[40 + 2], v[40 + 2])
+		+ MUL15(u[40 + 3], v[40 + 1]);
+		/* + MUL15(u[40 + 4], v[40 + 0]) */
+	w[80 + 5] = MUL15(u[40 + 1], v[40 + 4])
+		+ MUL15(u[40 + 2], v[40 + 3])
+		+ MUL15(u[40 + 3], v[40 + 2])
+		+ MUL15(u[40 + 4], v[40 + 1]);
+	w[80 + 6] = MUL15(u[40 + 2], v[40 + 4])
+		+ MUL15(u[40 + 3], v[40 + 3])
+		+ MUL15(u[40 + 4], v[40 + 2]);
+	w[80 + 7] = MUL15(u[40 + 3], v[40 + 4])
+		+ MUL15(u[40 + 4], v[40 + 3]);
+	w[80 + 8] = MUL15(u[40 + 4], v[40 + 4]);
+
+	CPR(w, 80);
+
+	w[80 + 4] += MUL15(u[40 + 4], v[40 + 0]);	/* the product skipped above */
+
+	/*
+	 * The products on 14-bit words in slots 6 and 7 yield values
+	 * up to 5*(16382^2) each, and we need to subtract two such
+	 * values from the higher word. We need the subtraction to fit
+	 * in a _signed_ 32-bit integer, i.e. 31 bits + a sign bit.
+	 * However, 10*(16382^2) does not fit. So we must perform a
+	 * bit of reduction here.
+	 */
+	CPR(w, 60);
+	CPR(w, 70);
+
+	/*
+	 * Recompose results.
+	 */
+
+	/* 0..1*0..1 into 0..3 */
+	ZSUB2F(w, 8, w, 0, w, 2);
+	ZSUB2F(w, 9, w, 1, w, 3);
+	ZADDT(w, 1, w, 8);
+	ZADDT(w, 2, w, 9);
+
+	/* 2..3*2..3 into 4..7 */
+	ZSUB2F(w, 10, w, 4, w, 6);
+	ZSUB2F(w, 11, w, 5, w, 7);
+	ZADDT(w, 5, w, 10);
+	ZADDT(w, 6, w, 11);
+
+	/* (0..1+2..3)*(0..1+2..3) into 12..15 */
+	ZSUB2F(w, 16, w, 12, w, 14);
+	ZSUB2F(w, 17, w, 13, w, 15);
+	ZADDT(w, 13, w, 16);
+	ZADDT(w, 14, w, 17);
+
+	/* first-level recomposition */
+	ZSUB2F(w, 12, w, 0, w, 4);
+	ZSUB2F(w, 13, w, 1, w, 5);
+	ZSUB2F(w, 14, w, 2, w, 6);
+	ZSUB2F(w, 15, w, 3, w, 7);
+	ZADDT(w, 2, w, 12);
+	ZADDT(w, 3, w, 13);
+	ZADDT(w, 4, w, 14);
+	ZADDT(w, 5, w, 15);
+
+	/*
+	 * Perform carry propagation to bring all words down to 13 bits.
+	 */
+	cc = norm13(d, w, 40);
+	d[39] += (cc << 13);	/* fold the final carry back into the top word */
+
+#undef ZADD
+#undef ZADDT
+#undef ZSUB2F
+#undef CPR1
+#undef CPR
+}
+
+static inline void
+square20(uint32_t *d, const uint32_t *a)
+{
+	mul20(d, a, a);	/* no dedicated squaring in this variant: multiply a by itself */
+}
+
+#else
+
+static void
+mul20(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t t[39];	/* t[k] = sum of a[i] * b[j] over all i + j == k, carries not yet propagated */
+
+	t[ 0] = MUL15(a[ 0], b[ 0]);
+	t[ 1] = MUL15(a[ 0], b[ 1])
+		+ MUL15(a[ 1], b[ 0]);
+	t[ 2] = MUL15(a[ 0], b[ 2])
+		+ MUL15(a[ 1], b[ 1])
+		+ MUL15(a[ 2], b[ 0]);
+	t[ 3] = MUL15(a[ 0], b[ 3])
+		+ MUL15(a[ 1], b[ 2])
+		+ MUL15(a[ 2], b[ 1])
+		+ MUL15(a[ 3], b[ 0]);
+	t[ 4] = MUL15(a[ 0], b[ 4])
+		+ MUL15(a[ 1], b[ 3])
+		+ MUL15(a[ 2], b[ 2])
+		+ MUL15(a[ 3], b[ 1])
+		+ MUL15(a[ 4], b[ 0]);
+	t[ 5] = MUL15(a[ 0], b[ 5])
+		+ MUL15(a[ 1], b[ 4])
+		+ MUL15(a[ 2], b[ 3])
+		+ MUL15(a[ 3], b[ 2])
+		+ MUL15(a[ 4], b[ 1])
+		+ MUL15(a[ 5], b[ 0]);
+	t[ 6] = MUL15(a[ 0], b[ 6])
+		+ MUL15(a[ 1], b[ 5])
+		+ MUL15(a[ 2], b[ 4])
+		+ MUL15(a[ 3], b[ 3])
+		+ MUL15(a[ 4], b[ 2])
+		+ MUL15(a[ 5], b[ 1])
+		+ MUL15(a[ 6], b[ 0]);
+	t[ 7] = MUL15(a[ 0], b[ 7])
+		+ MUL15(a[ 1], b[ 6])
+		+ MUL15(a[ 2], b[ 5])
+		+ MUL15(a[ 3], b[ 4])
+		+ MUL15(a[ 4], b[ 3])
+		+ MUL15(a[ 5], b[ 2])
+		+ MUL15(a[ 6], b[ 1])
+		+ MUL15(a[ 7], b[ 0]);
+	t[ 8] = MUL15(a[ 0], b[ 8])
+		+ MUL15(a[ 1], b[ 7])
+		+ MUL15(a[ 2], b[ 6])
+		+ MUL15(a[ 3], b[ 5])
+		+ MUL15(a[ 4], b[ 4])
+		+ MUL15(a[ 5], b[ 3])
+		+ MUL15(a[ 6], b[ 2])
+		+ MUL15(a[ 7], b[ 1])
+		+ MUL15(a[ 8], b[ 0]);
+	t[ 9] = MUL15(a[ 0], b[ 9])
+		+ MUL15(a[ 1], b[ 8])
+		+ MUL15(a[ 2], b[ 7])
+		+ MUL15(a[ 3], b[ 6])
+		+ MUL15(a[ 4], b[ 5])
+		+ MUL15(a[ 5], b[ 4])
+		+ MUL15(a[ 6], b[ 3])
+		+ MUL15(a[ 7], b[ 2])
+		+ MUL15(a[ 8], b[ 1])
+		+ MUL15(a[ 9], b[ 0]);
+	t[10] = MUL15(a[ 0], b[10])
+		+ MUL15(a[ 1], b[ 9])
+		+ MUL15(a[ 2], b[ 8])
+		+ MUL15(a[ 3], b[ 7])
+		+ MUL15(a[ 4], b[ 6])
+		+ MUL15(a[ 5], b[ 5])
+		+ MUL15(a[ 6], b[ 4])
+		+ MUL15(a[ 7], b[ 3])
+		+ MUL15(a[ 8], b[ 2])
+		+ MUL15(a[ 9], b[ 1])
+		+ MUL15(a[10], b[ 0]);
+	t[11] = MUL15(a[ 0], b[11])
+		+ MUL15(a[ 1], b[10])
+		+ MUL15(a[ 2], b[ 9])
+		+ MUL15(a[ 3], b[ 8])
+		+ MUL15(a[ 4], b[ 7])
+		+ MUL15(a[ 5], b[ 6])
+		+ MUL15(a[ 6], b[ 5])
+		+ MUL15(a[ 7], b[ 4])
+		+ MUL15(a[ 8], b[ 3])
+		+ MUL15(a[ 9], b[ 2])
+		+ MUL15(a[10], b[ 1])
+		+ MUL15(a[11], b[ 0]);
+	t[12] = MUL15(a[ 0], b[12])
+		+ MUL15(a[ 1], b[11])
+		+ MUL15(a[ 2], b[10])
+		+ MUL15(a[ 3], b[ 9])
+		+ MUL15(a[ 4], b[ 8])
+		+ MUL15(a[ 5], b[ 7])
+		+ MUL15(a[ 6], b[ 6])
+		+ MUL15(a[ 7], b[ 5])
+		+ MUL15(a[ 8], b[ 4])
+		+ MUL15(a[ 9], b[ 3])
+		+ MUL15(a[10], b[ 2])
+		+ MUL15(a[11], b[ 1])
+		+ MUL15(a[12], b[ 0]);
+	t[13] = MUL15(a[ 0], b[13])
+		+ MUL15(a[ 1], b[12])
+		+ MUL15(a[ 2], b[11])
+		+ MUL15(a[ 3], b[10])
+		+ MUL15(a[ 4], b[ 9])
+		+ MUL15(a[ 5], b[ 8])
+		+ MUL15(a[ 6], b[ 7])
+		+ MUL15(a[ 7], b[ 6])
+		+ MUL15(a[ 8], b[ 5])
+		+ MUL15(a[ 9], b[ 4])
+		+ MUL15(a[10], b[ 3])
+		+ MUL15(a[11], b[ 2])
+		+ MUL15(a[12], b[ 1])
+		+ MUL15(a[13], b[ 0]);
+	t[14] = MUL15(a[ 0], b[14])
+		+ MUL15(a[ 1], b[13])
+		+ MUL15(a[ 2], b[12])
+		+ MUL15(a[ 3], b[11])
+		+ MUL15(a[ 4], b[10])
+		+ MUL15(a[ 5], b[ 9])
+		+ MUL15(a[ 6], b[ 8])
+		+ MUL15(a[ 7], b[ 7])
+		+ MUL15(a[ 8], b[ 6])
+		+ MUL15(a[ 9], b[ 5])
+		+ MUL15(a[10], b[ 4])
+		+ MUL15(a[11], b[ 3])
+		+ MUL15(a[12], b[ 2])
+		+ MUL15(a[13], b[ 1])
+		+ MUL15(a[14], b[ 0]);
+	t[15] = MUL15(a[ 0], b[15])
+		+ MUL15(a[ 1], b[14])
+		+ MUL15(a[ 2], b[13])
+		+ MUL15(a[ 3], b[12])
+		+ MUL15(a[ 4], b[11])
+		+ MUL15(a[ 5], b[10])
+		+ MUL15(a[ 6], b[ 9])
+		+ MUL15(a[ 7], b[ 8])
+		+ MUL15(a[ 8], b[ 7])
+		+ MUL15(a[ 9], b[ 6])
+		+ MUL15(a[10], b[ 5])
+		+ MUL15(a[11], b[ 4])
+		+ MUL15(a[12], b[ 3])
+		+ MUL15(a[13], b[ 2])
+		+ MUL15(a[14], b[ 1])
+		+ MUL15(a[15], b[ 0]);
+	t[16] = MUL15(a[ 0], b[16])
+		+ MUL15(a[ 1], b[15])
+		+ MUL15(a[ 2], b[14])
+		+ MUL15(a[ 3], b[13])
+		+ MUL15(a[ 4], b[12])
+		+ MUL15(a[ 5], b[11])
+		+ MUL15(a[ 6], b[10])
+		+ MUL15(a[ 7], b[ 9])
+		+ MUL15(a[ 8], b[ 8])
+		+ MUL15(a[ 9], b[ 7])
+		+ MUL15(a[10], b[ 6])
+		+ MUL15(a[11], b[ 5])
+		+ MUL15(a[12], b[ 4])
+		+ MUL15(a[13], b[ 3])
+		+ MUL15(a[14], b[ 2])
+		+ MUL15(a[15], b[ 1])
+		+ MUL15(a[16], b[ 0]);
+	t[17] = MUL15(a[ 0], b[17])
+		+ MUL15(a[ 1], b[16])
+		+ MUL15(a[ 2], b[15])
+		+ MUL15(a[ 3], b[14])
+		+ MUL15(a[ 4], b[13])
+		+ MUL15(a[ 5], b[12])
+		+ MUL15(a[ 6], b[11])
+		+ MUL15(a[ 7], b[10])
+		+ MUL15(a[ 8], b[ 9])
+		+ MUL15(a[ 9], b[ 8])
+		+ MUL15(a[10], b[ 7])
+		+ MUL15(a[11], b[ 6])
+		+ MUL15(a[12], b[ 5])
+		+ MUL15(a[13], b[ 4])
+		+ MUL15(a[14], b[ 3])
+		+ MUL15(a[15], b[ 2])
+		+ MUL15(a[16], b[ 1])
+		+ MUL15(a[17], b[ 0]);
+	t[18] = MUL15(a[ 0], b[18])
+		+ MUL15(a[ 1], b[17])
+		+ MUL15(a[ 2], b[16])
+		+ MUL15(a[ 3], b[15])
+		+ MUL15(a[ 4], b[14])
+		+ MUL15(a[ 5], b[13])
+		+ MUL15(a[ 6], b[12])
+		+ MUL15(a[ 7], b[11])
+		+ MUL15(a[ 8], b[10])
+		+ MUL15(a[ 9], b[ 9])
+		+ MUL15(a[10], b[ 8])
+		+ MUL15(a[11], b[ 7])
+		+ MUL15(a[12], b[ 6])
+		+ MUL15(a[13], b[ 5])
+		+ MUL15(a[14], b[ 4])
+		+ MUL15(a[15], b[ 3])
+		+ MUL15(a[16], b[ 2])
+		+ MUL15(a[17], b[ 1])
+		+ MUL15(a[18], b[ 0]);
+	t[19] = MUL15(a[ 0], b[19])
+		+ MUL15(a[ 1], b[18])
+		+ MUL15(a[ 2], b[17])
+		+ MUL15(a[ 3], b[16])
+		+ MUL15(a[ 4], b[15])
+		+ MUL15(a[ 5], b[14])
+		+ MUL15(a[ 6], b[13])
+		+ MUL15(a[ 7], b[12])
+		+ MUL15(a[ 8], b[11])
+		+ MUL15(a[ 9], b[10])
+		+ MUL15(a[10], b[ 9])
+		+ MUL15(a[11], b[ 8])
+		+ MUL15(a[12], b[ 7])
+		+ MUL15(a[13], b[ 6])
+		+ MUL15(a[14], b[ 5])
+		+ MUL15(a[15], b[ 4])
+		+ MUL15(a[16], b[ 3])
+		+ MUL15(a[17], b[ 2])
+		+ MUL15(a[18], b[ 1])
+		+ MUL15(a[19], b[ 0]);
+	t[20] = MUL15(a[ 1], b[19])
+		+ MUL15(a[ 2], b[18])
+		+ MUL15(a[ 3], b[17])
+		+ MUL15(a[ 4], b[16])
+		+ MUL15(a[ 5], b[15])
+		+ MUL15(a[ 6], b[14])
+		+ MUL15(a[ 7], b[13])
+		+ MUL15(a[ 8], b[12])
+		+ MUL15(a[ 9], b[11])
+		+ MUL15(a[10], b[10])
+		+ MUL15(a[11], b[ 9])
+		+ MUL15(a[12], b[ 8])
+		+ MUL15(a[13], b[ 7])
+		+ MUL15(a[14], b[ 6])
+		+ MUL15(a[15], b[ 5])
+		+ MUL15(a[16], b[ 4])
+		+ MUL15(a[17], b[ 3])
+		+ MUL15(a[18], b[ 2])
+		+ MUL15(a[19], b[ 1]);
+	t[21] = MUL15(a[ 2], b[19])
+		+ MUL15(a[ 3], b[18])
+		+ MUL15(a[ 4], b[17])
+		+ MUL15(a[ 5], b[16])
+		+ MUL15(a[ 6], b[15])
+		+ MUL15(a[ 7], b[14])
+		+ MUL15(a[ 8], b[13])
+		+ MUL15(a[ 9], b[12])
+		+ MUL15(a[10], b[11])
+		+ MUL15(a[11], b[10])
+		+ MUL15(a[12], b[ 9])
+		+ MUL15(a[13], b[ 8])
+		+ MUL15(a[14], b[ 7])
+		+ MUL15(a[15], b[ 6])
+		+ MUL15(a[16], b[ 5])
+		+ MUL15(a[17], b[ 4])
+		+ MUL15(a[18], b[ 3])
+		+ MUL15(a[19], b[ 2]);
+	t[22] = MUL15(a[ 3], b[19])
+		+ MUL15(a[ 4], b[18])
+		+ MUL15(a[ 5], b[17])
+		+ MUL15(a[ 6], b[16])
+		+ MUL15(a[ 7], b[15])
+		+ MUL15(a[ 8], b[14])
+		+ MUL15(a[ 9], b[13])
+		+ MUL15(a[10], b[12])
+		+ MUL15(a[11], b[11])
+		+ MUL15(a[12], b[10])
+		+ MUL15(a[13], b[ 9])
+		+ MUL15(a[14], b[ 8])
+		+ MUL15(a[15], b[ 7])
+		+ MUL15(a[16], b[ 6])
+		+ MUL15(a[17], b[ 5])
+		+ MUL15(a[18], b[ 4])
+		+ MUL15(a[19], b[ 3]);
+	t[23] = MUL15(a[ 4], b[19])
+		+ MUL15(a[ 5], b[18])
+		+ MUL15(a[ 6], b[17])
+		+ MUL15(a[ 7], b[16])
+		+ MUL15(a[ 8], b[15])
+		+ MUL15(a[ 9], b[14])
+		+ MUL15(a[10], b[13])
+		+ MUL15(a[11], b[12])
+		+ MUL15(a[12], b[11])
+		+ MUL15(a[13], b[10])
+		+ MUL15(a[14], b[ 9])
+		+ MUL15(a[15], b[ 8])
+		+ MUL15(a[16], b[ 7])
+		+ MUL15(a[17], b[ 6])
+		+ MUL15(a[18], b[ 5])
+		+ MUL15(a[19], b[ 4]);
+	t[24] = MUL15(a[ 5], b[19])
+		+ MUL15(a[ 6], b[18])
+		+ MUL15(a[ 7], b[17])
+		+ MUL15(a[ 8], b[16])
+		+ MUL15(a[ 9], b[15])
+		+ MUL15(a[10], b[14])
+		+ MUL15(a[11], b[13])
+		+ MUL15(a[12], b[12])
+		+ MUL15(a[13], b[11])
+		+ MUL15(a[14], b[10])
+		+ MUL15(a[15], b[ 9])
+		+ MUL15(a[16], b[ 8])
+		+ MUL15(a[17], b[ 7])
+		+ MUL15(a[18], b[ 6])
+		+ MUL15(a[19], b[ 5]);
+	t[25] = MUL15(a[ 6], b[19])
+		+ MUL15(a[ 7], b[18])
+		+ MUL15(a[ 8], b[17])
+		+ MUL15(a[ 9], b[16])
+		+ MUL15(a[10], b[15])
+		+ MUL15(a[11], b[14])
+		+ MUL15(a[12], b[13])
+		+ MUL15(a[13], b[12])
+		+ MUL15(a[14], b[11])
+		+ MUL15(a[15], b[10])
+		+ MUL15(a[16], b[ 9])
+		+ MUL15(a[17], b[ 8])
+		+ MUL15(a[18], b[ 7])
+		+ MUL15(a[19], b[ 6]);
+	t[26] = MUL15(a[ 7], b[19])
+		+ MUL15(a[ 8], b[18])
+		+ MUL15(a[ 9], b[17])
+		+ MUL15(a[10], b[16])
+		+ MUL15(a[11], b[15])
+		+ MUL15(a[12], b[14])
+		+ MUL15(a[13], b[13])
+		+ MUL15(a[14], b[12])
+		+ MUL15(a[15], b[11])
+		+ MUL15(a[16], b[10])
+		+ MUL15(a[17], b[ 9])
+		+ MUL15(a[18], b[ 8])
+		+ MUL15(a[19], b[ 7]);
+	t[27] = MUL15(a[ 8], b[19])
+		+ MUL15(a[ 9], b[18])
+		+ MUL15(a[10], b[17])
+		+ MUL15(a[11], b[16])
+		+ MUL15(a[12], b[15])
+		+ MUL15(a[13], b[14])
+		+ MUL15(a[14], b[13])
+		+ MUL15(a[15], b[12])
+		+ MUL15(a[16], b[11])
+		+ MUL15(a[17], b[10])
+		+ MUL15(a[18], b[ 9])
+		+ MUL15(a[19], b[ 8]);
+	t[28] = MUL15(a[ 9], b[19])
+		+ MUL15(a[10], b[18])
+		+ MUL15(a[11], b[17])
+		+ MUL15(a[12], b[16])
+		+ MUL15(a[13], b[15])
+		+ MUL15(a[14], b[14])
+		+ MUL15(a[15], b[13])
+		+ MUL15(a[16], b[12])
+		+ MUL15(a[17], b[11])
+		+ MUL15(a[18], b[10])
+		+ MUL15(a[19], b[ 9]);
+	t[29] = MUL15(a[10], b[19])
+		+ MUL15(a[11], b[18])
+		+ MUL15(a[12], b[17])
+		+ MUL15(a[13], b[16])
+		+ MUL15(a[14], b[15])
+		+ MUL15(a[15], b[14])
+		+ MUL15(a[16], b[13])
+		+ MUL15(a[17], b[12])
+		+ MUL15(a[18], b[11])
+		+ MUL15(a[19], b[10]);
+	t[30] = MUL15(a[11], b[19])
+		+ MUL15(a[12], b[18])
+		+ MUL15(a[13], b[17])
+		+ MUL15(a[14], b[16])
+		+ MUL15(a[15], b[15])
+		+ MUL15(a[16], b[14])
+		+ MUL15(a[17], b[13])
+		+ MUL15(a[18], b[12])
+		+ MUL15(a[19], b[11]);
+	t[31] = MUL15(a[12], b[19])
+		+ MUL15(a[13], b[18])
+		+ MUL15(a[14], b[17])
+		+ MUL15(a[15], b[16])
+		+ MUL15(a[16], b[15])
+		+ MUL15(a[17], b[14])
+		+ MUL15(a[18], b[13])
+		+ MUL15(a[19], b[12]);
+	t[32] = MUL15(a[13], b[19])
+		+ MUL15(a[14], b[18])
+		+ MUL15(a[15], b[17])
+		+ MUL15(a[16], b[16])
+		+ MUL15(a[17], b[15])
+		+ MUL15(a[18], b[14])
+		+ MUL15(a[19], b[13]);
+	t[33] = MUL15(a[14], b[19])
+		+ MUL15(a[15], b[18])
+		+ MUL15(a[16], b[17])
+		+ MUL15(a[17], b[16])
+		+ MUL15(a[18], b[15])
+		+ MUL15(a[19], b[14]);
+	t[34] = MUL15(a[15], b[19])
+		+ MUL15(a[16], b[18])
+		+ MUL15(a[17], b[17])
+		+ MUL15(a[18], b[16])
+		+ MUL15(a[19], b[15]);
+	t[35] = MUL15(a[16], b[19])
+		+ MUL15(a[17], b[18])
+		+ MUL15(a[18], b[17])
+		+ MUL15(a[19], b[16]);
+	t[36] = MUL15(a[17], b[19])
+		+ MUL15(a[18], b[18])
+		+ MUL15(a[19], b[17]);
+	t[37] = MUL15(a[18], b[19])
+		+ MUL15(a[19], b[18]);
+	t[38] = MUL15(a[19], b[19]);
+
+	d[39] = norm13(d, t, 39);	/* d[0..38] reduced to 13 bits each; the final carry becomes d[39] */
+}
+
+/*
+ * Square a 20-word integer (13 bits of data per word, little-endian
+ * word order, as used by the F255 routines of this file). The result
+ * goes to d[], which must have room for 40 words.
+ *
+ * t[k] is the column sum of all products a[i]*a[j] with i + j == k.
+ * Each cross product (i != j) is computed once and doubled with a left
+ * shift; the diagonal term a[k/2]^2, present for even k only, is added
+ * undoubled.
+ *
+ * NOTE(review): norm13() is defined outside this excerpt; presumably it
+ * propagates carries from t[] into d[0..38] and returns the final
+ * carry, which is stored here as the top word d[39] -- confirm.
+ */
+static void
+square20(uint32_t *d, const uint32_t *a)
+{
+	uint32_t t[39];
+
+	t[ 0] = MUL15(a[ 0], a[ 0]);
+	t[ 1] = ((MUL15(a[ 0], a[ 1])) << 1);
+	t[ 2] = MUL15(a[ 1], a[ 1])
+		+ ((MUL15(a[ 0], a[ 2])) << 1);
+	t[ 3] = ((MUL15(a[ 0], a[ 3])
+		+ MUL15(a[ 1], a[ 2])) << 1);
+	t[ 4] = MUL15(a[ 2], a[ 2])
+		+ ((MUL15(a[ 0], a[ 4])
+		+ MUL15(a[ 1], a[ 3])) << 1);
+	t[ 5] = ((MUL15(a[ 0], a[ 5])
+		+ MUL15(a[ 1], a[ 4])
+		+ MUL15(a[ 2], a[ 3])) << 1);
+	t[ 6] = MUL15(a[ 3], a[ 3])
+		+ ((MUL15(a[ 0], a[ 6])
+		+ MUL15(a[ 1], a[ 5])
+		+ MUL15(a[ 2], a[ 4])) << 1);
+	t[ 7] = ((MUL15(a[ 0], a[ 7])
+		+ MUL15(a[ 1], a[ 6])
+		+ MUL15(a[ 2], a[ 5])
+		+ MUL15(a[ 3], a[ 4])) << 1);
+	t[ 8] = MUL15(a[ 4], a[ 4])
+		+ ((MUL15(a[ 0], a[ 8])
+		+ MUL15(a[ 1], a[ 7])
+		+ MUL15(a[ 2], a[ 6])
+		+ MUL15(a[ 3], a[ 5])) << 1);
+	t[ 9] = ((MUL15(a[ 0], a[ 9])
+		+ MUL15(a[ 1], a[ 8])
+		+ MUL15(a[ 2], a[ 7])
+		+ MUL15(a[ 3], a[ 6])
+		+ MUL15(a[ 4], a[ 5])) << 1);
+	t[10] = MUL15(a[ 5], a[ 5])
+		+ ((MUL15(a[ 0], a[10])
+		+ MUL15(a[ 1], a[ 9])
+		+ MUL15(a[ 2], a[ 8])
+		+ MUL15(a[ 3], a[ 7])
+		+ MUL15(a[ 4], a[ 6])) << 1);
+	t[11] = ((MUL15(a[ 0], a[11])
+		+ MUL15(a[ 1], a[10])
+		+ MUL15(a[ 2], a[ 9])
+		+ MUL15(a[ 3], a[ 8])
+		+ MUL15(a[ 4], a[ 7])
+		+ MUL15(a[ 5], a[ 6])) << 1);
+	t[12] = MUL15(a[ 6], a[ 6])
+		+ ((MUL15(a[ 0], a[12])
+		+ MUL15(a[ 1], a[11])
+		+ MUL15(a[ 2], a[10])
+		+ MUL15(a[ 3], a[ 9])
+		+ MUL15(a[ 4], a[ 8])
+		+ MUL15(a[ 5], a[ 7])) << 1);
+	t[13] = ((MUL15(a[ 0], a[13])
+		+ MUL15(a[ 1], a[12])
+		+ MUL15(a[ 2], a[11])
+		+ MUL15(a[ 3], a[10])
+		+ MUL15(a[ 4], a[ 9])
+		+ MUL15(a[ 5], a[ 8])
+		+ MUL15(a[ 6], a[ 7])) << 1);
+	t[14] = MUL15(a[ 7], a[ 7])
+		+ ((MUL15(a[ 0], a[14])
+		+ MUL15(a[ 1], a[13])
+		+ MUL15(a[ 2], a[12])
+		+ MUL15(a[ 3], a[11])
+		+ MUL15(a[ 4], a[10])
+		+ MUL15(a[ 5], a[ 9])
+		+ MUL15(a[ 6], a[ 8])) << 1);
+	t[15] = ((MUL15(a[ 0], a[15])
+		+ MUL15(a[ 1], a[14])
+		+ MUL15(a[ 2], a[13])
+		+ MUL15(a[ 3], a[12])
+		+ MUL15(a[ 4], a[11])
+		+ MUL15(a[ 5], a[10])
+		+ MUL15(a[ 6], a[ 9])
+		+ MUL15(a[ 7], a[ 8])) << 1);
+	t[16] = MUL15(a[ 8], a[ 8])
+		+ ((MUL15(a[ 0], a[16])
+		+ MUL15(a[ 1], a[15])
+		+ MUL15(a[ 2], a[14])
+		+ MUL15(a[ 3], a[13])
+		+ MUL15(a[ 4], a[12])
+		+ MUL15(a[ 5], a[11])
+		+ MUL15(a[ 6], a[10])
+		+ MUL15(a[ 7], a[ 9])) << 1);
+	t[17] = ((MUL15(a[ 0], a[17])
+		+ MUL15(a[ 1], a[16])
+		+ MUL15(a[ 2], a[15])
+		+ MUL15(a[ 3], a[14])
+		+ MUL15(a[ 4], a[13])
+		+ MUL15(a[ 5], a[12])
+		+ MUL15(a[ 6], a[11])
+		+ MUL15(a[ 7], a[10])
+		+ MUL15(a[ 8], a[ 9])) << 1);
+	t[18] = MUL15(a[ 9], a[ 9])
+		+ ((MUL15(a[ 0], a[18])
+		+ MUL15(a[ 1], a[17])
+		+ MUL15(a[ 2], a[16])
+		+ MUL15(a[ 3], a[15])
+		+ MUL15(a[ 4], a[14])
+		+ MUL15(a[ 5], a[13])
+		+ MUL15(a[ 6], a[12])
+		+ MUL15(a[ 7], a[11])
+		+ MUL15(a[ 8], a[10])) << 1);
+	t[19] = ((MUL15(a[ 0], a[19])
+		+ MUL15(a[ 1], a[18])
+		+ MUL15(a[ 2], a[17])
+		+ MUL15(a[ 3], a[16])
+		+ MUL15(a[ 4], a[15])
+		+ MUL15(a[ 5], a[14])
+		+ MUL15(a[ 6], a[13])
+		+ MUL15(a[ 7], a[12])
+		+ MUL15(a[ 8], a[11])
+		+ MUL15(a[ 9], a[10])) << 1);
+	t[20] = MUL15(a[10], a[10])
+		+ ((MUL15(a[ 1], a[19])
+		+ MUL15(a[ 2], a[18])
+		+ MUL15(a[ 3], a[17])
+		+ MUL15(a[ 4], a[16])
+		+ MUL15(a[ 5], a[15])
+		+ MUL15(a[ 6], a[14])
+		+ MUL15(a[ 7], a[13])
+		+ MUL15(a[ 8], a[12])
+		+ MUL15(a[ 9], a[11])) << 1);
+	t[21] = ((MUL15(a[ 2], a[19])
+		+ MUL15(a[ 3], a[18])
+		+ MUL15(a[ 4], a[17])
+		+ MUL15(a[ 5], a[16])
+		+ MUL15(a[ 6], a[15])
+		+ MUL15(a[ 7], a[14])
+		+ MUL15(a[ 8], a[13])
+		+ MUL15(a[ 9], a[12])
+		+ MUL15(a[10], a[11])) << 1);
+	t[22] = MUL15(a[11], a[11])
+		+ ((MUL15(a[ 3], a[19])
+		+ MUL15(a[ 4], a[18])
+		+ MUL15(a[ 5], a[17])
+		+ MUL15(a[ 6], a[16])
+		+ MUL15(a[ 7], a[15])
+		+ MUL15(a[ 8], a[14])
+		+ MUL15(a[ 9], a[13])
+		+ MUL15(a[10], a[12])) << 1);
+	t[23] = ((MUL15(a[ 4], a[19])
+		+ MUL15(a[ 5], a[18])
+		+ MUL15(a[ 6], a[17])
+		+ MUL15(a[ 7], a[16])
+		+ MUL15(a[ 8], a[15])
+		+ MUL15(a[ 9], a[14])
+		+ MUL15(a[10], a[13])
+		+ MUL15(a[11], a[12])) << 1);
+	t[24] = MUL15(a[12], a[12])
+		+ ((MUL15(a[ 5], a[19])
+		+ MUL15(a[ 6], a[18])
+		+ MUL15(a[ 7], a[17])
+		+ MUL15(a[ 8], a[16])
+		+ MUL15(a[ 9], a[15])
+		+ MUL15(a[10], a[14])
+		+ MUL15(a[11], a[13])) << 1);
+	t[25] = ((MUL15(a[ 6], a[19])
+		+ MUL15(a[ 7], a[18])
+		+ MUL15(a[ 8], a[17])
+		+ MUL15(a[ 9], a[16])
+		+ MUL15(a[10], a[15])
+		+ MUL15(a[11], a[14])
+		+ MUL15(a[12], a[13])) << 1);
+	t[26] = MUL15(a[13], a[13])
+		+ ((MUL15(a[ 7], a[19])
+		+ MUL15(a[ 8], a[18])
+		+ MUL15(a[ 9], a[17])
+		+ MUL15(a[10], a[16])
+		+ MUL15(a[11], a[15])
+		+ MUL15(a[12], a[14])) << 1);
+	t[27] = ((MUL15(a[ 8], a[19])
+		+ MUL15(a[ 9], a[18])
+		+ MUL15(a[10], a[17])
+		+ MUL15(a[11], a[16])
+		+ MUL15(a[12], a[15])
+		+ MUL15(a[13], a[14])) << 1);
+	t[28] = MUL15(a[14], a[14])
+		+ ((MUL15(a[ 9], a[19])
+		+ MUL15(a[10], a[18])
+		+ MUL15(a[11], a[17])
+		+ MUL15(a[12], a[16])
+		+ MUL15(a[13], a[15])) << 1);
+	t[29] = ((MUL15(a[10], a[19])
+		+ MUL15(a[11], a[18])
+		+ MUL15(a[12], a[17])
+		+ MUL15(a[13], a[16])
+		+ MUL15(a[14], a[15])) << 1);
+	t[30] = MUL15(a[15], a[15])
+		+ ((MUL15(a[11], a[19])
+		+ MUL15(a[12], a[18])
+		+ MUL15(a[13], a[17])
+		+ MUL15(a[14], a[16])) << 1);
+	t[31] = ((MUL15(a[12], a[19])
+		+ MUL15(a[13], a[18])
+		+ MUL15(a[14], a[17])
+		+ MUL15(a[15], a[16])) << 1);
+	t[32] = MUL15(a[16], a[16])
+		+ ((MUL15(a[13], a[19])
+		+ MUL15(a[14], a[18])
+		+ MUL15(a[15], a[17])) << 1);
+	t[33] = ((MUL15(a[14], a[19])
+		+ MUL15(a[15], a[18])
+		+ MUL15(a[16], a[17])) << 1);
+	t[34] = MUL15(a[17], a[17])
+		+ ((MUL15(a[15], a[19])
+		+ MUL15(a[16], a[18])) << 1);
+	t[35] = ((MUL15(a[16], a[19])
+		+ MUL15(a[17], a[18])) << 1);
+	t[36] = MUL15(a[18], a[18])
+		+ ((MUL15(a[17], a[19])) << 1);
+	t[37] = ((MUL15(a[18], a[19])) << 1);
+	t[38] = MUL15(a[19], a[19]);
+
+	d[39] = norm13(d, t, 39);
+}
+
+#endif
+
+/*
+ * Final reduction modulo p = 2^255-19 (field for Curve25519).
+ * The input must already be partially reduced (less than twice the
+ * modulus). When the input is not lower than p, it is replaced with
+ * input - p and 1 is returned; otherwise it is left untouched and 0
+ * is returned.
+ */
+static uint32_t
+reduce_final_f255(uint32_t *d)
+{
+	uint32_t cand[20];
+	uint32_t carry, ge;
+	int u;
+
+	/*
+	 * cand = d + 19. Bit 255 of that sum (bit 8 of word 19) is set
+	 * exactly when d >= p; dropping that bit then leaves d - p.
+	 */
+	carry = 19;
+	for (u = 0; u < 20; u ++) {
+		uint32_t sum;
+
+		sum = d[u] + carry;
+		cand[u] = sum & 0x1FFF;
+		carry = sum >> 13;
+	}
+	ge = cand[19] >> 8;
+	cand[19] &= 0xFF;
+
+	/* d is replaced with the candidate only when ge is 1. */
+	CCOPY(ge, d, cand, sizeof cand);
+	return ge;
+}
+
+/*
+ * Shared code for multiplication and squaring modulo 2^255-19, on
+ * operands of 20 words with 13 bits of data each. When 'square' is
+ * non-zero only 'a' is read and squared; otherwise a*b is computed.
+ * The raw product is built in a local buffer, so d[] is written only
+ * in the last pass.
+ */
+static void
+f255_mulgen(uint32_t *d, const uint32_t *a, const uint32_t *b, int square)
+{
+	uint32_t prod[40], carry, acc;
+
+	/*
+	 * Raw 40-word product. All words fit in 13 bits each; the top
+	 * word (prod[39]) fits on 5 bits, since the product of two
+	 * 256-bit integers fits on 512 bits.
+	 */
+	if (!square) {
+		mul20(prod, a, b);
+	} else {
+		square20(prod, a);
+	}
+
+	/*
+	 * First folding pass. Word 20+k starts at bit 260+13*k, and
+	 * 2^260 = 2^5 * 2^255 is congruent to 19*2^5 = 608 modulo
+	 * 2^255-19: each high word is added to word k with a factor of
+	 * 608. The bits of word 19 beyond the low 8 (weight 2^255 and
+	 * up) are folded too, with a factor of 19, as initial carry.
+	 */
+	carry = MUL15(prod[19] >> 8, 19);
+	prod[19] &= 0xFF;
+
+#define FOLD_HI(x)   do { \
+		acc = prod[x] + carry + MUL15(prod[(x) + 20], 608); \
+		prod[x] = acc & 0x1FFF; \
+		carry = acc >> 13; \
+	} while (0)
+
+	FOLD_HI( 0);
+	FOLD_HI( 1);
+	FOLD_HI( 2);
+	FOLD_HI( 3);
+	FOLD_HI( 4);
+	FOLD_HI( 5);
+	FOLD_HI( 6);
+	FOLD_HI( 7);
+	FOLD_HI( 8);
+	FOLD_HI( 9);
+	FOLD_HI(10);
+	FOLD_HI(11);
+	FOLD_HI(12);
+	FOLD_HI(13);
+	FOLD_HI(14);
+	FOLD_HI(15);
+	FOLD_HI(16);
+	FOLD_HI(17);
+	FOLD_HI(18);
+	FOLD_HI(19);
+
+#undef FOLD_HI
+
+	/*
+	 * Second pass. 'acc' still holds the unmasked sum computed for
+	 * word 19: its bits beyond the low 8 are folded once more (factor
+	 * 19) while the words are copied to d[] with carry propagation.
+	 */
+	carry = MUL15(acc >> 8, 19);
+	prod[19] &= 0xFF;
+
+#define CARRY_OUT(x)   do { \
+		acc = prod[x] + carry; \
+		d[x] = acc & 0x1FFF; \
+		carry = acc >> 13; \
+	} while (0)
+
+	CARRY_OUT( 0);
+	CARRY_OUT( 1);
+	CARRY_OUT( 2);
+	CARRY_OUT( 3);
+	CARRY_OUT( 4);
+	CARRY_OUT( 5);
+	CARRY_OUT( 6);
+	CARRY_OUT( 7);
+	CARRY_OUT( 8);
+	CARRY_OUT( 9);
+	CARRY_OUT(10);
+	CARRY_OUT(11);
+	CARRY_OUT(12);
+	CARRY_OUT(13);
+	CARRY_OUT(14);
+	CARRY_OUT(15);
+	CARRY_OUT(16);
+	CARRY_OUT(17);
+	CARRY_OUT(18);
+	CARRY_OUT(19);
+
+#undef CARRY_OUT
+}
+
+/*
+ * Perform a multiplication of two integers modulo 2^255-19.
+ * Operands are arrays of 20 words, each containing 13 bits of data, in
+ * little-endian order. Input value may be up to 2^256-1; on output, value
+ * fits on 256 bits and is lower than twice the modulus.
+ *
+ * f255_mul() is the general multiplication, f255_square() is specialised
+ * for squarings.
+ *
+ * Both are thin wrappers over f255_mulgen(); the last argument selects
+ * the raw multiplier (0: mul20() on a and b, 1: square20() on a alone).
+ * The destination may be one of the source operands, as in
+ * f255_square(x3, x3): f255_mulgen() builds the raw product in a local
+ * buffer before it writes to d[].
+ */
+#define f255_mul(d, a, b)   f255_mulgen(d, a, b, 0)
+#define f255_square(d, a)   f255_mulgen(d, a, a, 1)
+
+/*
+ * Addition in F255: d = a + b, with a partial reduction (the result
+ * is brought down to less than twice the modulus).
+ */
+static void
+f255_add(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t carry, sum;
+	int u;
+
+	/* Word-by-word addition with carry propagation. */
+	carry = 0;
+	for (u = 0; u < 20; u ++) {
+		sum = a[u] + b[u] + carry;
+		carry = sum >> 13;
+		d[u] = sum & 0x1FFF;
+	}
+
+	/*
+	 * 'sum' is the unmasked top word. Its bits beyond the low 8 have
+	 * weight 2^255 and more: they are removed from d[19] and added
+	 * back at the bottom with a factor of 19.
+	 */
+	carry = MUL15(sum >> 8, 19);
+	d[19] &= 0xFF;
+	for (u = 0; u < 20; u ++) {
+		sum = d[u] + carry;
+		carry = sum >> 13;
+		d[u] = sum & 0x1FFF;
+	}
+}
+
+/*
+ * Subtraction in F255: d = a - b, with a partial reduction (the result
+ * is brought down to less than twice the modulus).
+ */
+static void
+f255_sub(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t carry, diff;
+	int u;
+
+	/*
+	 * The value actually computed is a - b + 2*p, i.e.
+	 * a - b + 2^256 - 38, so that the final value is positive.
+	 * The -38 part is injected as the initial carry; the 2^256 part
+	 * is added after the loop.
+	 */
+	carry = (uint32_t)-38;
+	for (u = 0; u < 20; u ++) {
+		diff = a[u] - b[u] + carry;
+		d[u] = diff & 0x1FFF;
+		/* The carry may be negative: sign-preserving shift. */
+		carry = ARSH(diff, 13);
+	}
+
+	/*
+	 * 0x200 in word 19 has weight 2^(19*13+9) = 2^256. The bits
+	 * beyond the low 8 are then folded back with a factor of 19.
+	 */
+	carry = MUL15((diff + 0x200) >> 8, 19);
+	d[19] &= 0xFF;
+	for (u = 0; u < 20; u ++) {
+		diff = d[u] + carry;
+		d[u] = diff & 0x1FFF;
+		carry = diff >> 13;
+	}
+}
+
+/*
+ * Multiplication by the 'A24' constant (121665): d = 121665*a, with a
+ * partial reduction (the result is brought down to less than twice
+ * the modulus).
+ */
+static void
+f255_mul_a24(uint32_t *d, const uint32_t *a)
+{
+	uint32_t carry, acc;
+	int u;
+
+	/* Scale each 13-bit word and propagate the carries. */
+	carry = 0;
+	for (u = 0; u < 20; u ++) {
+		acc = MUL15(a[u], 121665) + carry;
+		carry = acc >> 13;
+		d[u] = acc & 0x1FFF;
+	}
+
+	/*
+	 * 'acc' is the unmasked top word: everything beyond its low 8
+	 * bits has weight 2^255 or more and is folded back with a factor
+	 * of 19.
+	 */
+	carry = MUL15(acc >> 8, 19);
+	d[19] &= 0xFF;
+	for (u = 0; u < 20; u ++) {
+		acc = d[u] + carry;
+		carry = acc >> 13;
+		d[u] = acc & 0x1FFF;
+	}
+}
+
+/*
+ * Generator returned by api_generator(): the value 9 encoded over 32
+ * bytes, least significant byte first (api_mul() decodes its point
+ * operand with le8_to_le13()).
+ */
+static const unsigned char GEN[] = {
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+/*
+ * Value returned by api_order(): 2^255-1 when read as an unsigned
+ * big-endian integer.
+ * NOTE(review): this is not the order of the prime-order subgroup of
+ * Curve25519; presumably it is meant as a bound on multipliers --
+ * confirm against the br_ec_impl documentation.
+ */
+static const unsigned char ORDER[] = {
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+/*
+ * Return the encoded generator; its length (32 bytes) is written to
+ * *len. The 'curve' argument is ignored.
+ */
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	(void)curve;
+	*len = sizeof GEN;
+	return GEN;
+}
+
+/*
+ * Return the ORDER table; its length (32 bytes) is written to *len.
+ * The 'curve' argument is ignored.
+ */
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	(void)curve;
+	*len = sizeof ORDER;
+	return ORDER;
+}
+
+/*
+ * Report offset 0 (return value) and length 32 (*len). The 'curve'
+ * argument is ignored.
+ * NOTE(review): presumably these locate the X coordinate inside an
+ * encoded point, which for this curve is the whole 32-byte encoding --
+ * confirm against bearssl_ec.h.
+ */
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	(void)curve;
+	*len = sizeof GEN;
+	return 0;
+}
+
+/*
+ * Branch-free conditional swap of two 20-word values: a[] and b[] are
+ * exchanged when ctl is 1, and left unchanged when ctl is 0.
+ */
+static void
+cswap(uint32_t *a, uint32_t *b, uint32_t ctl)
+{
+	uint32_t mask;
+	int u;
+
+	/* 0 -> 0x00000000, 1 -> 0xFFFFFFFF */
+	mask = -ctl;
+	for (u = 0; u < 20; u ++) {
+		uint32_t delta;
+
+		delta = mask & (a[u] ^ b[u]);
+		a[u] ^= delta;
+		b[u] ^= delta;
+	}
+}
+
+/*
+ * Point multiplication on Curve25519 (ladder of RFC 7748).
+ *
+ *   G      point to multiply, exactly 32 bytes, least significant
+ *          byte first; overwritten with the encoded result
+ *   Glen   length of G; must be 32
+ *   kb     multiplier, unsigned big-endian, at most 32 bytes
+ *   kblen  length of kb
+ *   curve  ignored
+ *
+ * Returned value is 1 on success, or 0 if a length is not acceptable
+ * (in that case G is not modified).
+ */
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	uint32_t x1[20], x2[20], x3[20], z2[20], z3[20];
+	uint32_t a[20], aa[20], b[20], bb[20];
+	uint32_t c[20], d[20], e[20], da[20], cb[20];
+	unsigned char k[32];
+	uint32_t swap;
+	int i;
+
+	(void)curve;
+
+	/*
+	 * Points are encoded over exactly 32 bytes. Multipliers must fit
+	 * in 32 bytes as well.
+	 * RFC 7748 mandates that the high bit of the last point byte must
+	 * be ignored/cleared.
+	 */
+	if (Glen != 32 || kblen > 32) {
+		return 0;
+	}
+	G[31] &= 0x7F;
+
+	/*
+	 * Initialise the ladder variables: x1 = x3 = decoded point,
+	 * x2 = 1, z2 = 0, z3 = 1. Values are plain integers split into
+	 * 13-bit words; no Montgomery representation is used by this
+	 * implementation. The value returned by le8_to_le13() is stored
+	 * as the top word (presumably the final partial word -- that
+	 * function is defined outside this excerpt).
+	 */
+	x1[19] = le8_to_le13(x1, G, 32);
+	memcpy(x3, x1, sizeof x1);
+	memset(z2, 0, sizeof z2);
+	memset(x2, 0, sizeof x2);
+	x2[0] = 1;
+	memset(z3, 0, sizeof z3);
+	z3[0] = 1;
+
+	/*
+	 * Copy the multiplier into k[], right-aligned (big-endian, so
+	 * k[31] is the least significant byte), then clamp it: the three
+	 * low bits and bit 255 are cleared, and bit 254 is set.
+	 */
+	memset(k, 0, (sizeof k) - kblen);
+	memcpy(k + (sizeof k) - kblen, kb, kblen);
+	k[31] &= 0xF8;
+	k[0] &= 0x7F;
+	k[0] |= 0x40;
+
+	/* obsolete
+	print_int("x1", x1);
+	*/
+
+	/*
+	 * Ladder over multiplier bits 254 down to 0. 'swap' remembers
+	 * the previous bit, so each iteration swaps the two working
+	 * points only when the current bit differs from the previous one.
+	 */
+	swap = 0;
+	for (i = 254; i >= 0; i --) {
+		uint32_t kt;
+
+		/* kt = bit i of the (big-endian) multiplier */
+		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;
+		swap ^= kt;
+		cswap(x2, x3, swap);
+		cswap(z2, z3, swap);
+		swap = kt;
+
+		/* obsolete
+		print_int("x2", x2);
+		print_int("z2", z2);
+		print_int("x3", x3);
+		print_int("z3", z3);
+		*/
+
+		f255_add(a, x2, z2);
+		f255_square(aa, a);
+		f255_sub(b, x2, z2);
+		f255_square(bb, b);
+		f255_sub(e, aa, bb);
+		f255_add(c, x3, z3);
+		f255_sub(d, x3, z3);
+		f255_mul(da, d, a);
+		f255_mul(cb, c, b);
+
+		/* obsolete
+		print_int("a ", a);
+		print_int("aa", aa);
+		print_int("b ", b);
+		print_int("bb", bb);
+		print_int("e ", e);
+		print_int("c ", c);
+		print_int("d ", d);
+		print_int("da", da);
+		print_int("cb", cb);
+		*/
+
+		/*
+		 * x3 = (da + cb)^2
+		 * z3 = x1 * (da - cb)^2
+		 * x2 = aa * bb
+		 * z2 = e * (aa + 121665 * e)
+		 */
+		f255_add(x3, da, cb);
+		f255_square(x3, x3);
+		f255_sub(z3, da, cb);
+		f255_square(z3, z3);
+		f255_mul(z3, z3, x1);
+		f255_mul(x2, aa, bb);
+		f255_mul_a24(z2, e);
+		f255_add(z2, z2, aa);
+		f255_mul(z2, e, z2);
+
+		/* obsolete
+		print_int("x2", x2);
+		print_int("z2", z2);
+		print_int("x3", x3);
+		print_int("z3", z3);
+		*/
+	}
+	/* Final swap, driven by the last multiplier bit processed. */
+	cswap(x2, x3, swap);
+	cswap(z2, z3, swap);
+
+	/*
+	 * Invert z2 with a modular exponentiation: the exponent is
+	 * p-2 = 2^255-21, i.e. 250 one bits followed by 01011. This is a
+	 * simple square-and-multiply algorithm; most multiplications are
+	 * shared since the exponent contains almost only ones:
+	 *   a = z2^(2^16-1), then b = z2^(2^240-1), then the 15 low
+	 *   exponent bits are read from the constant 0xFFEB.
+	 */
+	memcpy(a, z2, sizeof z2);
+	for (i = 0; i < 15; i ++) {
+		f255_square(a, a);
+		f255_mul(a, a, z2);
+	}
+	memcpy(b, a, sizeof a);
+	for (i = 0; i < 14; i ++) {
+		int j;
+
+		for (j = 0; j < 16; j ++) {
+			f255_square(b, b);
+		}
+		f255_mul(b, b, a);
+	}
+	for (i = 14; i >= 0; i --) {
+		f255_square(b, b);
+		if ((0xFFEB >> i) & 1) {
+			f255_mul(b, z2, b);
+		}
+	}
+
+	/* Result is x2/z2, fully reduced, then encoded back into G. */
+	f255_mul(x2, x2, b);
+	reduce_final_f255(x2);
+	le13_to_le8(G, 32, x2);
+	return 1;
+}
+
+/*
+ * Multiply the generator by x: the encoded generator is copied into R
+ * and then multiplied in place with api_mul(). The returned value is
+ * the generator length, as reported by api_generator().
+ * NOTE(review): the status returned by api_mul() is discarded; if the
+ * multiplier is rejected (xlen > 32), R is left holding the generator.
+ */
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	size_t base_len;
+	const unsigned char *base;
+
+	base = api_generator(curve, &base_len);
+	memcpy(R, base, base_len);
+	api_mul(R, base_len, x, xlen, curve);
+	return base_len;
+}
+
+/*
+ * Not implemented: this method is used for ECDSA only, and there is
+ * no ECDSA over Curve25519 (which uses EdDSA instead). All arguments
+ * are ignored and 0 is always returned.
+ */
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	(void)A;
+	(void)B;
+	(void)len;
+	(void)x;
+	(void)xlen;
+	(void)y;
+	(void)ylen;
+	(void)curve;
+
+	return 0;
+}
+
+/*
+ * see bearssl_ec.h
+ *
+ * NOTE(review): the first field has only bit 29 set; presumably it is
+ * a bit mask of supported curve identifiers -- confirm against the
+ * br_ec_impl declaration.
+ */
+const br_ec_impl br_ec_c25519_m15 = {
+	(uint32_t)1 << 29,
+	api_generator,
+	api_order,
+	api_xoff,
+	api_mul,
+	api_mulgen,
+	api_muladd
+};
diff --git a/third_party/bearssl/src/ec_c25519_m31.c b/third_party/bearssl/src/ec_c25519_m31.c
new file mode 100644
index 0000000..1dd6d51
--- /dev/null
+++ b/third_party/bearssl/src/ec_c25519_m31.c
@@ -0,0 +1,800 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* obsolete
+#include <stdio.h>
+#include <stdlib.h>
+static void
+print_int(const char *name, const uint32_t *x)
+{
+	size_t u;
+	unsigned char tmp[40];
+
+	printf("%s = ", name);
+	for (u = 0; u < 9; u ++) {
+		if (x[u] > 0x3FFFFFFF) {
+			printf("INVALID:");
+			for (u = 0; u < 9; u ++) {
+				printf(" %08X", x[u]);
+			}
+			printf("\n");
+			return;
+		}
+	}
+	memset(tmp, 0, sizeof tmp);
+	for (u = 0; u < 9; u ++) {
+		uint64_t w;
+		int j, k;
+
+		w = x[u];
+		j = 30 * (int)u;
+		k = j & 7;
+		if (k != 0) {
+			w <<= k;
+			j -= k;
+		}
+		k = j >> 3;
+		for (j = 0; j < 8; j ++) {
+			tmp[39 - k - j] |= (unsigned char)w;
+			w >>= 8;
+		}
+	}
+	for (u = 8; u < 40; u ++) {
+		printf("%02X", tmp[u]);
+	}
+	printf("\n");
+}
+*/
+
+/*
+ * If BR_NO_ARITH_SHIFT is undefined, or defined to 0, then we _assume_
+ * that right-shifting a signed negative integer copies the sign bit
+ * (arithmetic right-shift). This is "implementation-defined behaviour",
+ * i.e. it is not undefined, but it may differ between compilers. Each
+ * compiler is supposed to document its behaviour in that respect. GCC
+ * explicitly defines that an arithmetic right shift is used. We expect
+ * all other compilers to do the same, because the underlying CPUs offer
+ * an arithmetic right shift opcode that could not be used otherwise.
+ *
+ * Usage constraints:
+ *  - in the default variant the address of x is taken, so x must be an
+ *    lvalue (a uint32_t variable), and the result has type int32_t;
+ *  - in the BR_NO_ARITH_SHIFT variant x is evaluated twice, and n must
+ *    be between 1 and 31 since the macro also shifts by 32 - n.
+ */
+#if BR_NO_ARITH_SHIFT
+#define ARSH(x, n)   (((uint32_t)(x) >> (n)) \
+                    | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
+#else
+#define ARSH(x, n)   ((*(int32_t *)&(x)) >> (n))
+#endif
+
+/*
+ * Decode an unsigned little-endian integer (len bytes at src) into
+ * 30-bit words, least significant word first. Only complete words are
+ * written to dst; the bits left over at the end (the final "partial"
+ * word) are returned instead of being stored.
+ */
+static uint32_t
+le8_to_le30(uint32_t *dst, const unsigned char *src, size_t len)
+{
+	uint32_t pending;
+	int nbits;
+	size_t u;
+
+	pending = 0;
+	nbits = 0;
+	for (u = 0; u < len; u ++) {
+		uint32_t byte;
+
+		byte = src[u];
+		if (nbits >= 22) {
+			/*
+			 * This byte completes a 30-bit word; its extra
+			 * high bits start the next one.
+			 */
+			*dst ++ = (pending | (byte << nbits)) & 0x3FFFFFFF;
+			pending = byte >> (30 - nbits);
+			nbits -= 22;
+			continue;
+		}
+		pending |= byte << nbits;
+		nbits += 8;
+	}
+	return pending;
+}
+
+/*
+ * Encode an integer held as 30-bit words (least significant word
+ * first) into unsigned little-endian bytes. Exactly len bytes are
+ * written to dst; words are read from src only as they are needed.
+ */
+static void
+le30_to_le8(unsigned char *dst, size_t len, const uint32_t *src)
+{
+	uint32_t pending;
+	int nbits;
+	size_t u;
+
+	pending = 0;
+	nbits = 0;
+	for (u = 0; u < len; u ++) {
+		if (nbits >= 8) {
+			/* A full byte is already buffered. */
+			dst[u] = (unsigned char)pending;
+			pending >>= 8;
+			nbits -= 8;
+		} else {
+			uint32_t word;
+
+			/* Top up the buffered bits with the next word. */
+			word = *src ++;
+			dst[u] = (unsigned char)(pending | (word << nbits));
+			pending = word >> (8 - nbits);
+			nbits += 22;
+		}
+	}
+}
+
+/*
+ * Multiply two integers. Source integers are represented as arrays of
+ * nine 30-bit words, for values up to 2^270-1. Result is encoded over
+ * 18 words of 30 bits each.
+ *
+ *   d   destination, 18 words; d[17] receives the last carry
+ *   a   first operand, 9 words
+ *   b   second operand, 9 words
+ *
+ * The column sums are accumulated in a local array before anything is
+ * written to d[].
+ */
+static void
+mul9(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	/*
+	 * Maximum intermediate result is no more than
+	 * 10376293531797946367, which fits in 64 bits. Reason:
+	 *
+	 *   10376293531797946367 = 9 * (2^30-1)^2 + 9663676406
+	 *   10376293531797946367 < 9663676407 * 2^30
+	 *
+	 * Thus, adding together 9 products of 30-bit integers, with
+	 * a carry of at most 9663676406, yields an integer that fits
+	 * on 64 bits and generates a carry of at most 9663676406.
+	 */
+	uint64_t t[17];
+	uint64_t cc;
+	int i;
+
+	/* t[k] = sum of a[i]*b[j] over all i + j == k */
+	t[ 0] = MUL31(a[0], b[0]);
+	t[ 1] = MUL31(a[0], b[1])
+		+ MUL31(a[1], b[0]);
+	t[ 2] = MUL31(a[0], b[2])
+		+ MUL31(a[1], b[1])
+		+ MUL31(a[2], b[0]);
+	t[ 3] = MUL31(a[0], b[3])
+		+ MUL31(a[1], b[2])
+		+ MUL31(a[2], b[1])
+		+ MUL31(a[3], b[0]);
+	t[ 4] = MUL31(a[0], b[4])
+		+ MUL31(a[1], b[3])
+		+ MUL31(a[2], b[2])
+		+ MUL31(a[3], b[1])
+		+ MUL31(a[4], b[0]);
+	t[ 5] = MUL31(a[0], b[5])
+		+ MUL31(a[1], b[4])
+		+ MUL31(a[2], b[3])
+		+ MUL31(a[3], b[2])
+		+ MUL31(a[4], b[1])
+		+ MUL31(a[5], b[0]);
+	t[ 6] = MUL31(a[0], b[6])
+		+ MUL31(a[1], b[5])
+		+ MUL31(a[2], b[4])
+		+ MUL31(a[3], b[3])
+		+ MUL31(a[4], b[2])
+		+ MUL31(a[5], b[1])
+		+ MUL31(a[6], b[0]);
+	t[ 7] = MUL31(a[0], b[7])
+		+ MUL31(a[1], b[6])
+		+ MUL31(a[2], b[5])
+		+ MUL31(a[3], b[4])
+		+ MUL31(a[4], b[3])
+		+ MUL31(a[5], b[2])
+		+ MUL31(a[6], b[1])
+		+ MUL31(a[7], b[0]);
+	t[ 8] = MUL31(a[0], b[8])
+		+ MUL31(a[1], b[7])
+		+ MUL31(a[2], b[6])
+		+ MUL31(a[3], b[5])
+		+ MUL31(a[4], b[4])
+		+ MUL31(a[5], b[3])
+		+ MUL31(a[6], b[2])
+		+ MUL31(a[7], b[1])
+		+ MUL31(a[8], b[0]);
+	t[ 9] = MUL31(a[1], b[8])
+		+ MUL31(a[2], b[7])
+		+ MUL31(a[3], b[6])
+		+ MUL31(a[4], b[5])
+		+ MUL31(a[5], b[4])
+		+ MUL31(a[6], b[3])
+		+ MUL31(a[7], b[2])
+		+ MUL31(a[8], b[1]);
+	t[10] = MUL31(a[2], b[8])
+		+ MUL31(a[3], b[7])
+		+ MUL31(a[4], b[6])
+		+ MUL31(a[5], b[5])
+		+ MUL31(a[6], b[4])
+		+ MUL31(a[7], b[3])
+		+ MUL31(a[8], b[2]);
+	t[11] = MUL31(a[3], b[8])
+		+ MUL31(a[4], b[7])
+		+ MUL31(a[5], b[6])
+		+ MUL31(a[6], b[5])
+		+ MUL31(a[7], b[4])
+		+ MUL31(a[8], b[3]);
+	t[12] = MUL31(a[4], b[8])
+		+ MUL31(a[5], b[7])
+		+ MUL31(a[6], b[6])
+		+ MUL31(a[7], b[5])
+		+ MUL31(a[8], b[4]);
+	t[13] = MUL31(a[5], b[8])
+		+ MUL31(a[6], b[7])
+		+ MUL31(a[7], b[6])
+		+ MUL31(a[8], b[5]);
+	t[14] = MUL31(a[6], b[8])
+		+ MUL31(a[7], b[7])
+		+ MUL31(a[8], b[6]);
+	t[15] = MUL31(a[7], b[8])
+		+ MUL31(a[8], b[7]);
+	t[16] = MUL31(a[8], b[8]);
+
+	/*
+	 * Propagate carries: each output word keeps the low 30 bits,
+	 * the rest moves to the next column; the last carry is d[17].
+	 */
+	cc = 0;
+	for (i = 0; i < 17; i ++) {
+		uint64_t w;
+
+		w = t[i] + cc;
+		d[i] = (uint32_t)w & 0x3FFFFFFF;
+		cc = w >> 30;
+	}
+	d[17] = (uint32_t)cc;
+}
+
+/*
+ * Square a 270-bit integer, represented as an array of nine 30-bit words.
+ * Result uses 18 words of 30 bits each.
+ *
+ *   d   destination, 18 words; d[17] receives the last carry
+ *   a   operand, 9 words
+ *
+ * Each cross product a[i]*a[j] (i != j) is computed once and doubled
+ * with a left shift; the diagonal term a[k/2]^2 (even columns only) is
+ * added undoubled. The column sums are accumulated in a local array
+ * before anything is written to d[].
+ */
+static void
+square9(uint32_t *d, const uint32_t *a)
+{
+	uint64_t t[17];
+	uint64_t cc;
+	int i;
+
+	t[ 0] = MUL31(a[0], a[0]);
+	t[ 1] = ((MUL31(a[0], a[1])) << 1);
+	t[ 2] = MUL31(a[1], a[1])
+		+ ((MUL31(a[0], a[2])) << 1);
+	t[ 3] = ((MUL31(a[0], a[3])
+		+ MUL31(a[1], a[2])) << 1);
+	t[ 4] = MUL31(a[2], a[2])
+		+ ((MUL31(a[0], a[4])
+		+ MUL31(a[1], a[3])) << 1);
+	t[ 5] = ((MUL31(a[0], a[5])
+		+ MUL31(a[1], a[4])
+		+ MUL31(a[2], a[3])) << 1);
+	t[ 6] = MUL31(a[3], a[3])
+		+ ((MUL31(a[0], a[6])
+		+ MUL31(a[1], a[5])
+		+ MUL31(a[2], a[4])) << 1);
+	t[ 7] = ((MUL31(a[0], a[7])
+		+ MUL31(a[1], a[6])
+		+ MUL31(a[2], a[5])
+		+ MUL31(a[3], a[4])) << 1);
+	t[ 8] = MUL31(a[4], a[4])
+		+ ((MUL31(a[0], a[8])
+		+ MUL31(a[1], a[7])
+		+ MUL31(a[2], a[6])
+		+ MUL31(a[3], a[5])) << 1);
+	t[ 9] = ((MUL31(a[1], a[8])
+		+ MUL31(a[2], a[7])
+		+ MUL31(a[3], a[6])
+		+ MUL31(a[4], a[5])) << 1);
+	t[10] = MUL31(a[5], a[5])
+		+ ((MUL31(a[2], a[8])
+		+ MUL31(a[3], a[7])
+		+ MUL31(a[4], a[6])) << 1);
+	t[11] = ((MUL31(a[3], a[8])
+		+ MUL31(a[4], a[7])
+		+ MUL31(a[5], a[6])) << 1);
+	t[12] = MUL31(a[6], a[6])
+		+ ((MUL31(a[4], a[8])
+		+ MUL31(a[5], a[7])) << 1);
+	t[13] = ((MUL31(a[5], a[8])
+		+ MUL31(a[6], a[7])) << 1);
+	t[14] = MUL31(a[7], a[7])
+		+ ((MUL31(a[6], a[8])) << 1);
+	t[15] = ((MUL31(a[7], a[8])) << 1);
+	t[16] = MUL31(a[8], a[8]);
+
+	/*
+	 * Propagate carries: each output word keeps the low 30 bits,
+	 * the rest moves to the next column; the last carry is d[17].
+	 */
+	cc = 0;
+	for (i = 0; i < 17; i ++) {
+		uint64_t w;
+
+		w = t[i] + cc;
+		d[i] = (uint32_t)w & 0x3FFFFFFF;
+		cc = w >> 30;
+	}
+	d[17] = (uint32_t)cc;
+}
+
+/*
+ * Final reduction modulo p = 2^255-19 (field for Curve25519).
+ * The input must already be partially reduced (less than twice the
+ * modulus). When the input is not lower than p, it is replaced with
+ * input - p and 1 is returned; otherwise it is left untouched and 0
+ * is returned.
+ */
+static uint32_t
+reduce_final_f255(uint32_t *d)
+{
+	uint32_t cand[9];
+	uint32_t carry, ge;
+	int u;
+
+	/*
+	 * cand = d + 19. Bit 255 of that sum (bit 15 of word 8) is set
+	 * exactly when d >= p; dropping that bit then leaves d - p.
+	 */
+	carry = 19;
+	for (u = 0; u < 9; u ++) {
+		uint32_t sum;
+
+		sum = d[u] + carry;
+		cand[u] = sum & 0x3FFFFFFF;
+		carry = sum >> 30;
+	}
+	ge = cand[8] >> 15;
+	cand[8] &= 0x7FFF;
+
+	/* d is replaced with the candidate only when ge is 1. */
+	CCOPY(ge, d, cand, sizeof cand);
+	return ge;
+}
+
+/*
+ * Multiplication modulo 2^255-19: d = a * b.
+ * Operands are arrays of 9 words of 30 bits each, least significant
+ * word first. Input values may be up to 2^256-1; the output fits on
+ * 256 bits and is lower than twice the modulus.
+ */
+static void
+f255_mul(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t prod[18], carry;
+	int u;
+
+	/*
+	 * Raw 18-word product. Every word fits in 30 bits, and the top
+	 * word (prod[17]) fits on 2 bits since the product of two
+	 * 256-bit integers fits on 512 bits.
+	 */
+	mul9(prod, a, b);
+
+	/*
+	 * First folding pass. Word 9+k starts at bit 270+30*k, and
+	 * 2^270 = 2^15 * 2^255 is congruent to 19*2^15 = 622592 modulo
+	 * 2^255-19: each high word is added to word k with that factor.
+	 * The bits of word 8 beyond the low 15 are folded the same way
+	 * (factor 19), as the initial carry; that carry is at most
+	 * 19*(2^15-1) = 622573.
+	 *
+	 * The carry is kept on 32 bits, which helps 32-bit
+	 * architectures; inside the loop it is at most 622592.
+	 */
+	carry = MUL15(prod[8] >> 15, 19);
+	prod[8] &= 0x7FFF;
+	for (u = 0; u < 9; u ++) {
+		uint64_t acc;
+
+		acc = (uint64_t)prod[u] + (uint64_t)carry
+			+ MUL31(prod[u + 9], 622592);
+		prod[u] = (uint32_t)acc & 0x3FFFFFFF;
+		carry = (uint32_t)(acc >> 30);
+	}
+
+	/*
+	 * Second pass. The original product was at most (2^256-1)^2;
+	 * after folding its upper 257 bits with a factor of 19, the
+	 * intermediate value is less than 19*2^257 + 2^255 = 77*2^255.
+	 * The extra bits "prod[8] >> 15" are thus less than 77, and the
+	 * carry injected here is at most 76*19 = 1444.
+	 */
+	carry = MUL15(prod[8] >> 15, 19);
+	prod[8] &= 0x7FFF;
+	for (u = 0; u < 9; u ++) {
+		uint32_t sum;
+
+		sum = prod[u] + carry;
+		d[u] = sum & 0x3FFFFFFF;
+		carry = sum >> 30;
+	}
+
+	/*
+	 * The result is at most 2^255 + 1443; the last carry is zero
+	 * because prod[8] was truncated to 15 bits before the loop.
+	 */
+}
+
+/*
+ * Squaring modulo 2^255-19: d = a^2.
+ * The operand is an array of 9 words of 30 bits each, least
+ * significant word first. The input value may be up to 2^256-1; the
+ * output fits on 256 bits and is lower than twice the modulus.
+ */
+static void
+f255_square(uint32_t *d, const uint32_t *a)
+{
+	uint32_t sq[18], carry;
+	int u;
+
+	/*
+	 * Raw 18-word square. Every word fits in 30 bits, and the top
+	 * word (sq[17]) fits on 2 bits since the square of a 256-bit
+	 * integer fits on 512 bits.
+	 */
+	square9(sq, a);
+
+	/*
+	 * First folding pass: word 9+k is added to word k with a factor
+	 * of 622592 (19*2^15), and the bits of word 8 beyond the low 15
+	 * are folded with a factor of 19. This is the same reduction as
+	 * in f255_mul().
+	 */
+	carry = MUL15(sq[8] >> 15, 19);
+	sq[8] &= 0x7FFF;
+	for (u = 0; u < 9; u ++) {
+		uint64_t acc;
+
+		acc = (uint64_t)sq[u] + (uint64_t)carry
+			+ MUL31(sq[u + 9], 622592);
+		sq[u] = (uint32_t)acc & 0x3FFFFFFF;
+		carry = (uint32_t)(acc >> 30);
+	}
+
+	/* Second pass: fold the new extra bits and write out to d[]. */
+	carry = MUL15(sq[8] >> 15, 19);
+	sq[8] &= 0x7FFF;
+	for (u = 0; u < 9; u ++) {
+		uint32_t sum;
+
+		sum = sq[u] + carry;
+		d[u] = sum & 0x3FFFFFFF;
+		carry = sum >> 30;
+	}
+}
+
+/*
+ * Addition in F255: d = a + b, with a partial reduction (the result
+ * is brought down to less than twice the modulus).
+ */
+static void
+f255_add(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t carry, sum;
+	int u;
+
+	/*
+	 * Operand words fit on 30 bits, hence 32-bit variables are
+	 * enough for the sums and carries.
+	 */
+	carry = 0;
+	for (u = 0; u < 9; u ++) {
+		sum = a[u] + b[u] + carry;
+		carry = sum >> 30;
+		d[u] = sum & 0x3FFFFFFF;
+	}
+
+	/*
+	 * 'sum' is the unmasked top word. Its bits beyond the low 15 have
+	 * weight 2^255 and more: they are removed from d[8] and added
+	 * back at the bottom with a factor of 19.
+	 */
+	carry = MUL15(sum >> 15, 19);
+	d[8] &= 0x7FFF;
+	for (u = 0; u < 9; u ++) {
+		sum = d[u] + carry;
+		carry = sum >> 30;
+		d[u] = sum & 0x3FFFFFFF;
+	}
+}
+
+/*
+ * Subtraction in F255: d = a - b, with a partial reduction (the result
+ * is brought down to less than twice the modulus).
+ */
+static void
+f255_sub(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t carry, diff;
+	int u;
+
+	/*
+	 * The value actually computed is a - b + 2*p, i.e.
+	 * a - b + 2^256 - 38, so that the final value is positive.
+	 * The -38 part is injected as the initial carry; the 2^256 part
+	 * is added after the loop.
+	 */
+	carry = (uint32_t)-38;
+	for (u = 0; u < 9; u ++) {
+		diff = a[u] - b[u] + carry;
+		d[u] = diff & 0x3FFFFFFF;
+		/* The carry may be negative: sign-preserving shift. */
+		carry = ARSH(diff, 30);
+	}
+
+	/*
+	 * 0x10000 in word 8 has weight 2^(8*30+16) = 2^256. The bits
+	 * beyond the low 15 are then folded back with a factor of 19.
+	 */
+	carry = MUL15((diff + 0x10000) >> 15, 19);
+	d[8] &= 0x7FFF;
+	for (u = 0; u < 9; u ++) {
+		diff = d[u] + carry;
+		d[u] = diff & 0x3FFFFFFF;
+		carry = diff >> 30;
+	}
+}
+
+/*
+ * Multiplication by the 'A24' constant (121665): d = 121665*a, with a
+ * partial reduction (the result is brought down to less than twice
+ * the modulus).
+ */
+static void
+f255_mul_a24(uint32_t *d, const uint32_t *a)
+{
+	uint64_t acc;
+	uint32_t carry;
+	int u;
+
+	/* Words 0 to 7: scale and propagate carries, 30 bits per word. */
+	carry = 0;
+	for (u = 0; u < 8; u ++) {
+		acc = MUL31(a[u], 121665) + (uint64_t)carry;
+		d[u] = (uint32_t)acc & 0x3FFFFFFF;
+		carry = (uint32_t)(acc >> 30);
+	}
+
+	/*
+	 * The top word is handled apart: only its low 15 bits are kept,
+	 * and everything above (weight 2^255 and more) is folded back
+	 * with a factor of 19. Since a[] is over 256 bits, a[8] has at
+	 * most 16 bits, so 'acc' is about 121665*2^16 at most and the
+	 * carry into the next loop remains small (less than 2^23).
+	 */
+	acc = MUL31(a[8], 121665) + (uint64_t)carry;
+	d[8] = (uint32_t)acc & 0x7FFF;
+	carry = MUL15((uint32_t)(acc >> 15), 19);
+
+	for (u = 0; u < 9; u ++) {
+		uint32_t sum;
+
+		sum = d[u] + carry;
+		d[u] = sum & 0x3FFFFFFF;
+		carry = sum >> 30;
+	}
+}
+
+/*
+ * Generator returned by api_generator(): the value 9 encoded over 32
+ * bytes, least significant byte first (api_mul() decodes its point
+ * operand with le8_to_le30()).
+ */
+static const unsigned char GEN[] = {
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+/*
+ * Value returned by api_order(): 2^255-1 when read as an unsigned
+ * big-endian integer.
+ * NOTE(review): this is not the order of the prime-order subgroup of
+ * Curve25519; presumably it is meant as a bound on multipliers --
+ * confirm against the br_ec_impl documentation.
+ */
+static const unsigned char ORDER[] = {
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+/*
+ * Return the encoded generator; its length (32 bytes) is written to
+ * *len. The 'curve' argument is ignored.
+ */
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	(void)curve;
+	*len = sizeof GEN;
+	return GEN;
+}
+
+/*
+ * Return the ORDER table; its length (32 bytes) is written to *len.
+ * The 'curve' argument is ignored.
+ */
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	(void)curve;
+	*len = sizeof ORDER;
+	return ORDER;
+}
+
+/*
+ * Report offset 0 (return value) and length 32 (*len). The 'curve'
+ * argument is ignored.
+ * NOTE(review): presumably these locate the X coordinate inside an
+ * encoded point, which for this curve is the whole 32-byte encoding --
+ * confirm against bearssl_ec.h.
+ */
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	(void)curve;
+	*len = sizeof GEN;
+	return 0;
+}
+
+/*
+ * Branch-free conditional swap of two 9-word values: a[] and b[] are
+ * exchanged when ctl is 1, and left unchanged when ctl is 0.
+ */
+static void
+cswap(uint32_t *a, uint32_t *b, uint32_t ctl)
+{
+	uint32_t mask;
+	int u;
+
+	/* 0 -> 0x00000000, 1 -> 0xFFFFFFFF */
+	mask = -ctl;
+	for (u = 0; u < 9; u ++) {
+		uint32_t delta;
+
+		delta = mask & (a[u] ^ b[u]);
+		a[u] ^= delta;
+		b[u] ^= delta;
+	}
+}
+
+/*
+ * Point multiplication on Curve25519 (ladder of RFC 7748).
+ *
+ *   G      point to multiply, exactly 32 bytes, least significant
+ *          byte first; overwritten with the encoded result
+ *   Glen   length of G; must be 32
+ *   kb     multiplier, unsigned big-endian, at most 32 bytes
+ *   kblen  length of kb
+ *   curve  ignored
+ *
+ * Returned value is 1 on success, or 0 if a length is not acceptable
+ * (in that case G is not modified).
+ */
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	uint32_t x1[9], x2[9], x3[9], z2[9], z3[9];
+	uint32_t a[9], aa[9], b[9], bb[9];
+	uint32_t c[9], d[9], e[9], da[9], cb[9];
+	unsigned char k[32];
+	uint32_t swap;
+	int i;
+
+	(void)curve;
+
+	/*
+	 * Points are encoded over exactly 32 bytes. Multipliers must fit
+	 * in 32 bytes as well.
+	 * RFC 7748 mandates that the high bit of the last point byte must
+	 * be ignored/cleared.
+	 */
+	if (Glen != 32 || kblen > 32) {
+		return 0;
+	}
+	G[31] &= 0x7F;
+
+	/*
+	 * Initialise the ladder variables: x1 = x3 = decoded point,
+	 * x2 = 1, z2 = 0, z3 = 1. Values are plain integers split into
+	 * 30-bit words; no Montgomery representation is used by this
+	 * implementation. le8_to_le30() writes the eight complete words
+	 * and returns the final partial word, stored here as x1[8].
+	 */
+	x1[8] = le8_to_le30(x1, G, 32);
+	memcpy(x3, x1, sizeof x1);
+	memset(z2, 0, sizeof z2);
+	memset(x2, 0, sizeof x2);
+	x2[0] = 1;
+	memset(z3, 0, sizeof z3);
+	z3[0] = 1;
+
+	/*
+	 * Copy the multiplier into k[], right-aligned (big-endian, so
+	 * k[31] is the least significant byte), then clamp it: the three
+	 * low bits and bit 255 are cleared, and bit 254 is set.
+	 */
+	memset(k, 0, (sizeof k) - kblen);
+	memcpy(k + (sizeof k) - kblen, kb, kblen);
+	k[31] &= 0xF8;
+	k[0] &= 0x7F;
+	k[0] |= 0x40;
+
+	/* obsolete
+	print_int("x1", x1);
+	*/
+
+	/*
+	 * Ladder over multiplier bits 254 down to 0. 'swap' remembers
+	 * the previous bit, so each iteration swaps the two working
+	 * points only when the current bit differs from the previous one.
+	 */
+	swap = 0;
+	for (i = 254; i >= 0; i --) {
+		uint32_t kt;
+
+		/* kt = bit i of the (big-endian) multiplier */
+		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;
+		swap ^= kt;
+		cswap(x2, x3, swap);
+		cswap(z2, z3, swap);
+		swap = kt;
+
+		/* obsolete
+		print_int("x2", x2);
+		print_int("z2", z2);
+		print_int("x3", x3);
+		print_int("z3", z3);
+		*/
+
+		f255_add(a, x2, z2);
+		f255_square(aa, a);
+		f255_sub(b, x2, z2);
+		f255_square(bb, b);
+		f255_sub(e, aa, bb);
+		f255_add(c, x3, z3);
+		f255_sub(d, x3, z3);
+		f255_mul(da, d, a);
+		f255_mul(cb, c, b);
+
+		/* obsolete
+		print_int("a ", a);
+		print_int("aa", aa);
+		print_int("b ", b);
+		print_int("bb", bb);
+		print_int("e ", e);
+		print_int("c ", c);
+		print_int("d ", d);
+		print_int("da", da);
+		print_int("cb", cb);
+		*/
+
+		/*
+		 * x3 = (da + cb)^2
+		 * z3 = x1 * (da - cb)^2
+		 * x2 = aa * bb
+		 * z2 = e * (aa + 121665 * e)
+		 */
+		f255_add(x3, da, cb);
+		f255_square(x3, x3);
+		f255_sub(z3, da, cb);
+		f255_square(z3, z3);
+		f255_mul(z3, z3, x1);
+		f255_mul(x2, aa, bb);
+		f255_mul_a24(z2, e);
+		f255_add(z2, z2, aa);
+		f255_mul(z2, e, z2);
+
+		/* obsolete
+		print_int("x2", x2);
+		print_int("z2", z2);
+		print_int("x3", x3);
+		print_int("z3", z3);
+		*/
+	}
+	/* Final swap, driven by the last multiplier bit processed. */
+	cswap(x2, x3, swap);
+	cswap(z2, z3, swap);
+
+	/*
+	 * Invert z2 with a modular exponentiation: the exponent is
+	 * p-2 = 2^255-21, i.e. 250 one bits followed by 01011. This is a
+	 * simple square-and-multiply algorithm; most multiplications are
+	 * shared since the exponent contains almost only ones:
+	 *   a = z2^(2^16-1), then b = z2^(2^240-1), then the 15 low
+	 *   exponent bits are read from the constant 0xFFEB.
+	 */
+	memcpy(a, z2, sizeof z2);
+	for (i = 0; i < 15; i ++) {
+		f255_square(a, a);
+		f255_mul(a, a, z2);
+	}
+	memcpy(b, a, sizeof a);
+	for (i = 0; i < 14; i ++) {
+		int j;
+
+		for (j = 0; j < 16; j ++) {
+			f255_square(b, b);
+		}
+		f255_mul(b, b, a);
+	}
+	for (i = 14; i >= 0; i --) {
+		f255_square(b, b);
+		if ((0xFFEB >> i) & 1) {
+			f255_mul(b, z2, b);
+		}
+	}
+
+	/* Result is x2/z2, fully reduced, then encoded back into G. */
+	f255_mul(x2, x2, b);
+	reduce_final_f255(x2);
+	le30_to_le8(G, 32, x2);
+	return 1;
+}
+
+/*
+ * Multiply the generator by x: the encoded generator is copied into R
+ * and then multiplied in place with api_mul(). The returned value is
+ * the generator length, as reported by api_generator().
+ * NOTE(review): the status returned by api_mul() is discarded; if the
+ * multiplier is rejected (xlen > 32), R is left holding the generator.
+ */
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	size_t base_len;
+	const unsigned char *base;
+
+	base = api_generator(curve, &base_len);
+	memcpy(R, base, base_len);
+	api_mul(R, base_len, x, xlen, curve);
+	return base_len;
+}
+
+/*
+ * Not implemented: this method is used for ECDSA only, and there is
+ * no ECDSA over Curve25519 (which uses EdDSA instead). All arguments
+ * are ignored and 0 is always returned.
+ */
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	(void)A;
+	(void)B;
+	(void)len;
+	(void)x;
+	(void)xlen;
+	(void)y;
+	(void)ylen;
+	(void)curve;
+
+	return 0;
+}
+
+/*
+ * see bearssl_ec.h
+ *
+ * NOTE(review): the first field has only bit 29 set; presumably it is
+ * a bit mask of supported curve identifiers -- confirm against the
+ * br_ec_impl declaration.
+ */
+const br_ec_impl br_ec_c25519_m31 = {
+	(uint32_t)1 << 29,
+	api_generator,
+	api_order,
+	api_xoff,
+	api_mul,
+	api_mulgen,
+	api_muladd
+};
diff --git a/third_party/bearssl/src/ec_c25519_m62.c b/third_party/bearssl/src/ec_c25519_m62.c
new file mode 100644
index 0000000..6b058eb
--- /dev/null
+++ b/third_party/bearssl/src/ec_c25519_m62.c
@@ -0,0 +1,605 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+#if BR_UMUL128
+#include <intrin.h>
+#endif
+
+static const unsigned char GEN[] = {  /* 32 bytes, returned by api_generator() */
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* api_mul() decodes points */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* in little-endian, so this */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* is the value 9 (the base */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00   /* point u of RFC 7748) */
+};
+
+static const unsigned char ORDER[] = {  /* 32 bytes, returned by api_order() */
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* read as big-endian: 2^255-1 */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* NOTE(review): this is not */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* the prime subgroup order; */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF   /* presumably a scalar bound */
+};
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	(void)curve;  /* ignored by this implementation */
+	*len = 32;  /* size of GEN, in bytes */
+	return GEN;  /* static storage; caller must not modify it */
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	(void)curve;  /* ignored by this implementation */
+	*len = 32;  /* size of ORDER, in bytes */
+	return ORDER;  /* static storage; caller must not modify it */
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	(void)curve;  /* ignored by this implementation */
+	*len = 32;  /* reported length: the full 32-byte point encoding */
+	return 0;  /* reported offset; presumably of the X coordinate (see bearssl_ec.h) */
+}
+
+/*
+ * A field element is encoded as five 64-bit integers, in basis 2^51
+ * (limb i has weight 2^(51*i)). Limbs may occasionally be larger than
+ * 2^51, to save on carry propagation costs.
+ */
+
+#define MASK51   (((uint64_t)1 << 51) - (uint64_t)1)  /* 2^51 - 1 */
+
+/*
+ * Swap a and b if ctl is 1, do nothing if ctl is 0 (mask-based, no branch).
+ */
+static inline void
+f255_cswap(uint64_t *a, uint64_t *b, uint32_t ctl)
+{
+	uint64_t m, w;
+
+	m = -(uint64_t)ctl;  /* all-ones if ctl == 1, zero if ctl == 0 */
+	w = m & (a[0] ^ b[0]); a[0] ^= w; b[0] ^= w;  /* w is a^b or 0 */
+	w = m & (a[1] ^ b[1]); a[1] ^= w; b[1] ^= w;
+	w = m & (a[2] ^ b[2]); a[2] ^= w; b[2] ^= w;
+	w = m & (a[3] ^ b[3]); a[3] ^= w; b[3] ^= w;
+	w = m & (a[4] ^ b[4]); a[4] ^= w; b[4] ^= w;
+}
+
+/*
+ * Limb-wise addition, no carry propagation: limbs may grow by one bit.
+ */
+static inline void
+f255_add(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+	d[0] = a[0] + b[0];
+	d[1] = a[1] + b[1];
+	d[2] = a[2] + b[2];
+	d[3] = a[3] + b[3];
+	d[4] = a[4] + b[4];
+}
+
+/*
+ * Subtraction: d = a - b in the field.
+ * On input, limbs must fit on 60 bits each. On output, result is
+ * partially reduced, with max value 2^255+19456; moreover, all
+ * limbs will fit on 51 bits, except the low limb, which may have
+ * value up to 2^51+19455.
+ */
+static inline void
+f255_sub(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+	uint64_t cc, w;
+
+	/*
+	 * We compute d = (2^255-19)*1024 + a - b. Since the limbs
+	 * fit on 60 bits, each operand is at most slightly more
+	 * than 2^264, which is much less than 2^265-19456. This
+	 * ensures that the result is positive.
+	 */
+
+	/*
+	 * Initial carry is 19456, since we add 2^265-19456. Each
+	 * individual subtraction may yield a carry up to 513.
+	 */
+	w = a[0] - b[0] - 19456;
+	d[0] = w & MASK51;
+	cc = -(w >> 51) & 0x3FF;  /* amount to subtract from the next limb */
+	w = a[1] - b[1] - cc;
+	d[1] = w & MASK51;
+	cc = -(w >> 51) & 0x3FF;
+	w = a[2] - b[2] - cc;
+	d[2] = w & MASK51;
+	cc = -(w >> 51) & 0x3FF;
+	w = a[3] - b[3] - cc;
+	d[3] = w & MASK51;
+	cc = -(w >> 51) & 0x3FF;
+	d[4] = ((uint64_t)1 << 61) + a[4] - b[4] - cc;
+
+	/*
+	 * Partial reduction. The intermediate result may be up to
+	 * slightly above 2^265, but less than 2^265+2^255. When we
+	 * truncate to 255 bits, the upper bits will be at most 1024.
+	 */
+	d[0] += 19 * (d[4] >> 51);  /* fold bits >= 255 back, using 2^255 = 19 */
+	d[4] &= MASK51;
+}
+
+/*
+ * UMUL51(hi, lo, x, y) computes, for 64-bit unsigned x and y:
+ *
+ *   hi = floor((x * y) / (2^51)), truncated to 64 bits
+ *   lo = x * y mod 2^51
+ *
+ * Note that lo < 2^51, but "hi" may exceed 2^51 if the input operands
+ * are larger than 51 bits.
+ */
+#if BR_INT128  /* variant using the unsigned __int128 type */
+
+#define UMUL51(hi, lo, x, y)   do { \
+		unsigned __int128 umul_tmp; \
+		umul_tmp = (unsigned __int128)(x) * (unsigned __int128)(y); \
+		(hi) = (uint64_t)(umul_tmp >> 51); \
+		(lo) = (uint64_t)umul_tmp & MASK51; \
+	} while (0)
+
+#elif BR_UMUL128  /* variant using the _umul128() intrinsic from <intrin.h> */
+
+#define UMUL51(hi, lo, x, y)   do { \
+		uint64_t umul_hi, umul_lo; \
+		umul_lo = _umul128((x), (y), &umul_hi); \
+		(hi) = (umul_hi << 13) | (umul_lo >> 51); \
+		(lo) = umul_lo & MASK51; \
+	} while (0)
+
+#endif  /* UMUL51 variants */
+
+/*
+ * Multiplication: d = a * b in the field (partially reduced).
+ * On input, limbs must fit on 54 bits each. d may alias a and/or b.
+ * On output, limb 0 is at most 2^51 + 155647, and other limbs fit
+ * on 51 bits each.
+ */
+static inline void
+f255_mul(uint64_t *d, uint64_t *a, uint64_t *b)
+{
+	uint64_t t[10], hi, lo, w, cc;
+
+	/*
+	 * Perform cross products, accumulating values without carry
+	 * propagation.
+	 *
+	 * Since input limbs fit on 54 bits each, each individual
+	 * UMUL51 will produce a "hi" of less than 2^57. The maximum
+	 * sum will be at most 5*(2^57-1) + 4*(2^51-1) (for t[5]),
+	 * i.e. less than 324*2^51.
+	 */
+
+	UMUL51(t[1], t[0], a[0], b[0]);
+
+	UMUL51(t[2], lo, a[1], b[0]); t[1] += lo;
+	UMUL51(hi, lo, a[0], b[1]); t[1] += lo; t[2] += hi;
+
+	UMUL51(t[3], lo, a[2], b[0]); t[2] += lo;
+	UMUL51(hi, lo, a[1], b[1]); t[2] += lo; t[3] += hi;
+	UMUL51(hi, lo, a[0], b[2]); t[2] += lo; t[3] += hi;
+
+	UMUL51(t[4], lo, a[3], b[0]); t[3] += lo;
+	UMUL51(hi, lo, a[2], b[1]); t[3] += lo; t[4] += hi;
+	UMUL51(hi, lo, a[1], b[2]); t[3] += lo; t[4] += hi;
+	UMUL51(hi, lo, a[0], b[3]); t[3] += lo; t[4] += hi;
+
+	UMUL51(t[5], lo, a[4], b[0]); t[4] += lo;
+	UMUL51(hi, lo, a[3], b[1]); t[4] += lo; t[5] += hi;
+	UMUL51(hi, lo, a[2], b[2]); t[4] += lo; t[5] += hi;
+	UMUL51(hi, lo, a[1], b[3]); t[4] += lo; t[5] += hi;
+	UMUL51(hi, lo, a[0], b[4]); t[4] += lo; t[5] += hi;
+
+	UMUL51(t[6], lo, a[4], b[1]); t[5] += lo;
+	UMUL51(hi, lo, a[3], b[2]); t[5] += lo; t[6] += hi;
+	UMUL51(hi, lo, a[2], b[3]); t[5] += lo; t[6] += hi;
+	UMUL51(hi, lo, a[1], b[4]); t[5] += lo; t[6] += hi;
+
+	UMUL51(t[7], lo, a[4], b[2]); t[6] += lo;
+	UMUL51(hi, lo, a[3], b[3]); t[6] += lo; t[7] += hi;
+	UMUL51(hi, lo, a[2], b[4]); t[6] += lo; t[7] += hi;
+
+	UMUL51(t[8], lo, a[4], b[3]); t[7] += lo;
+	UMUL51(hi, lo, a[3], b[4]); t[7] += lo; t[8] += hi;
+
+	UMUL51(t[9], lo, a[4], b[4]); t[8] += lo;
+
+	/*
+	 * The upper words t[5]..t[9] are folded back into the lower
+	 * words, using the rule that 2^255 = 19 in the field.
+	 *
+	 * Since each t[i] is less than 324*2^51, the additions below
+	 * will yield less than 6480*2^51 in each limb; this fits in
+	 * 64 bits (6480*2^51 < 8192*2^51 = 2^64), hence there is
+	 * no overflow.
+	 */
+	t[0] += 19 * t[5];
+	t[1] += 19 * t[6];
+	t[2] += 19 * t[7];
+	t[3] += 19 * t[8];
+	t[4] += 19 * t[9];
+
+	/*
+	 * Propagate carries. From here on d[] is written; a[] and b[]
+	 * are no longer read.
+	 */
+	w = t[0];
+	d[0] = w & MASK51;
+	cc = w >> 51;
+	w = t[1] + cc;
+	d[1] = w & MASK51;
+	cc = w >> 51;
+	w = t[2] + cc;
+	d[2] = w & MASK51;
+	cc = w >> 51;
+	w = t[3] + cc;
+	d[3] = w & MASK51;
+	cc = w >> 51;
+	w = t[4] + cc;
+	d[4] = w & MASK51;
+	cc = w >> 51;
+
+	/*
+	 * Since the limbs were 64-bit values, the top carry is at
+	 * most 8192 (in practice, that cannot be reached). We simply
+	 * performed a partial reduction.
+	 */
+	d[0] += 19 * cc;
+}
+
+/*
+ * Multiplication by A24 = 121665: d = 121665 * a (partially reduced).
+ * Input must have limbs of 60 bits at most.
+ */
+static inline void
+f255_mul_a24(uint64_t *d, const uint64_t *a)
+{
+	uint64_t t[5], cc, w;
+
+	/*
+	 * 121665 = 15 * 8111. We first multiply by 15, with carry
+	 * propagation and partial reduction.
+	 */
+	w = a[0] * 15;
+	t[0] = w & MASK51;
+	cc = w >> 51;
+	w = a[1] * 15 + cc;
+	t[1] = w & MASK51;
+	cc = w >> 51;
+	w = a[2] * 15 + cc;
+	t[2] = w & MASK51;
+	cc = w >> 51;
+	w = a[3] * 15 + cc;
+	t[3] = w & MASK51;
+	cc = w >> 51;
+	w = a[4] * 15 + cc;
+	t[4] = w & MASK51;
+	t[0] += 19 * (w >> 51);  /* fold the top carry, using 2^255 = 19 */
+
+	/*
+	 * Then multiplication by 8111. At that point, we know that
+	 * t[0] is less than 2^51 + 19*8192, and other limbs are less
+	 * than 2^51; thus, there will be no overflow.
+	 */
+	w = t[0] * 8111;
+	d[0] = w & MASK51;
+	cc = w >> 51;
+	w = t[1] * 8111 + cc;
+	d[1] = w & MASK51;
+	cc = w >> 51;
+	w = t[2] * 8111 + cc;
+	d[2] = w & MASK51;
+	cc = w >> 51;
+	w = t[3] * 8111 + cc;
+	d[3] = w & MASK51;
+	cc = w >> 51;
+	w = t[4] * 8111 + cc;
+	d[4] = w & MASK51;
+	d[0] += 19 * (w >> 51);  /* fold the top carry, using 2^255 = 19 */
+}
+
+/*
+ * Finalize reduction: a[] is replaced, in place, with its reduced value.
+ * On input, limbs must fit on 51 bits, except possibly the low limb,
+ * which may be slightly above 2^51.
+ */
+static inline void
+f255_final_reduce(uint64_t *a)
+{
+	uint64_t t[5], cc, w;
+
+	/*
+	 * We add 19. If the result (in t[]) is below 2^255, then a[]
+	 * is already less than 2^255-19, thus already reduced.
+	 * Otherwise, we subtract 2^255 from t[], in which case we
+	 * have t = a - (2^255-19), and that's our result.
+	 */
+	w = a[0] + 19;
+	t[0] = w & MASK51;
+	cc = w >> 51;
+	w = a[1] + cc;
+	t[1] = w & MASK51;
+	cc = w >> 51;
+	w = a[2] + cc;
+	t[2] = w & MASK51;
+	cc = w >> 51;
+	w = a[3] + cc;
+	t[3] = w & MASK51;
+	cc = w >> 51;
+	w = a[4] + cc;
+	t[4] = w & MASK51;  /* masking drops bit 255, i.e. subtracts 2^255 */
+	cc = w >> 51;
+
+	/*
+	 * The bit 255 of t is in cc. If that bit is 0, then a[] must
+	 * be unchanged; otherwise, it must be replaced with t[].
+	 */
+	cc = -cc;  /* 0 -> no change, 1 -> all-ones selection mask */
+	a[0] ^= cc & (a[0] ^ t[0]);
+	a[1] ^= cc & (a[1] ^ t[1]);
+	a[2] ^= cc & (a[2] ^ t[2]);
+	a[3] ^= cc & (a[3] ^ t[3]);
+	a[4] ^= cc & (a[4] ^ t[4]);
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	unsigned char k[32];  /* scalar: big-endian, zero-padded, then clamped */
+	uint64_t x1[5], x2[5], z2[5], x3[5], z3[5];
+	uint32_t swap;
+	int i;
+
+	(void)curve;  /* ignored by this implementation */
+
+	/*
+	 * Points are encoded over exactly 32 bytes. Multipliers must fit
+	 * in 32 bytes as well. Otherwise, 0 is returned and G is untouched.
+	 */
+	if (Glen != 32 || kblen > 32) {
+		return 0;
+	}
+
+	/*
+	 * RFC 7748 mandates that the high bit of the last point byte must
+	 * be ignored/cleared; the "& MASK51" in the initialization for
+	 * x1[4] clears that bit.
+	 */
+	x1[0] = br_dec64le(&G[0]) & MASK51;  /* bits 0..50 */
+	x1[1] = (br_dec64le(&G[6]) >> 3) & MASK51;  /* bits 51..101 */
+	x1[2] = (br_dec64le(&G[12]) >> 6) & MASK51;  /* bits 102..152 */
+	x1[3] = (br_dec64le(&G[19]) >> 1) & MASK51;  /* bits 153..203 */
+	x1[4] = (br_dec64le(&G[24]) >> 12) & MASK51;  /* bits 204..254 */
+
+	/*
+	 * We can use memset() to clear values, because exact-width types
+	 * like uint64_t are guaranteed to have no padding bits or
+	 * trap representations.
+	 */
+	memset(x2, 0, sizeof x2);
+	x2[0] = 1;  /* x2 = 1 */
+	memset(z2, 0, sizeof z2);  /* z2 = 0 */
+	memcpy(x3, x1, sizeof x1);  /* x3 = x1 */
+	memcpy(z3, x2, sizeof x2);  /* z3 = 1 */
+
+	/*
+	 * The multiplier is provided in big-endian notation, and
+	 * possibly shorter than 32 bytes.
+	 */
+	memset(k, 0, (sizeof k) - kblen);
+	memcpy(k + (sizeof k) - kblen, kb, kblen);
+	k[31] &= 0xF8;  /* clear the three least significant bits */
+	k[0] &= 0x7F;  /* clear bit 255 */
+	k[0] |= 0x40;  /* set bit 254 */
+
+	swap = 0;
+
+	for (i = 254; i >= 0; i --) {  /* one ladder step per scalar bit */
+		uint64_t a[5], aa[5], b[5], bb[5], e[5];
+		uint64_t c[5], d[5], da[5], cb[5];
+		uint32_t kt;
+
+		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;  /* bit i of the scalar */
+		swap ^= kt;  /* 1 only if this bit differs from the previous one */
+		f255_cswap(x2, x3, swap);
+		f255_cswap(z2, z3, swap);
+		swap = kt;
+
+		/*
+		 * At that point, limbs of x_2 and z_2 are assumed to fit
+		 * on at most 52 bits each.
+		 *
+		 * Each f255_add() adds one bit to the maximum range of
+		 * the values, but f255_sub() and f255_mul() bring back
+		 * the limbs into 52 bits. All f255_add() outputs are
+		 * used only as inputs for f255_mul(), which ensures
+		 * that limbs remain in the proper range.
+		 */
+
+		/* A = x_2 + z_2   -- limbs fit on 53 bits each */
+		f255_add(a, x2, z2);
+
+		/* AA = A^2 */
+		f255_mul(aa, a, a);
+
+		/* B = x_2 - z_2 */
+		f255_sub(b, x2, z2);
+
+		/* BB = B^2 */
+		f255_mul(bb, b, b);
+
+		/* E = AA - BB */
+		f255_sub(e, aa, bb);
+
+		/* C = x_3 + z_3   -- limbs fit on 53 bits each */
+		f255_add(c, x3, z3);
+
+		/* D = x_3 - z_3 */
+		f255_sub(d, x3, z3);
+
+		/* DA = D * A */
+		f255_mul(da, d, a);
+
+		/* CB = C * B */
+		f255_mul(cb, c, b);
+
+		/* x_3 = (DA + CB)^2 */
+		f255_add(x3, da, cb);
+		f255_mul(x3, x3, x3);
+
+		/* z_3 = x_1 * (DA - CB)^2 */
+		f255_sub(z3, da, cb);
+		f255_mul(z3, z3, z3);
+		f255_mul(z3, x1, z3);
+
+		/* x_2 = AA * BB */
+		f255_mul(x2, aa, bb);
+
+		/* z_2 = E * (AA + a24 * E) */
+		f255_mul_a24(z2, e);
+		f255_add(z2, aa, z2);
+		f255_mul(z2, e, z2);
+	}
+
+	f255_cswap(x2, x3, swap);  /* undo the swap left pending by the last bit */
+	f255_cswap(z2, z3, swap);
+
+	/*
+	 * Compute 1/z2 = z2^(p-2). Since p = 2^255-19, we can share
+	 * most non-squarings. We use x1 and x3, now useless, as temporaries.
+	 */
+	memcpy(x1, z2, sizeof z2);
+	for (i = 0; i < 15; i ++) {
+		f255_mul(x1, x1, x1);
+		f255_mul(x1, x1, z2);
+	}  /* x1 = z2^(2^16-1) */
+	memcpy(x3, x1, sizeof x1);
+	for (i = 0; i < 14; i ++) {
+		int j;
+
+		for (j = 0; j < 16; j ++) {
+			f255_mul(x3, x3, x3);
+		}
+		f255_mul(x3, x3, x1);
+	}  /* x3 = z2^(2^240-1) */
+	for (i = 14; i >= 0; i --) {
+		f255_mul(x3, x3, x3);
+		if ((0xFFEB >> i) & 1) {  /* low 15 bits of p-2 = 2^255-21 */
+			f255_mul(x3, z2, x3);
+		}
+	}
+
+	/*
+	 * Compute x2/z2. We have 1/z2 in x3.
+	 */
+	f255_mul(x2, x2, x3);
+	f255_final_reduce(x2);
+
+	/*
+	 * Encode the final x2 value in little-endian. We first assemble
+	 * the limbs into 64-bit values.
+	 */
+	x2[0] |= x2[1] << 51;
+	x2[1] = (x2[1] >> 13) | (x2[2] << 38);
+	x2[2] = (x2[2] >> 26) | (x2[3] << 25);
+	x2[3] = (x2[3] >> 39) | (x2[4] << 12);
+	br_enc64le(G, x2[0]);
+	br_enc64le(G + 8, x2[1]);
+	br_enc64le(G + 16, x2[2]);
+	br_enc64le(G + 24, x2[3]);
+	return 1;  /* success: G now holds the encoded result */
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	const unsigned char *G;
+	size_t Glen;
+
+	G = api_generator(curve, &Glen);  /* GEN, with Glen = 32 */
+	memcpy(R, G, Glen);  /* R must have room for 32 bytes */
+	api_mul(R, Glen, x, xlen, curve);  /* R <- x*R in place; status discarded */
+	return Glen;  /* 32 even if api_mul() rejected xlen > 32 */
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/*
+	 * Not implemented: this method is used for ECDSA only, and there
+	 * is no ECDSA over Curve25519 (which uses EdDSA instead). All
+	 * arguments are ignored and 0 is returned unconditionally.
+	 */
+	(void)A;
+	(void)B;
+	(void)len;
+	(void)x;
+	(void)xlen;
+	(void)y;
+	(void)ylen;
+	(void)curve;
+	return 0;  /* same value api_mul() uses to report rejection */
+}
+
+/* see bearssl_ec.h */
+const br_ec_impl br_ec_c25519_m62 = {
+	(uint32_t)0x20000000,  /* bit 29 only; presumably the curve mask -- confirm in bearssl_ec.h */
+	&api_generator,  /* returns GEN, 32 bytes */
+	&api_order,  /* returns ORDER, 32 bytes */
+	&api_xoff,  /* offset 0, length 32 */
+	&api_mul,  /* in-place multiplication of a point by a big-endian scalar */
+	&api_mulgen,  /* same, starting from GEN */
+	&api_muladd  /* stub: always returns 0 */
+};
+
+/* see bearssl_ec.h */
+const br_ec_impl *
+br_ec_c25519_m62_get(void)
+{
+	return &br_ec_c25519_m62;  /* variant built when BR_INT128 or BR_UMUL128 is set */
+}
+
+#else
+
+/* see bearssl_ec.h */
+const br_ec_impl *
+br_ec_c25519_m62_get(void)
+{
+	return 0;  /* neither BR_INT128 nor BR_UMUL128: null pointer, nothing to offer */
+}
+
+#endif
diff --git a/third_party/bearssl/src/ec_c25519_m64.c b/third_party/bearssl/src/ec_c25519_m64.c
new file mode 100644
index 0000000..df48834
--- /dev/null
+++ b/third_party/bearssl/src/ec_c25519_m64.c
@@ -0,0 +1,831 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+#if BR_UMUL128
+#include <intrin.h>
+#endif
+
+static const unsigned char GEN[] = {  /* 32 bytes, returned by api_generator() */
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* api_mul() decodes points */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* in little-endian, so this */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  /* is the value 9 (the base */
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00   /* point u of RFC 7748) */
+};
+
+static const unsigned char ORDER[] = {  /* 32 bytes, returned by api_order() */
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* read as big-endian: 2^255-1 */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* NOTE(review): this is not */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* the prime subgroup order; */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF   /* presumably a scalar bound */
+};
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	(void)curve;  /* ignored by this implementation */
+	*len = 32;  /* size of GEN, in bytes */
+	return GEN;  /* static storage; caller must not modify it */
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	(void)curve;  /* ignored by this implementation */
+	*len = 32;  /* size of ORDER, in bytes */
+	return ORDER;  /* static storage; caller must not modify it */
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	(void)curve;  /* ignored by this implementation */
+	*len = 32;  /* reported length: the full 32-byte point encoding */
+	return 0;  /* reported offset; presumably of the X coordinate (see bearssl_ec.h) */
+}
+
+/*
+ * A field element is encoded as four 64-bit integers, in basis 2^64.
+ * Operations return partially reduced values, which may range up to
+ * 2^255+37.
+ */
+
+#define MASK63   (((uint64_t)1 << 63) - (uint64_t)1)  /* 2^63 - 1: top limb without bit 255 */
+
+/*
+ * Swap a and b if ctl is 1, do nothing if ctl is 0 (mask-based, no branch).
+ */
+static inline void
+f255_cswap(uint64_t *a, uint64_t *b, uint32_t ctl)
+{
+	uint64_t m, w;
+
+	m = -(uint64_t)ctl;  /* all-ones if ctl == 1, zero if ctl == 0 */
+	w = m & (a[0] ^ b[0]); a[0] ^= w; b[0] ^= w;  /* w is a^b or 0 */
+	w = m & (a[1] ^ b[1]); a[1] ^= w; b[1] ^= w;
+	w = m & (a[2] ^ b[2]); a[2] ^= w; b[2] ^= w;
+	w = m & (a[3] ^ b[3]); a[3] ^= w; b[3] ^= w;
+}
+
+/*
+ * Addition in the field: d = a + b, partially reduced (at most 2^255+37).
+ */
+static inline void
+f255_add(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+#if BR_INT128  /* variant using the unsigned __int128 type */
+
+	uint64_t t0, t1, t2, t3, cc;
+	unsigned __int128 z;
+
+	z = (unsigned __int128)a[0] + (unsigned __int128)b[0];
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)a[1] + (unsigned __int128)b[1] + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)a[2] + (unsigned __int128)b[2] + (z >> 64);
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)a[3] + (unsigned __int128)b[3] + (z >> 64);
+	t3 = (uint64_t)z & MASK63;  /* keep bits 192..254 */
+	cc = (uint64_t)(z >> 63);  /* bits 255 and up of the plain sum */
+
+	/*
+	 * Since operands are at most 2^255+37, the sum is at most
+	 * 2^256+74; thus, the carry cc is equal to 0, 1 or 2.
+	 *
+	 * We use: 2^255 = 19 mod p.
+	 * Since we add 0, 19 or 38 to a value that fits on 255 bits,
+	 * the result is at most 2^255+37.
+	 */
+	z = (unsigned __int128)t0 + (unsigned __int128)(19 * cc);
+	d[0] = (uint64_t)z;
+	z = (unsigned __int128)t1 + (z >> 64);
+	d[1] = (uint64_t)z;
+	z = (unsigned __int128)t2 + (z >> 64);
+	d[2] = (uint64_t)z;
+	d[3] = t3 + (uint64_t)(z >> 64);
+
+#elif BR_UMUL128  /* variant using the intrinsics from <intrin.h> */
+
+	uint64_t t0, t1, t2, t3, cc;
+	unsigned char k;
+
+	k = _addcarry_u64(0, a[0], b[0], &t0);
+	k = _addcarry_u64(k, a[1], b[1], &t1);
+	k = _addcarry_u64(k, a[2], b[2], &t2);
+	k = _addcarry_u64(k, a[3], b[3], &t3);
+	cc = (k << 1) + (t3 >> 63);  /* bits 255 and up of the plain sum */
+	t3 &= MASK63;
+
+	/*
+	 * Since operands are at most 2^255+37, the sum is at most
+	 * 2^256+74; thus, the carry cc is equal to 0, 1 or 2.
+	 *
+	 * We use: 2^255 = 19 mod p.
+	 * Since we add 0, 19 or 38 to a value that fits on 255 bits,
+	 * the result is at most 2^255+37.
+	 */
+	k = _addcarry_u64(0, t0, 19 * cc, &d[0]);
+	k = _addcarry_u64(k, t1, 0, &d[1]);
+	k = _addcarry_u64(k, t2, 0, &d[2]);
+	(void)_addcarry_u64(k, t3, 0, &d[3]);
+
+#endif
+}
+
+/*
+ * Subtraction in the field: d = a - b, partially reduced (at most 2^255+37).
+ */
+static inline void
+f255_sub(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+#if BR_INT128  /* variant using the unsigned __int128 type */
+
+	/*
+	 * We compute t = 2^256 - 38 + a - b, which is necessarily
+	 * positive but lower than 2^256 + 2^255, since a <= 2^255 + 37
+	 * and b <= 2^255 + 37. We then subtract 0, p or 2*p, depending
+	 * on the two upper bits of t (bits 255 and 256).
+	 */
+
+	uint64_t t0, t1, t2, t3, t4, cc;
+	unsigned __int128 z;
+
+	z = (unsigned __int128)a[0] - (unsigned __int128)b[0] - 38;
+	t0 = (uint64_t)z;
+	cc = -(uint64_t)(z >> 64);  /* borrow out of this limb, as 0 or 1 */
+	z = (unsigned __int128)a[1] - (unsigned __int128)b[1]
+		- (unsigned __int128)cc;
+	t1 = (uint64_t)z;
+	cc = -(uint64_t)(z >> 64);
+	z = (unsigned __int128)a[2] - (unsigned __int128)b[2]
+		- (unsigned __int128)cc;
+	t2 = (uint64_t)z;
+	cc = -(uint64_t)(z >> 64);
+	z = (unsigned __int128)a[3] - (unsigned __int128)b[3]
+		- (unsigned __int128)cc;
+	t3 = (uint64_t)z;
+	t4 = 1 + (uint64_t)(z >> 64);  /* the 2^256 term minus the final borrow */
+
+	/*
+	 * We have a 257-bit result. The two top bits can be 00, 01 or 10,
+	 * but not 11 (value t <= 2^256 - 38 + 2^255 + 37 = 2^256 + 2^255 - 1).
+	 * Therefore, we can truncate to 255 bits, and add 0, 19 or 38.
+	 * This guarantees that the result is at most 2^255+37.
+	 */
+	cc = (38 & -t4) + (19 & -(t3 >> 63));  /* 38 for bit 256, 19 for bit 255 */
+	t3 &= MASK63;
+	z = (unsigned __int128)t0 + (unsigned __int128)cc;
+	d[0] = (uint64_t)z;
+	z = (unsigned __int128)t1 + (z >> 64);
+	d[1] = (uint64_t)z;
+	z = (unsigned __int128)t2 + (z >> 64);
+	d[2] = (uint64_t)z;
+	d[3] = t3 + (uint64_t)(z >> 64);
+
+#elif BR_UMUL128  /* variant using the intrinsics from <intrin.h> */
+
+	/*
+	 * We compute t = 2^256 - 38 + a - b, which is necessarily
+	 * positive but lower than 2^256 + 2^255, since a <= 2^255 + 37
+	 * and b <= 2^255 + 37. We then subtract 0, p or 2*p, depending
+	 * on the two upper bits of t (bits 255 and 256).
+	 */
+
+	uint64_t t0, t1, t2, t3, t4;
+	unsigned char k;
+
+	k = _subborrow_u64(0, a[0], b[0], &t0);
+	k = _subborrow_u64(k, a[1], b[1], &t1);
+	k = _subborrow_u64(k, a[2], b[2], &t2);
+	k = _subborrow_u64(k, a[3], b[3], &t3);
+	(void)_subborrow_u64(k, 1, 0, &t4);  /* the 2^256 term minus the borrow */
+
+	k = _subborrow_u64(0, t0, 38, &t0);  /* second pass: subtract 38 */
+	k = _subborrow_u64(k, t1, 0, &t1);
+	k = _subborrow_u64(k, t2, 0, &t2);
+	k = _subborrow_u64(k, t3, 0, &t3);
+	(void)_subborrow_u64(k, t4, 0, &t4);
+
+	/*
+	 * We have a 257-bit result. The two top bits can be 00, 01 or 10,
+	 * but not 11 (value t <= 2^256 - 38 + 2^255 + 37 = 2^256 + 2^255 - 1).
+	 * Therefore, we can truncate to 255 bits, and add 0, 19 or 38.
+	 * This guarantees that the result is at most 2^255+37.
+	 */
+	t4 = (38 & -t4) + (19 & -(t3 >> 63));  /* 38 for bit 256, 19 for bit 255 */
+	t3 &= MASK63;
+	k = _addcarry_u64(0, t0, t4, &d[0]);
+	k = _addcarry_u64(k, t1, 0, &d[1]);
+	k = _addcarry_u64(k, t2, 0, &d[2]);
+	(void)_addcarry_u64(k, t3, 0, &d[3]);
+
+#endif
+}
+
+/*
+ * Multiplication in the field: d = a * b; d may alias a and/or b.
+ */
+static inline void
+f255_mul(uint64_t *d, uint64_t *a, uint64_t *b)
+{
+#if BR_INT128  /* variant using the unsigned __int128 type */
+
+	unsigned __int128 z;
+	uint64_t t0, t1, t2, t3, t4, t5, t6, t7, th;
+
+	/*
+	 * Compute the product a*b over plain integers.
+	 */
+	z = (unsigned __int128)a[0] * (unsigned __int128)b[0];
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)a[0] * (unsigned __int128)b[1] + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)a[0] * (unsigned __int128)b[2] + (z >> 64);
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)a[0] * (unsigned __int128)b[3] + (z >> 64);
+	t3 = (uint64_t)z;
+	t4 = (uint64_t)(z >> 64);
+
+	z = (unsigned __int128)a[1] * (unsigned __int128)b[0]
+		+ (unsigned __int128)t1;
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)a[1] * (unsigned __int128)b[1]
+		+ (unsigned __int128)t2 + (z >> 64);
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)a[1] * (unsigned __int128)b[2]
+		+ (unsigned __int128)t3 + (z >> 64);
+	t3 = (uint64_t)z;
+	z = (unsigned __int128)a[1] * (unsigned __int128)b[3]
+		+ (unsigned __int128)t4 + (z >> 64);
+	t4 = (uint64_t)z;
+	t5 = (uint64_t)(z >> 64);
+
+	z = (unsigned __int128)a[2] * (unsigned __int128)b[0]
+		+ (unsigned __int128)t2;
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)a[2] * (unsigned __int128)b[1]
+		+ (unsigned __int128)t3 + (z >> 64);
+	t3 = (uint64_t)z;
+	z = (unsigned __int128)a[2] * (unsigned __int128)b[2]
+		+ (unsigned __int128)t4 + (z >> 64);
+	t4 = (uint64_t)z;
+	z = (unsigned __int128)a[2] * (unsigned __int128)b[3]
+		+ (unsigned __int128)t5 + (z >> 64);
+	t5 = (uint64_t)z;
+	t6 = (uint64_t)(z >> 64);
+
+	z = (unsigned __int128)a[3] * (unsigned __int128)b[0]
+		+ (unsigned __int128)t3;
+	t3 = (uint64_t)z;
+	z = (unsigned __int128)a[3] * (unsigned __int128)b[1]
+		+ (unsigned __int128)t4 + (z >> 64);
+	t4 = (uint64_t)z;
+	z = (unsigned __int128)a[3] * (unsigned __int128)b[2]
+		+ (unsigned __int128)t5 + (z >> 64);
+	t5 = (uint64_t)z;
+	z = (unsigned __int128)a[3] * (unsigned __int128)b[3]
+		+ (unsigned __int128)t6 + (z >> 64);
+	t6 = (uint64_t)z;
+	t7 = (uint64_t)(z >> 64);
+
+	/*
+	 * Modulo p, we have:
+	 *
+	 *   2^255 = 19
+	 *   2^510 = 19*19 = 361
+	 *
+	 * We split the intermediate t into three parts, in basis
+	 * 2^255. The low one will be in t0..t3; the middle one in t4..t7.
+	 * The upper one can only be a single bit (th), since the
+	 * multiplication operands are at most 2^255+37 each.
+	 */
+	th = t7 >> 62;  /* bits 510 and up of the product */
+	t7 = ((t7 << 1) | (t6 >> 63)) & MASK63;  /* shift t4..t7 left by one bit */
+	t6 = (t6 << 1) | (t5 >> 63);
+	t5 = (t5 << 1) | (t4 >> 63);
+	t4 = (t4 << 1) | (t3 >> 63);
+	t3 &= MASK63;
+
+	/*
+	 * Multiply the middle part (t4..t7) by 19. We truncate it to
+	 * 255 bits; the extra bits will go along with th.
+	 */
+	z = (unsigned __int128)t4 * 19;
+	t4 = (uint64_t)z;
+	z = (unsigned __int128)t5 * 19 + (z >> 64);
+	t5 = (uint64_t)z;
+	z = (unsigned __int128)t6 * 19 + (z >> 64);
+	t6 = (uint64_t)z;
+	z = (unsigned __int128)t7 * 19 + (z >> 64);
+	t7 = (uint64_t)z & MASK63;
+
+	th = (361 & -th) + (19 * (uint64_t)(z >> 63));
+
+	/*
+	 * Add elements together.
+	 * At this point:
+	 *   t0..t3 fits on 255 bits.
+	 *   t4..t7 fits on 255 bits.
+	 *   th <= 361 + 342 = 703.
+	 */
+	z = (unsigned __int128)t0 + (unsigned __int128)t4
+		+ (unsigned __int128)th;
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)t1 + (unsigned __int128)t5 + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)t2 + (unsigned __int128)t6 + (z >> 64);
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)t3 + (unsigned __int128)t7 + (z >> 64);
+	t3 = (uint64_t)z & MASK63;
+	th = (uint64_t)(z >> 63);
+
+	/*
+	 * Since the sum is at most 2^256 + 703, the two upper bits, in th,
+	 * can only have value 0, 1 or 2. We just add th*19, which
+	 * guarantees a result of at most 2^255+37.
+	 */
+	z = (unsigned __int128)t0 + (19 * th);
+	d[0] = (uint64_t)z;  /* first write to d[]: a[] and b[] are no longer read */
+	z = (unsigned __int128)t1 + (z >> 64);
+	d[1] = (uint64_t)z;
+	z = (unsigned __int128)t2 + (z >> 64);
+	d[2] = (uint64_t)z;
+	d[3] = t3 + (uint64_t)(z >> 64);
+
+#elif BR_UMUL128  /* variant using the intrinsics from <intrin.h> */
+
+	uint64_t t0, t1, t2, t3, t4, t5, t6, t7, th;
+	uint64_t h0, h1, h2, h3;
+	unsigned char k;
+
+	/*
+	 * Compute the product a*b over plain integers.
+	 */
+	t0 = _umul128(a[0], b[0], &h0);
+	t1 = _umul128(a[0], b[1], &h1);
+	k = _addcarry_u64(0, t1, h0, &t1);
+	t2 = _umul128(a[0], b[2], &h2);
+	k = _addcarry_u64(k, t2, h1, &t2);
+	t3 = _umul128(a[0], b[3], &h3);
+	k = _addcarry_u64(k, t3, h2, &t3);
+	(void)_addcarry_u64(k, h3, 0, &t4);
+
+	k = _addcarry_u64(0, _umul128(a[1], b[0], &h0), t1, &t1);
+	k = _addcarry_u64(k, _umul128(a[1], b[1], &h1), t2, &t2);
+	k = _addcarry_u64(k, _umul128(a[1], b[2], &h2), t3, &t3);
+	k = _addcarry_u64(k, _umul128(a[1], b[3], &h3), t4, &t4);
+	t5 = k;
+	k = _addcarry_u64(0, t2, h0, &t2);
+	k = _addcarry_u64(k, t3, h1, &t3);
+	k = _addcarry_u64(k, t4, h2, &t4);
+	(void)_addcarry_u64(k, t5, h3, &t5);
+
+	k = _addcarry_u64(0, _umul128(a[2], b[0], &h0), t2, &t2);
+	k = _addcarry_u64(k, _umul128(a[2], b[1], &h1), t3, &t3);
+	k = _addcarry_u64(k, _umul128(a[2], b[2], &h2), t4, &t4);
+	k = _addcarry_u64(k, _umul128(a[2], b[3], &h3), t5, &t5);
+	t6 = k;
+	k = _addcarry_u64(0, t3, h0, &t3);
+	k = _addcarry_u64(k, t4, h1, &t4);
+	k = _addcarry_u64(k, t5, h2, &t5);
+	(void)_addcarry_u64(k, t6, h3, &t6);
+
+	k = _addcarry_u64(0, _umul128(a[3], b[0], &h0), t3, &t3);
+	k = _addcarry_u64(k, _umul128(a[3], b[1], &h1), t4, &t4);
+	k = _addcarry_u64(k, _umul128(a[3], b[2], &h2), t5, &t5);
+	k = _addcarry_u64(k, _umul128(a[3], b[3], &h3), t6, &t6);
+	t7 = k;
+	k = _addcarry_u64(0, t4, h0, &t4);
+	k = _addcarry_u64(k, t5, h1, &t5);
+	k = _addcarry_u64(k, t6, h2, &t6);
+	(void)_addcarry_u64(k, t7, h3, &t7);
+
+	/*
+	 * Modulo p, we have:
+	 *
+	 *   2^255 = 19
+	 *   2^510 = 19*19 = 361
+	 *
+	 * We split the intermediate t into three parts, in basis
+	 * 2^255. The low one will be in t0..t3; the middle one in t4..t7.
+	 * The upper one can only be a single bit (th), since the
+	 * multiplication operands are at most 2^255+37 each.
+	 */
+	th = t7 >> 62;  /* bits 510 and up of the product */
+	t7 = ((t7 << 1) | (t6 >> 63)) & MASK63;  /* shift t4..t7 left by one bit */
+	t6 = (t6 << 1) | (t5 >> 63);
+	t5 = (t5 << 1) | (t4 >> 63);
+	t4 = (t4 << 1) | (t3 >> 63);
+	t3 &= MASK63;
+
+	/*
+	 * Multiply the middle part (t4..t7) by 19. We truncate it to
+	 * 255 bits; the extra bits will go along with th.
+	 */
+	t4 = _umul128(t4, 19, &h0);
+	t5 = _umul128(t5, 19, &h1);
+	t6 = _umul128(t6, 19, &h2);
+	t7 = _umul128(t7, 19, &h3);
+	k = _addcarry_u64(0, t5, h0, &t5);
+	k = _addcarry_u64(k, t6, h1, &t6);
+	k = _addcarry_u64(k, t7, h2, &t7);
+	(void)_addcarry_u64(k, h3, 0, &h3);
+	th = (361 & -th) + (19 * ((h3 << 1) + (t7 >> 63)));
+	t7 &= MASK63;
+
+	/*
+	 * Add elements together.
+	 * At this point:
+	 *   t0..t3 fits on 255 bits.
+	 *   t4..t7 fits on 255 bits.
+	 *   th <= 361 + 342 = 703.
+	 */
+	k = _addcarry_u64(0, t0, t4, &t0);
+	k = _addcarry_u64(k, t1, t5, &t1);
+	k = _addcarry_u64(k, t2, t6, &t2);
+	k = _addcarry_u64(k, t3, t7, &t3);
+	t4 = k;
+	k = _addcarry_u64(0, t0, th, &t0);
+	k = _addcarry_u64(k, t1, 0, &t1);
+	k = _addcarry_u64(k, t2, 0, &t2);
+	k = _addcarry_u64(k, t3, 0, &t3);
+	(void)_addcarry_u64(k, t4, 0, &t4);
+
+	th = (t4 << 1) + (t3 >> 63);  /* bits 255 and up of the sum */
+	t3 &= MASK63;
+
+	/*
+	 * Since the sum is at most 2^256 + 703, the two upper bits, in th,
+	 * can only have value 0, 1 or 2. We just add th*19, which
+	 * guarantees a result of at most 2^255+37.
+	 */
+	k = _addcarry_u64(0, t0, 19 * th, &d[0]);  /* first write to d[] */
+	k = _addcarry_u64(k, t1, 0, &d[1]);
+	k = _addcarry_u64(k, t2, 0, &d[2]);
+	(void)_addcarry_u64(k, t3, 0, &d[3]);
+
+#endif
+}
+
+/*
+ * Multiplication by A24 = 121665: d = 121665 * a, with bits 255+ folded back.
+ */
+static inline void
+f255_mul_a24(uint64_t *d, const uint64_t *a)
+{
+#if BR_INT128  /* variant using the unsigned __int128 type */
+
+	uint64_t t0, t1, t2, t3;
+	unsigned __int128 z;
+
+	z = (unsigned __int128)a[0] * 121665;  /* plain integer product, limb by limb */
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)a[1] * 121665 + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)a[2] * 121665 + (z >> 64);
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)a[3] * 121665 + (z >> 64);
+	t3 = (uint64_t)z & MASK63;
+
+	z = (unsigned __int128)t0 + (19 * (uint64_t)(z >> 63));  /* fold bits >= 255 (2^255 = 19) */
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)t1 + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)t2 + (z >> 64);
+	t2 = (uint64_t)z;
+	t3 = t3 + (uint64_t)(z >> 64);  /* may set bit 63 (bit 255 of the value) again */
+
+	z = (unsigned __int128)t0 + (19 & -(t3 >> 63));  /* second fold: add 19 if bit 255 is set */
+	d[0] = (uint64_t)z;
+	z = (unsigned __int128)t1 + (z >> 64);
+	d[1] = (uint64_t)z;
+	z = (unsigned __int128)t2 + (z >> 64);
+	d[2] = (uint64_t)z;
+	d[3] = (t3 & MASK63) + (uint64_t)(z >> 64);
+
+#elif BR_UMUL128  /* variant using the intrinsics from <intrin.h> */
+
+	uint64_t t0, t1, t2, t3, t4, h0, h1, h2, h3;
+	unsigned char k;
+
+	t0 = _umul128(a[0], 121665, &h0);  /* plain integer product, limb by limb */
+	t1 = _umul128(a[1], 121665, &h1);
+	k = _addcarry_u64(0, t1, h0, &t1);
+	t2 = _umul128(a[2], 121665, &h2);
+	k = _addcarry_u64(k, t2, h1, &t2);
+	t3 = _umul128(a[3], 121665, &h3);
+	k = _addcarry_u64(k, t3, h2, &t3);
+	(void)_addcarry_u64(k, h3, 0, &t4);
+
+	t4 = (t4 << 1) + (t3 >> 63);  /* bits 255 and up of the product */
+	t3 &= MASK63;
+	k = _addcarry_u64(0, t0, 19 * t4, &t0);  /* fold them back (2^255 = 19) */
+	k = _addcarry_u64(k, t1, 0, &t1);
+	k = _addcarry_u64(k, t2, 0, &t2);
+	(void)_addcarry_u64(k, t3, 0, &t3);
+
+	t4 = 19 & -(t3 >> 63);  /* second fold: 19 if bit 255 is set again */
+	t3 &= MASK63;
+	k = _addcarry_u64(0, t0, t4, &d[0]);
+	k = _addcarry_u64(k, t1, 0, &d[1]);
+	k = _addcarry_u64(k, t2, 0, &d[2]);
+	(void)_addcarry_u64(k, t3, 0, &d[3]);
+
+#endif
+}
+
+/*
+ * Finalize reduction: a[] is replaced, in place, with its reduced value.
+ */
+static inline void
+f255_final_reduce(uint64_t *a)
+{
+#if BR_INT128  /* variant using the unsigned __int128 type */
+
+	uint64_t t0, t1, t2, t3, m;
+	unsigned __int128 z;
+
+	/*
+	 * We add 19. If the result (in t) is below 2^255, then a[]
+	 * is already less than 2^255-19, thus already reduced.
+	 * Otherwise, we subtract 2^255 from t[], in which case we
+	 * have t = a - (2^255-19), and that's our result.
+	 */
+	z = (unsigned __int128)a[0] + 19;
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)a[1] + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)a[2] + (z >> 64);
+	t2 = (uint64_t)z;
+	t3 = a[3] + (uint64_t)(z >> 64);
+
+	m = -(t3 >> 63);  /* all-ones if bit 255 of t is set, else zero */
+	t3 &= MASK63;  /* drop bit 255, i.e. subtract 2^255 */
+	a[0] ^= m & (a[0] ^ t0);  /* a <- t when m is all-ones, unchanged otherwise */
+	a[1] ^= m & (a[1] ^ t1);
+	a[2] ^= m & (a[2] ^ t2);
+	a[3] ^= m & (a[3] ^ t3);
+
+#elif BR_UMUL128  /* variant using the intrinsics from <intrin.h> */
+
+	uint64_t t0, t1, t2, t3, m;
+	unsigned char k;
+
+	/*
+	 * We add 19. If the result (in t) is below 2^255, then a[]
+	 * is already less than 2^255-19, thus already reduced.
+	 * Otherwise, we subtract 2^255 from t[], in which case we
+	 * have t = a - (2^255-19), and that's our result.
+	 */
+	k = _addcarry_u64(0, a[0], 19, &t0);
+	k = _addcarry_u64(k, a[1], 0, &t1);
+	k = _addcarry_u64(k, a[2], 0, &t2);
+	(void)_addcarry_u64(k, a[3], 0, &t3);
+
+	m = -(t3 >> 63);  /* all-ones if bit 255 of t is set, else zero */
+	t3 &= MASK63;  /* drop bit 255, i.e. subtract 2^255 */
+	a[0] ^= m & (a[0] ^ t0);  /* a <- t when m is all-ones, unchanged otherwise */
+	a[1] ^= m & (a[1] ^ t1);
+	a[2] ^= m & (a[2] ^ t2);
+	a[3] ^= m & (a[3] ^ t3);
+
+#endif
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *kb, size_t kblen, int curve)
+{
+	unsigned char k[32];
+	uint64_t x1[4], x2[4], z2[4], x3[4], z3[4];
+	uint32_t swap;
+	int i;
+	/* G <- kb*G (x coordinate only); returns 1, or 0 if a length is invalid. */
+	(void)curve;	/* the curve identifier is not used */
+
+	/*
+	 * Points are encoded over exactly 32 bytes. Multipliers must fit
+	 * in 32 bytes as well.
+	 */
+	if (Glen != 32 || kblen > 32) {
+		return 0;
+	}
+
+	/*
+	 * RFC 7748 mandates that the high bit of the last point byte must
+	 * be ignored/cleared.
+	 */
+	x1[0] = br_dec64le(&G[ 0]);
+	x1[1] = br_dec64le(&G[ 8]);
+	x1[2] = br_dec64le(&G[16]);
+	x1[3] = br_dec64le(&G[24]) & MASK63;
+
+	/*
+	 * We can use memset() to clear values, because exact-width types
+	 * like uint64_t are guaranteed to have no padding bits or
+	 * trap representations.
+	 */
+	memset(x2, 0, sizeof x2);
+	x2[0] = 1;
+	memset(z2, 0, sizeof z2);	/* (x2:z2) = (1:0) */
+	memcpy(x3, x1, sizeof x1);
+	memcpy(z3, x2, sizeof x2);	/* (x3:z3) = (x1:1) */
+
+	/*
+	 * The multiplier is provided in big-endian notation, and
+	 * possibly shorter than 32 bytes.
+	 */
+	memset(k, 0, (sizeof k) - kblen);	/* left-pad with zeros */
+	memcpy(k + (sizeof k) - kblen, kb, kblen);
+	k[31] &= 0xF8;	/* clear the three least significant bits */
+	k[0] &= 0x7F;	/* clear bit 255 */
+	k[0] |= 0x40;	/* set bit 254 */
+
+	swap = 0;
+
+	for (i = 254; i >= 0; i --) {	/* one step per bit, from bit 254 down */
+		uint64_t a[4], aa[4], b[4], bb[4], e[4];
+		uint64_t c[4], d[4], da[4], cb[4];
+		uint32_t kt;
+
+		kt = (k[31 - (i >> 3)] >> (i & 7)) & 1;	/* bit i of the multiplier */
+		swap ^= kt;	/* swap only if this bit differs from the previous one */
+		f255_cswap(x2, x3, swap);
+		f255_cswap(z2, z3, swap);
+		swap = kt;
+
+		/* A = x_2 + z_2 */
+		f255_add(a, x2, z2);
+
+		/* AA = A^2 */
+		f255_mul(aa, a, a);
+
+		/* B = x_2 - z_2 */
+		f255_sub(b, x2, z2);
+
+		/* BB = B^2 */
+		f255_mul(bb, b, b);
+
+		/* E = AA - BB */
+		f255_sub(e, aa, bb);
+
+		/* C = x_3 + z_3 */
+		f255_add(c, x3, z3);
+
+		/* D = x_3 - z_3 */
+		f255_sub(d, x3, z3);
+
+		/* DA = D * A */
+		f255_mul(da, d, a);
+
+		/* CB = C * B */
+		f255_mul(cb, c, b);
+
+		/* x_3 = (DA + CB)^2 */
+		f255_add(x3, da, cb);
+		f255_mul(x3, x3, x3);
+
+		/* z_3 = x_1 * (DA - CB)^2 */
+		f255_sub(z3, da, cb);
+		f255_mul(z3, z3, z3);
+		f255_mul(z3, x1, z3);
+
+		/* x_2 = AA * BB */
+		f255_mul(x2, aa, bb);
+
+		/* z_2 = E * (AA + a24 * E) */
+		f255_mul_a24(z2, e);
+		f255_add(z2, aa, z2);
+		f255_mul(z2, e, z2);
+	}
+
+	f255_cswap(x2, x3, swap);	/* apply the swap left pending by the last step */
+	f255_cswap(z2, z3, swap);
+
+	/*
+	 * Compute 1/z2 = z2^(p-2). Since p = 2^255-19, most non-squaring
+	 * multiplications can be shared. x1 and x3, now unused, are temporaries.
+	 */
+	memcpy(x1, z2, sizeof z2);
+	for (i = 0; i < 15; i ++) {
+		f255_mul(x1, x1, x1);
+		f255_mul(x1, x1, z2);
+	}
+	memcpy(x3, x1, sizeof x1);	/* here x1 = z2^(2^16-1) */
+	for (i = 0; i < 14; i ++) {
+		int j;
+
+		for (j = 0; j < 16; j ++) {
+			f255_mul(x3, x3, x3);
+		}
+		f255_mul(x3, x3, x1);
+	}
+	for (i = 14; i >= 0; i --) {	/* x3 = z2^(2^240-1); append the low 15 bits of p-2 */
+		f255_mul(x3, x3, x3);
+		if ((0xFFEB >> i) & 1) {
+			f255_mul(x3, z2, x3);
+		}
+	}
+
+	/*
+	 * Compute x2/z2. We have 1/z2 in x3.
+	 */
+	f255_mul(x2, x2, x3);
+	f255_final_reduce(x2);
+
+	/*
+	 * Encode the final x2 value in little-endian.
+	 */
+	br_enc64le(G,      x2[0]);
+	br_enc64le(G +  8, x2[1]);
+	br_enc64le(G + 16, x2[2]);
+	br_enc64le(G + 24, x2[3]);
+	return 1;
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	/* Load the curve generator into R, then multiply it by x in place. */
+	size_t gen_len;
+	const unsigned char *gen = api_generator(curve, &gen_len);
+
+	memcpy(R, gen, gen_len);
+	api_mul(R, gen_len, x, xlen, curve);
+	return gen_len;
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/*
+	 * Not implemented: this operation is needed only by ECDSA,
+	 * and Curve25519 signatures use EdDSA instead of ECDSA.
+	 * Every argument is ignored and 0 is returned.
+	 */
+	(void)curve;
+	(void)ylen;
+	(void)y;
+	(void)xlen;
+	(void)x;
+	(void)len;
+	(void)B;
+	(void)A;
+	return 0;
+}
+
+/* see bearssl_ec.h */
+const br_ec_impl br_ec_c25519_m64 = {
+	(uint32_t)0x20000000,	/* bit 29 only; presumably supported_curves (see bearssl_ec.h) */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,	/* multiplies a 32-byte point in place */
+	&api_mulgen,	/* same, starting from the generator */
+	&api_muladd	/* stub: always returns 0 */
+};
+
+/* see bearssl_ec.h */
+const br_ec_impl *
+br_ec_c25519_m64_get(void)
+{
+	return &br_ec_c25519_m64;	/* implementation is compiled in */
+}
+
+#else
+
+/* see bearssl_ec.h */
+const br_ec_impl *
+br_ec_c25519_m64_get(void)
+{
+	return 0;	/* null pointer: this implementation was not compiled in */
+}
+
+#endif
diff --git a/third_party/bearssl/src/ec_curve25519.c b/third_party/bearssl/src/ec_curve25519.c
new file mode 100644
index 0000000..a47d215
--- /dev/null
+++ b/third_party/bearssl/src/ec_curve25519.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const unsigned char GEN[] = {	/* 32 bytes: 0x09 then zeros (9, little-endian) */
+	0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+static const unsigned char ORDER[] = {	/* 32 bytes: 2^255-1 when read big-endian */
+	0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
+
+/* see inner.h */
+const br_ec_curve_def br_curve25519 = {
+	BR_EC_curve25519,	/* curve identifier */
+	ORDER, sizeof ORDER,	/* ORDER array and its length (32) */
+	GEN, sizeof GEN		/* GEN array and its length (32) */
+};
diff --git a/third_party/bearssl/src/ec_default.c b/third_party/bearssl/src/ec_default.c
new file mode 100644
index 0000000..7bb6e0c
--- /dev/null
+++ b/third_party/bearssl/src/ec_default.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ec.h */
+const br_ec_impl *
+br_ec_get_default(void)
+{
+#if !BR_LOMUL
+	return &br_ec_all_m31;	/* BR_LOMUL is zero or undefined */
+#else
+	return &br_ec_all_m15;	/* BR_LOMUL is set */
+#endif
+}
diff --git a/third_party/bearssl/src/ec_keygen.c b/third_party/bearssl/src/ec_keygen.c
new file mode 100644
index 0000000..02a3096
--- /dev/null
+++ b/third_party/bearssl/src/ec_keygen.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ec.h */
+size_t
+br_ec_keygen(const br_prng_class **rng_ctx,
+	const br_ec_impl *impl, br_ec_private_key *sk,
+	void *kbuf, int curve)
+{
+	const unsigned char *ord;
+	unsigned char *out;
+	size_t olen;
+	unsigned top_mask, borrow, nonzero;
+
+	/* Reject curve identifiers that the implementation does not list. */
+	if (curve < 0 || curve >= 32) {
+		return 0;
+	}
+	if (((impl->supported_curves >> curve) & 1) == 0) {
+		return 0;
+	}
+
+	/* Fetch the order and skip its leading zero bytes. */
+	ord = impl->order(curve, &olen);
+	for (; olen > 0 && *ord == 0; olen --) {
+		ord ++;
+	}
+	if (kbuf == NULL || olen == 0) {
+		return olen;
+	}
+
+	/* Smear the highest set bit of the top order byte over the lower bits. */
+	top_mask = ord[0];
+	top_mask |= (top_mask >> 1);
+	top_mask |= (top_mask >> 2);
+	top_mask |= (top_mask >> 4);
+
+	/*
+	 * Rejection sampling: draw random strings of the right bit length
+	 * until one is non-zero and strictly below the order. Each
+	 * candidate is scanned in full, from its last byte to its first.
+	 */
+	out = kbuf;
+	do {
+		size_t j;
+
+		(*rng_ctx)->generate(rng_ctx, out, olen);
+		out[0] &= top_mask;
+		borrow = 0;
+		nonzero = 0;
+		for (j = olen; j -- > 0; ) {
+			borrow = ((unsigned)(out[j] - ord[j] - borrow) >> 8) & 1;
+			nonzero |= out[j];
+		}
+	} while (borrow == 0 || nonzero == 0);
+
+	if (sk != NULL) {
+		sk->xlen = olen;
+		sk->x = out;
+		sk->curve = curve;
+	}
+	return olen;
+}
diff --git a/third_party/bearssl/src/ec_p256_m15.c b/third_party/bearssl/src/ec_p256_m15.c
new file mode 100644
index 0000000..05800d8
--- /dev/null
+++ b/third_party/bearssl/src/ec_p256_m15.c
@@ -0,0 +1,2124 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * If BR_NO_ARITH_SHIFT is undefined, or defined to 0, then we _assume_
+ * that right-shifting a signed negative integer copies the sign bit
+ * (arithmetic right-shift). This is "implementation-defined behaviour",
+ * i.e. it is not undefined, but it may differ between compilers. Each
+ * compiler is supposed to document its behaviour in that respect. GCC
+ * explicitly defines that an arithmetic right shift is used. We expect
+ * all other compilers to do the same, because the underlying CPUs offer
+ * an arithmetic right shift opcode that could not be used otherwise.
+ */
+#if BR_NO_ARITH_SHIFT	/* emulation with unsigned shifts; n must be in 1..31 */
+#define ARSH(x, n)   (((uint32_t)(x) >> (n)) \
+                    | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
+#else	/* signed shift through an int32_t view; x must be a 32-bit lvalue */
+#define ARSH(x, n)   ((*(int32_t *)&(x)) >> (n))
+#endif	/* BR_NO_ARITH_SHIFT */
+
+/*
+ * Convert an integer from unsigned big-endian encoding to a sequence of
+ * 13-bit words in little-endian order. The final "partial" word is
+ * returned.
+ */
+static uint32_t
+be8_to_le13(uint32_t *dst, const unsigned char *src, size_t len)
+{
+	uint32_t pending = 0;	/* bits read but not yet emitted */
+	int nbits = 0;		/* number of valid bits in 'pending' */
+	size_t pos;
+
+	for (pos = len; pos -- > 0; ) {	/* last byte first */
+		pending |= (uint32_t)src[pos] << nbits;
+		nbits += 8;
+		if (nbits < 13) {
+			continue;
+		}
+		*dst ++ = pending & 0x1FFF;
+		pending >>= 13;
+		nbits -= 13;
+	}
+	return pending;
+}
+
+/*
+ * Convert an integer (13-bit words, little-endian) to unsigned
+ * big-endian encoding. The total encoding length is provided; all
+ * the destination bytes will be filled.
+ */
+static void
+le13_to_be8(unsigned char *dst, size_t len, const uint32_t *src)
+{
+	uint32_t pending = 0;	/* bits loaded but not yet written out */
+	int nbits = 0;		/* number of valid bits in 'pending' */
+	size_t pos;
+
+	for (pos = len; pos -- > 0; ) {	/* last output byte first */
+		if (nbits < 8) {	/* less than one byte left: load a word */
+			pending |= src[0] << nbits;
+			src ++;
+			nbits += 13;
+		}
+		dst[pos] = (unsigned char)pending;
+		pending >>= 8;
+		nbits -= 8;
+	}
+}
+
+/*
+ * Normalise an array of words to a strict 13 bits per word. Returned
+ * value is the resulting carry. The source (w) and destination (d)
+ * arrays may be identical, but shall not overlap partially.
+ */
+static inline uint32_t
+norm13(uint32_t *d, const uint32_t *w, size_t len)
+{
+	uint32_t carry = 0;
+	size_t i = 0;
+
+	while (i < len) {
+		/* The sum is handled as a signed value, so the carry is signed too. */
+		int32_t sum = w[i] + carry;
+
+		d[i] = sum & 0x1FFF;		/* keep the low 13 bits */
+		carry = ARSH(sum, 13);	/* sign-preserving shift */
+		i ++;
+	}
+	return carry;
+}
+
+/*
+ * mul20() multiplies two 260-bit integers together. Each word must fit
+ * on 13 bits; source operands use 20 words, destination operand
+ * receives 40 words. All overlaps allowed.
+ *
+ * square20() computes the square of a 260-bit integer. Each word must
+ * fit on 13 bits; source operand uses 20 words, destination operand
+ * receives 40 words. All overlaps allowed.
+ */
+
+#if BR_SLOW_MUL15
+
+static void
+mul20(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	/*
+	 * Two-level Karatsuba: turns a 20x20 multiplication into
+	 * nine 5x5 multiplications. We use 13-bit words but do not
+	 * propagate carries immediately, so words may expand:
+	 *
+	 *  - First Karatsuba decomposition turns the 20x20 mul on
+	 *    13-bit words into three 10x10 muls, two on 13-bit words
+	 *    and one on 14-bit words.
+	 *
+	 *  - Second Karatsuba decomposition further splits these into:
+	 *
+	 *     * four 5x5 muls on 13-bit words
+	 *     * four 5x5 muls on 14-bit words
+	 *     * one 5x5 mul on 15-bit words
+	 *
+	 * Highest word value is 8191, 16382 or 32764, for 13-bit, 14-bit
+	 * or 15-bit words, respectively.
+	 */
+	uint32_t u[45], v[45], w[90];
+	uint32_t cc;
+	int i;
+
+#define ZADD(dw, d_off, s1w, s1_off, s2w, s2_off)   do { \
+		(dw)[5 * (d_off) + 0] = (s1w)[5 * (s1_off) + 0] \
+			+ (s2w)[5 * (s2_off) + 0]; \
+		(dw)[5 * (d_off) + 1] = (s1w)[5 * (s1_off) + 1] \
+			+ (s2w)[5 * (s2_off) + 1]; \
+		(dw)[5 * (d_off) + 2] = (s1w)[5 * (s1_off) + 2] \
+			+ (s2w)[5 * (s2_off) + 2]; \
+		(dw)[5 * (d_off) + 3] = (s1w)[5 * (s1_off) + 3] \
+			+ (s2w)[5 * (s2_off) + 3]; \
+		(dw)[5 * (d_off) + 4] = (s1w)[5 * (s1_off) + 4] \
+			+ (s2w)[5 * (s2_off) + 4]; \
+	} while (0)
+
+#define ZADDT(dw, d_off, sw, s_off)   do { \
+		(dw)[5 * (d_off) + 0] += (sw)[5 * (s_off) + 0]; \
+		(dw)[5 * (d_off) + 1] += (sw)[5 * (s_off) + 1]; \
+		(dw)[5 * (d_off) + 2] += (sw)[5 * (s_off) + 2]; \
+		(dw)[5 * (d_off) + 3] += (sw)[5 * (s_off) + 3]; \
+		(dw)[5 * (d_off) + 4] += (sw)[5 * (s_off) + 4]; \
+	} while (0)
+
+#define ZSUB2F(dw, d_off, s1w, s1_off, s2w, s2_off)   do { \
+		(dw)[5 * (d_off) + 0] -= (s1w)[5 * (s1_off) + 0] \
+			+ (s2w)[5 * (s2_off) + 0]; \
+		(dw)[5 * (d_off) + 1] -= (s1w)[5 * (s1_off) + 1] \
+			+ (s2w)[5 * (s2_off) + 1]; \
+		(dw)[5 * (d_off) + 2] -= (s1w)[5 * (s1_off) + 2] \
+			+ (s2w)[5 * (s2_off) + 2]; \
+		(dw)[5 * (d_off) + 3] -= (s1w)[5 * (s1_off) + 3] \
+			+ (s2w)[5 * (s2_off) + 3]; \
+		(dw)[5 * (d_off) + 4] -= (s1w)[5 * (s1_off) + 4] \
+			+ (s2w)[5 * (s2_off) + 4]; \
+	} while (0)
+
+#define CPR1(w, cprcc)   do { \
+		uint32_t cprz = (w) + cprcc; \
+		(w) = cprz & 0x1FFF; \
+		cprcc = cprz >> 13; \
+	} while (0)
+
+#define CPR(dw, d_off)   do { \
+		uint32_t cprcc; \
+		cprcc = 0; \
+		CPR1((dw)[(d_off) + 0], cprcc); \
+		CPR1((dw)[(d_off) + 1], cprcc); \
+		CPR1((dw)[(d_off) + 2], cprcc); \
+		CPR1((dw)[(d_off) + 3], cprcc); \
+		CPR1((dw)[(d_off) + 4], cprcc); \
+		CPR1((dw)[(d_off) + 5], cprcc); \
+		CPR1((dw)[(d_off) + 6], cprcc); \
+		CPR1((dw)[(d_off) + 7], cprcc); \
+		CPR1((dw)[(d_off) + 8], cprcc); \
+		(dw)[(d_off) + 9] = cprcc; \
+	} while (0)
+
+	memcpy(u, a, 20 * sizeof *a);	/* u[0..19] = a; u[20..44] get the sums below */
+	ZADD(u, 4, a, 0, a, 1);
+	ZADD(u, 5, a, 2, a, 3);
+	ZADD(u, 6, a, 0, a, 2);
+	ZADD(u, 7, a, 1, a, 3);
+	ZADD(u, 8, u, 6, u, 7);
+
+	memcpy(v, b, 20 * sizeof *b);	/* v[0..19] = b; v[20..44] get the sums below */
+	ZADD(v, 4, b, 0, b, 1);
+	ZADD(v, 5, b, 2, b, 3);
+	ZADD(v, 6, b, 0, b, 2);
+	ZADD(v, 7, b, 1, b, 3);
+	ZADD(v, 8, v, 6, v, 7);
+
+	/*
+	 * Do the first eight 5x5 muls. Source words are at most 16382
+	 * each, so we can add product results together "as is" in 32-bit
+	 * words.
+	 */
+	for (i = 0; i < 40; i += 5) {
+		w[(i << 1) + 0] = MUL15(u[i + 0], v[i + 0]);
+		w[(i << 1) + 1] = MUL15(u[i + 0], v[i + 1])
+			+ MUL15(u[i + 1], v[i + 0]);
+		w[(i << 1) + 2] = MUL15(u[i + 0], v[i + 2])
+			+ MUL15(u[i + 1], v[i + 1])
+			+ MUL15(u[i + 2], v[i + 0]);
+		w[(i << 1) + 3] = MUL15(u[i + 0], v[i + 3])
+			+ MUL15(u[i + 1], v[i + 2])
+			+ MUL15(u[i + 2], v[i + 1])
+			+ MUL15(u[i + 3], v[i + 0]);
+		w[(i << 1) + 4] = MUL15(u[i + 0], v[i + 4])
+			+ MUL15(u[i + 1], v[i + 3])
+			+ MUL15(u[i + 2], v[i + 2])
+			+ MUL15(u[i + 3], v[i + 1])
+			+ MUL15(u[i + 4], v[i + 0]);
+		w[(i << 1) + 5] = MUL15(u[i + 1], v[i + 4])
+			+ MUL15(u[i + 2], v[i + 3])
+			+ MUL15(u[i + 3], v[i + 2])
+			+ MUL15(u[i + 4], v[i + 1]);
+		w[(i << 1) + 6] = MUL15(u[i + 2], v[i + 4])
+			+ MUL15(u[i + 3], v[i + 3])
+			+ MUL15(u[i + 4], v[i + 2]);
+		w[(i << 1) + 7] = MUL15(u[i + 3], v[i + 4])
+			+ MUL15(u[i + 4], v[i + 3]);
+		w[(i << 1) + 8] = MUL15(u[i + 4], v[i + 4]);
+		w[(i << 1) + 9] = 0;
+	}
+
+	/*
+	 * For the 9th multiplication, source words are up to 32764,
+	 * so we must do some carry propagation. If we add up to
+	 * 4 products and the carry is no more than 524224, then the
+	 * result fits in 32 bits, and the next carry will be no more
+	 * than 524224 (because 4*(32764^2)+524224 < 8192*524225).
+	 *
+	 * We thus just skip one of the products in the middle word,
+	 * then do a carry propagation (this reduces words to 13 bits
+	 * each, except possibly the last, which may use up to 17 bits
+	 * or so), then add the missing product.
+	 */
+	w[80 + 0] = MUL15(u[40 + 0], v[40 + 0]);
+	w[80 + 1] = MUL15(u[40 + 0], v[40 + 1])
+		+ MUL15(u[40 + 1], v[40 + 0]);
+	w[80 + 2] = MUL15(u[40 + 0], v[40 + 2])
+		+ MUL15(u[40 + 1], v[40 + 1])
+		+ MUL15(u[40 + 2], v[40 + 0]);
+	w[80 + 3] = MUL15(u[40 + 0], v[40 + 3])
+		+ MUL15(u[40 + 1], v[40 + 2])
+		+ MUL15(u[40 + 2], v[40 + 1])
+		+ MUL15(u[40 + 3], v[40 + 0]);
+	w[80 + 4] = MUL15(u[40 + 0], v[40 + 4])
+		+ MUL15(u[40 + 1], v[40 + 3])
+		+ MUL15(u[40 + 2], v[40 + 2])
+		+ MUL15(u[40 + 3], v[40 + 1]);
+		/* + MUL15(u[40 + 4], v[40 + 0]) */
+	w[80 + 5] = MUL15(u[40 + 1], v[40 + 4])
+		+ MUL15(u[40 + 2], v[40 + 3])
+		+ MUL15(u[40 + 3], v[40 + 2])
+		+ MUL15(u[40 + 4], v[40 + 1]);
+	w[80 + 6] = MUL15(u[40 + 2], v[40 + 4])
+		+ MUL15(u[40 + 3], v[40 + 3])
+		+ MUL15(u[40 + 4], v[40 + 2]);
+	w[80 + 7] = MUL15(u[40 + 3], v[40 + 4])
+		+ MUL15(u[40 + 4], v[40 + 3]);
+	w[80 + 8] = MUL15(u[40 + 4], v[40 + 4]);
+
+	CPR(w, 80);
+
+	w[80 + 4] += MUL15(u[40 + 4], v[40 + 0]);
+
+	/*
+	 * The products on 14-bit words in slots 6 and 7 yield values
+	 * up to 5*(16382^2) each, and we need to subtract two such
+	 * values from the higher word. We need the subtraction to fit
+	 * in a _signed_ 32-bit integer, i.e. 31 bits + a sign bit.
+	 * However, 10*(16382^2) does not fit. So we must perform a
+	 * bit of reduction here.
+	 */
+	CPR(w, 60);
+	CPR(w, 70);
+
+	/*
+	 * Recompose results.
+	 */
+
+	/* 0..1*0..1 into 0..3 */
+	ZSUB2F(w, 8, w, 0, w, 2);
+	ZSUB2F(w, 9, w, 1, w, 3);
+	ZADDT(w, 1, w, 8);
+	ZADDT(w, 2, w, 9);
+
+	/* 2..3*2..3 into 4..7 */
+	ZSUB2F(w, 10, w, 4, w, 6);
+	ZSUB2F(w, 11, w, 5, w, 7);
+	ZADDT(w, 5, w, 10);
+	ZADDT(w, 6, w, 11);
+
+	/* (0..1+2..3)*(0..1+2..3) into 12..15 */
+	ZSUB2F(w, 16, w, 12, w, 14);
+	ZSUB2F(w, 17, w, 13, w, 15);
+	ZADDT(w, 13, w, 16);
+	ZADDT(w, 14, w, 17);
+
+	/* first-level recomposition */
+	ZSUB2F(w, 12, w, 0, w, 4);
+	ZSUB2F(w, 13, w, 1, w, 5);
+	ZSUB2F(w, 14, w, 2, w, 6);
+	ZSUB2F(w, 15, w, 3, w, 7);
+	ZADDT(w, 2, w, 12);
+	ZADDT(w, 3, w, 13);
+	ZADDT(w, 4, w, 14);
+	ZADDT(w, 5, w, 15);
+
+	/*
+	 * Perform carry propagation to bring all words down to 13 bits.
+	 */
+	cc = norm13(d, w, 40);
+	d[39] += (cc << 13);	/* fold the final carry back into the top word */
+
+#undef ZADD
+#undef ZADDT
+#undef ZSUB2F
+#undef CPR1
+#undef CPR
+}
+
+static inline void
+square20(uint32_t *d, const uint32_t *a)
+{
+	mul20(d, a, a);	/* BR_SLOW_MUL15 build: squaring is a plain mul20() call */
+}
+
+#else
+
+static void
+mul20(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t t[39];	/* t[k] = sum of a[i]*b[j] for i+j == k, carries not propagated */
+
+	t[ 0] = MUL15(a[ 0], b[ 0]);
+	t[ 1] = MUL15(a[ 0], b[ 1])
+		+ MUL15(a[ 1], b[ 0]);
+	t[ 2] = MUL15(a[ 0], b[ 2])
+		+ MUL15(a[ 1], b[ 1])
+		+ MUL15(a[ 2], b[ 0]);
+	t[ 3] = MUL15(a[ 0], b[ 3])
+		+ MUL15(a[ 1], b[ 2])
+		+ MUL15(a[ 2], b[ 1])
+		+ MUL15(a[ 3], b[ 0]);
+	t[ 4] = MUL15(a[ 0], b[ 4])
+		+ MUL15(a[ 1], b[ 3])
+		+ MUL15(a[ 2], b[ 2])
+		+ MUL15(a[ 3], b[ 1])
+		+ MUL15(a[ 4], b[ 0]);
+	t[ 5] = MUL15(a[ 0], b[ 5])
+		+ MUL15(a[ 1], b[ 4])
+		+ MUL15(a[ 2], b[ 3])
+		+ MUL15(a[ 3], b[ 2])
+		+ MUL15(a[ 4], b[ 1])
+		+ MUL15(a[ 5], b[ 0]);
+	t[ 6] = MUL15(a[ 0], b[ 6])
+		+ MUL15(a[ 1], b[ 5])
+		+ MUL15(a[ 2], b[ 4])
+		+ MUL15(a[ 3], b[ 3])
+		+ MUL15(a[ 4], b[ 2])
+		+ MUL15(a[ 5], b[ 1])
+		+ MUL15(a[ 6], b[ 0]);
+	t[ 7] = MUL15(a[ 0], b[ 7])
+		+ MUL15(a[ 1], b[ 6])
+		+ MUL15(a[ 2], b[ 5])
+		+ MUL15(a[ 3], b[ 4])
+		+ MUL15(a[ 4], b[ 3])
+		+ MUL15(a[ 5], b[ 2])
+		+ MUL15(a[ 6], b[ 1])
+		+ MUL15(a[ 7], b[ 0]);
+	t[ 8] = MUL15(a[ 0], b[ 8])
+		+ MUL15(a[ 1], b[ 7])
+		+ MUL15(a[ 2], b[ 6])
+		+ MUL15(a[ 3], b[ 5])
+		+ MUL15(a[ 4], b[ 4])
+		+ MUL15(a[ 5], b[ 3])
+		+ MUL15(a[ 6], b[ 2])
+		+ MUL15(a[ 7], b[ 1])
+		+ MUL15(a[ 8], b[ 0]);
+	t[ 9] = MUL15(a[ 0], b[ 9])
+		+ MUL15(a[ 1], b[ 8])
+		+ MUL15(a[ 2], b[ 7])
+		+ MUL15(a[ 3], b[ 6])
+		+ MUL15(a[ 4], b[ 5])
+		+ MUL15(a[ 5], b[ 4])
+		+ MUL15(a[ 6], b[ 3])
+		+ MUL15(a[ 7], b[ 2])
+		+ MUL15(a[ 8], b[ 1])
+		+ MUL15(a[ 9], b[ 0]);
+	t[10] = MUL15(a[ 0], b[10])
+		+ MUL15(a[ 1], b[ 9])
+		+ MUL15(a[ 2], b[ 8])
+		+ MUL15(a[ 3], b[ 7])
+		+ MUL15(a[ 4], b[ 6])
+		+ MUL15(a[ 5], b[ 5])
+		+ MUL15(a[ 6], b[ 4])
+		+ MUL15(a[ 7], b[ 3])
+		+ MUL15(a[ 8], b[ 2])
+		+ MUL15(a[ 9], b[ 1])
+		+ MUL15(a[10], b[ 0]);
+	t[11] = MUL15(a[ 0], b[11])
+		+ MUL15(a[ 1], b[10])
+		+ MUL15(a[ 2], b[ 9])
+		+ MUL15(a[ 3], b[ 8])
+		+ MUL15(a[ 4], b[ 7])
+		+ MUL15(a[ 5], b[ 6])
+		+ MUL15(a[ 6], b[ 5])
+		+ MUL15(a[ 7], b[ 4])
+		+ MUL15(a[ 8], b[ 3])
+		+ MUL15(a[ 9], b[ 2])
+		+ MUL15(a[10], b[ 1])
+		+ MUL15(a[11], b[ 0]);
+	t[12] = MUL15(a[ 0], b[12])
+		+ MUL15(a[ 1], b[11])
+		+ MUL15(a[ 2], b[10])
+		+ MUL15(a[ 3], b[ 9])
+		+ MUL15(a[ 4], b[ 8])
+		+ MUL15(a[ 5], b[ 7])
+		+ MUL15(a[ 6], b[ 6])
+		+ MUL15(a[ 7], b[ 5])
+		+ MUL15(a[ 8], b[ 4])
+		+ MUL15(a[ 9], b[ 3])
+		+ MUL15(a[10], b[ 2])
+		+ MUL15(a[11], b[ 1])
+		+ MUL15(a[12], b[ 0]);
+	t[13] = MUL15(a[ 0], b[13])
+		+ MUL15(a[ 1], b[12])
+		+ MUL15(a[ 2], b[11])
+		+ MUL15(a[ 3], b[10])
+		+ MUL15(a[ 4], b[ 9])
+		+ MUL15(a[ 5], b[ 8])
+		+ MUL15(a[ 6], b[ 7])
+		+ MUL15(a[ 7], b[ 6])
+		+ MUL15(a[ 8], b[ 5])
+		+ MUL15(a[ 9], b[ 4])
+		+ MUL15(a[10], b[ 3])
+		+ MUL15(a[11], b[ 2])
+		+ MUL15(a[12], b[ 1])
+		+ MUL15(a[13], b[ 0]);
+	t[14] = MUL15(a[ 0], b[14])
+		+ MUL15(a[ 1], b[13])
+		+ MUL15(a[ 2], b[12])
+		+ MUL15(a[ 3], b[11])
+		+ MUL15(a[ 4], b[10])
+		+ MUL15(a[ 5], b[ 9])
+		+ MUL15(a[ 6], b[ 8])
+		+ MUL15(a[ 7], b[ 7])
+		+ MUL15(a[ 8], b[ 6])
+		+ MUL15(a[ 9], b[ 5])
+		+ MUL15(a[10], b[ 4])
+		+ MUL15(a[11], b[ 3])
+		+ MUL15(a[12], b[ 2])
+		+ MUL15(a[13], b[ 1])
+		+ MUL15(a[14], b[ 0]);
+	t[15] = MUL15(a[ 0], b[15])
+		+ MUL15(a[ 1], b[14])
+		+ MUL15(a[ 2], b[13])
+		+ MUL15(a[ 3], b[12])
+		+ MUL15(a[ 4], b[11])
+		+ MUL15(a[ 5], b[10])
+		+ MUL15(a[ 6], b[ 9])
+		+ MUL15(a[ 7], b[ 8])
+		+ MUL15(a[ 8], b[ 7])
+		+ MUL15(a[ 9], b[ 6])
+		+ MUL15(a[10], b[ 5])
+		+ MUL15(a[11], b[ 4])
+		+ MUL15(a[12], b[ 3])
+		+ MUL15(a[13], b[ 2])
+		+ MUL15(a[14], b[ 1])
+		+ MUL15(a[15], b[ 0]);
+	t[16] = MUL15(a[ 0], b[16])
+		+ MUL15(a[ 1], b[15])
+		+ MUL15(a[ 2], b[14])
+		+ MUL15(a[ 3], b[13])
+		+ MUL15(a[ 4], b[12])
+		+ MUL15(a[ 5], b[11])
+		+ MUL15(a[ 6], b[10])
+		+ MUL15(a[ 7], b[ 9])
+		+ MUL15(a[ 8], b[ 8])
+		+ MUL15(a[ 9], b[ 7])
+		+ MUL15(a[10], b[ 6])
+		+ MUL15(a[11], b[ 5])
+		+ MUL15(a[12], b[ 4])
+		+ MUL15(a[13], b[ 3])
+		+ MUL15(a[14], b[ 2])
+		+ MUL15(a[15], b[ 1])
+		+ MUL15(a[16], b[ 0]);
+	t[17] = MUL15(a[ 0], b[17])
+		+ MUL15(a[ 1], b[16])
+		+ MUL15(a[ 2], b[15])
+		+ MUL15(a[ 3], b[14])
+		+ MUL15(a[ 4], b[13])
+		+ MUL15(a[ 5], b[12])
+		+ MUL15(a[ 6], b[11])
+		+ MUL15(a[ 7], b[10])
+		+ MUL15(a[ 8], b[ 9])
+		+ MUL15(a[ 9], b[ 8])
+		+ MUL15(a[10], b[ 7])
+		+ MUL15(a[11], b[ 6])
+		+ MUL15(a[12], b[ 5])
+		+ MUL15(a[13], b[ 4])
+		+ MUL15(a[14], b[ 3])
+		+ MUL15(a[15], b[ 2])
+		+ MUL15(a[16], b[ 1])
+		+ MUL15(a[17], b[ 0]);
+	t[18] = MUL15(a[ 0], b[18])
+		+ MUL15(a[ 1], b[17])
+		+ MUL15(a[ 2], b[16])
+		+ MUL15(a[ 3], b[15])
+		+ MUL15(a[ 4], b[14])
+		+ MUL15(a[ 5], b[13])
+		+ MUL15(a[ 6], b[12])
+		+ MUL15(a[ 7], b[11])
+		+ MUL15(a[ 8], b[10])
+		+ MUL15(a[ 9], b[ 9])
+		+ MUL15(a[10], b[ 8])
+		+ MUL15(a[11], b[ 7])
+		+ MUL15(a[12], b[ 6])
+		+ MUL15(a[13], b[ 5])
+		+ MUL15(a[14], b[ 4])
+		+ MUL15(a[15], b[ 3])
+		+ MUL15(a[16], b[ 2])
+		+ MUL15(a[17], b[ 1])
+		+ MUL15(a[18], b[ 0]);
+	t[19] = MUL15(a[ 0], b[19])
+		+ MUL15(a[ 1], b[18])
+		+ MUL15(a[ 2], b[17])
+		+ MUL15(a[ 3], b[16])
+		+ MUL15(a[ 4], b[15])
+		+ MUL15(a[ 5], b[14])
+		+ MUL15(a[ 6], b[13])
+		+ MUL15(a[ 7], b[12])
+		+ MUL15(a[ 8], b[11])
+		+ MUL15(a[ 9], b[10])
+		+ MUL15(a[10], b[ 9])
+		+ MUL15(a[11], b[ 8])
+		+ MUL15(a[12], b[ 7])
+		+ MUL15(a[13], b[ 6])
+		+ MUL15(a[14], b[ 5])
+		+ MUL15(a[15], b[ 4])
+		+ MUL15(a[16], b[ 3])
+		+ MUL15(a[17], b[ 2])
+		+ MUL15(a[18], b[ 1])
+		+ MUL15(a[19], b[ 0]);
+	t[20] = MUL15(a[ 1], b[19])
+		+ MUL15(a[ 2], b[18])
+		+ MUL15(a[ 3], b[17])
+		+ MUL15(a[ 4], b[16])
+		+ MUL15(a[ 5], b[15])
+		+ MUL15(a[ 6], b[14])
+		+ MUL15(a[ 7], b[13])
+		+ MUL15(a[ 8], b[12])
+		+ MUL15(a[ 9], b[11])
+		+ MUL15(a[10], b[10])
+		+ MUL15(a[11], b[ 9])
+		+ MUL15(a[12], b[ 8])
+		+ MUL15(a[13], b[ 7])
+		+ MUL15(a[14], b[ 6])
+		+ MUL15(a[15], b[ 5])
+		+ MUL15(a[16], b[ 4])
+		+ MUL15(a[17], b[ 3])
+		+ MUL15(a[18], b[ 2])
+		+ MUL15(a[19], b[ 1]);
+	t[21] = MUL15(a[ 2], b[19])
+		+ MUL15(a[ 3], b[18])
+		+ MUL15(a[ 4], b[17])
+		+ MUL15(a[ 5], b[16])
+		+ MUL15(a[ 6], b[15])
+		+ MUL15(a[ 7], b[14])
+		+ MUL15(a[ 8], b[13])
+		+ MUL15(a[ 9], b[12])
+		+ MUL15(a[10], b[11])
+		+ MUL15(a[11], b[10])
+		+ MUL15(a[12], b[ 9])
+		+ MUL15(a[13], b[ 8])
+		+ MUL15(a[14], b[ 7])
+		+ MUL15(a[15], b[ 6])
+		+ MUL15(a[16], b[ 5])
+		+ MUL15(a[17], b[ 4])
+		+ MUL15(a[18], b[ 3])
+		+ MUL15(a[19], b[ 2]);
+	t[22] = MUL15(a[ 3], b[19])
+		+ MUL15(a[ 4], b[18])
+		+ MUL15(a[ 5], b[17])
+		+ MUL15(a[ 6], b[16])
+		+ MUL15(a[ 7], b[15])
+		+ MUL15(a[ 8], b[14])
+		+ MUL15(a[ 9], b[13])
+		+ MUL15(a[10], b[12])
+		+ MUL15(a[11], b[11])
+		+ MUL15(a[12], b[10])
+		+ MUL15(a[13], b[ 9])
+		+ MUL15(a[14], b[ 8])
+		+ MUL15(a[15], b[ 7])
+		+ MUL15(a[16], b[ 6])
+		+ MUL15(a[17], b[ 5])
+		+ MUL15(a[18], b[ 4])
+		+ MUL15(a[19], b[ 3]);
+	t[23] = MUL15(a[ 4], b[19])
+		+ MUL15(a[ 5], b[18])
+		+ MUL15(a[ 6], b[17])
+		+ MUL15(a[ 7], b[16])
+		+ MUL15(a[ 8], b[15])
+		+ MUL15(a[ 9], b[14])
+		+ MUL15(a[10], b[13])
+		+ MUL15(a[11], b[12])
+		+ MUL15(a[12], b[11])
+		+ MUL15(a[13], b[10])
+		+ MUL15(a[14], b[ 9])
+		+ MUL15(a[15], b[ 8])
+		+ MUL15(a[16], b[ 7])
+		+ MUL15(a[17], b[ 6])
+		+ MUL15(a[18], b[ 5])
+		+ MUL15(a[19], b[ 4]);
+	t[24] = MUL15(a[ 5], b[19])
+		+ MUL15(a[ 6], b[18])
+		+ MUL15(a[ 7], b[17])
+		+ MUL15(a[ 8], b[16])
+		+ MUL15(a[ 9], b[15])
+		+ MUL15(a[10], b[14])
+		+ MUL15(a[11], b[13])
+		+ MUL15(a[12], b[12])
+		+ MUL15(a[13], b[11])
+		+ MUL15(a[14], b[10])
+		+ MUL15(a[15], b[ 9])
+		+ MUL15(a[16], b[ 8])
+		+ MUL15(a[17], b[ 7])
+		+ MUL15(a[18], b[ 6])
+		+ MUL15(a[19], b[ 5]);
+	t[25] = MUL15(a[ 6], b[19])
+		+ MUL15(a[ 7], b[18])
+		+ MUL15(a[ 8], b[17])
+		+ MUL15(a[ 9], b[16])
+		+ MUL15(a[10], b[15])
+		+ MUL15(a[11], b[14])
+		+ MUL15(a[12], b[13])
+		+ MUL15(a[13], b[12])
+		+ MUL15(a[14], b[11])
+		+ MUL15(a[15], b[10])
+		+ MUL15(a[16], b[ 9])
+		+ MUL15(a[17], b[ 8])
+		+ MUL15(a[18], b[ 7])
+		+ MUL15(a[19], b[ 6]);
+	t[26] = MUL15(a[ 7], b[19])
+		+ MUL15(a[ 8], b[18])
+		+ MUL15(a[ 9], b[17])
+		+ MUL15(a[10], b[16])
+		+ MUL15(a[11], b[15])
+		+ MUL15(a[12], b[14])
+		+ MUL15(a[13], b[13])
+		+ MUL15(a[14], b[12])
+		+ MUL15(a[15], b[11])
+		+ MUL15(a[16], b[10])
+		+ MUL15(a[17], b[ 9])
+		+ MUL15(a[18], b[ 8])
+		+ MUL15(a[19], b[ 7]);
+	t[27] = MUL15(a[ 8], b[19])
+		+ MUL15(a[ 9], b[18])
+		+ MUL15(a[10], b[17])
+		+ MUL15(a[11], b[16])
+		+ MUL15(a[12], b[15])
+		+ MUL15(a[13], b[14])
+		+ MUL15(a[14], b[13])
+		+ MUL15(a[15], b[12])
+		+ MUL15(a[16], b[11])
+		+ MUL15(a[17], b[10])
+		+ MUL15(a[18], b[ 9])
+		+ MUL15(a[19], b[ 8]);
+	t[28] = MUL15(a[ 9], b[19])
+		+ MUL15(a[10], b[18])
+		+ MUL15(a[11], b[17])
+		+ MUL15(a[12], b[16])
+		+ MUL15(a[13], b[15])
+		+ MUL15(a[14], b[14])
+		+ MUL15(a[15], b[13])
+		+ MUL15(a[16], b[12])
+		+ MUL15(a[17], b[11])
+		+ MUL15(a[18], b[10])
+		+ MUL15(a[19], b[ 9]);
+	t[29] = MUL15(a[10], b[19])
+		+ MUL15(a[11], b[18])
+		+ MUL15(a[12], b[17])
+		+ MUL15(a[13], b[16])
+		+ MUL15(a[14], b[15])
+		+ MUL15(a[15], b[14])
+		+ MUL15(a[16], b[13])
+		+ MUL15(a[17], b[12])
+		+ MUL15(a[18], b[11])
+		+ MUL15(a[19], b[10]);
+	t[30] = MUL15(a[11], b[19])
+		+ MUL15(a[12], b[18])
+		+ MUL15(a[13], b[17])
+		+ MUL15(a[14], b[16])
+		+ MUL15(a[15], b[15])
+		+ MUL15(a[16], b[14])
+		+ MUL15(a[17], b[13])
+		+ MUL15(a[18], b[12])
+		+ MUL15(a[19], b[11]);
+	t[31] = MUL15(a[12], b[19])
+		+ MUL15(a[13], b[18])
+		+ MUL15(a[14], b[17])
+		+ MUL15(a[15], b[16])
+		+ MUL15(a[16], b[15])
+		+ MUL15(a[17], b[14])
+		+ MUL15(a[18], b[13])
+		+ MUL15(a[19], b[12]);
+	t[32] = MUL15(a[13], b[19])
+		+ MUL15(a[14], b[18])
+		+ MUL15(a[15], b[17])
+		+ MUL15(a[16], b[16])
+		+ MUL15(a[17], b[15])
+		+ MUL15(a[18], b[14])
+		+ MUL15(a[19], b[13]);
+	t[33] = MUL15(a[14], b[19])
+		+ MUL15(a[15], b[18])
+		+ MUL15(a[16], b[17])
+		+ MUL15(a[17], b[16])
+		+ MUL15(a[18], b[15])
+		+ MUL15(a[19], b[14]);
+	t[34] = MUL15(a[15], b[19])
+		+ MUL15(a[16], b[18])
+		+ MUL15(a[17], b[17])
+		+ MUL15(a[18], b[16])
+		+ MUL15(a[19], b[15]);
+	t[35] = MUL15(a[16], b[19])
+		+ MUL15(a[17], b[18])
+		+ MUL15(a[18], b[17])
+		+ MUL15(a[19], b[16]);
+	t[36] = MUL15(a[17], b[19])
+		+ MUL15(a[18], b[18])
+		+ MUL15(a[19], b[17]);
+	t[37] = MUL15(a[18], b[19])
+		+ MUL15(a[19], b[18]);
+	t[38] = MUL15(a[19], b[19]);
+	d[39] = norm13(d, t, 39);	/* propagate carries; the final carry is the top word */
+}
+
+static void
+square20(uint32_t *d, const uint32_t *a)
+{
+	uint32_t t[39];	/* t[k]: sum of the products a[i]*a[j] with i+j == k */
+
+	t[ 0] = MUL15(a[ 0], a[ 0]);
+	t[ 1] = ((MUL15(a[ 0], a[ 1])) << 1);	/* cross products occur twice: doubled */
+	t[ 2] = MUL15(a[ 1], a[ 1])
+		+ ((MUL15(a[ 0], a[ 2])) << 1);
+	t[ 3] = ((MUL15(a[ 0], a[ 3])
+		+ MUL15(a[ 1], a[ 2])) << 1);
+	t[ 4] = MUL15(a[ 2], a[ 2])
+		+ ((MUL15(a[ 0], a[ 4])
+		+ MUL15(a[ 1], a[ 3])) << 1);
+	t[ 5] = ((MUL15(a[ 0], a[ 5])
+		+ MUL15(a[ 1], a[ 4])
+		+ MUL15(a[ 2], a[ 3])) << 1);
+	t[ 6] = MUL15(a[ 3], a[ 3])
+		+ ((MUL15(a[ 0], a[ 6])
+		+ MUL15(a[ 1], a[ 5])
+		+ MUL15(a[ 2], a[ 4])) << 1);
+	t[ 7] = ((MUL15(a[ 0], a[ 7])
+		+ MUL15(a[ 1], a[ 6])
+		+ MUL15(a[ 2], a[ 5])
+		+ MUL15(a[ 3], a[ 4])) << 1);
+	t[ 8] = MUL15(a[ 4], a[ 4])
+		+ ((MUL15(a[ 0], a[ 8])
+		+ MUL15(a[ 1], a[ 7])
+		+ MUL15(a[ 2], a[ 6])
+		+ MUL15(a[ 3], a[ 5])) << 1);
+	t[ 9] = ((MUL15(a[ 0], a[ 9])
+		+ MUL15(a[ 1], a[ 8])
+		+ MUL15(a[ 2], a[ 7])
+		+ MUL15(a[ 3], a[ 6])
+		+ MUL15(a[ 4], a[ 5])) << 1);
+	t[10] = MUL15(a[ 5], a[ 5])
+		+ ((MUL15(a[ 0], a[10])
+		+ MUL15(a[ 1], a[ 9])
+		+ MUL15(a[ 2], a[ 8])
+		+ MUL15(a[ 3], a[ 7])
+		+ MUL15(a[ 4], a[ 6])) << 1);
+	t[11] = ((MUL15(a[ 0], a[11])
+		+ MUL15(a[ 1], a[10])
+		+ MUL15(a[ 2], a[ 9])
+		+ MUL15(a[ 3], a[ 8])
+		+ MUL15(a[ 4], a[ 7])
+		+ MUL15(a[ 5], a[ 6])) << 1);
+	t[12] = MUL15(a[ 6], a[ 6])
+		+ ((MUL15(a[ 0], a[12])
+		+ MUL15(a[ 1], a[11])
+		+ MUL15(a[ 2], a[10])
+		+ MUL15(a[ 3], a[ 9])
+		+ MUL15(a[ 4], a[ 8])
+		+ MUL15(a[ 5], a[ 7])) << 1);
+	t[13] = ((MUL15(a[ 0], a[13])
+		+ MUL15(a[ 1], a[12])
+		+ MUL15(a[ 2], a[11])
+		+ MUL15(a[ 3], a[10])
+		+ MUL15(a[ 4], a[ 9])
+		+ MUL15(a[ 5], a[ 8])
+		+ MUL15(a[ 6], a[ 7])) << 1);
+	t[14] = MUL15(a[ 7], a[ 7])
+		+ ((MUL15(a[ 0], a[14])
+		+ MUL15(a[ 1], a[13])
+		+ MUL15(a[ 2], a[12])
+		+ MUL15(a[ 3], a[11])
+		+ MUL15(a[ 4], a[10])
+		+ MUL15(a[ 5], a[ 9])
+		+ MUL15(a[ 6], a[ 8])) << 1);
+	t[15] = ((MUL15(a[ 0], a[15])
+		+ MUL15(a[ 1], a[14])
+		+ MUL15(a[ 2], a[13])
+		+ MUL15(a[ 3], a[12])
+		+ MUL15(a[ 4], a[11])
+		+ MUL15(a[ 5], a[10])
+		+ MUL15(a[ 6], a[ 9])
+		+ MUL15(a[ 7], a[ 8])) << 1);
+	t[16] = MUL15(a[ 8], a[ 8])
+		+ ((MUL15(a[ 0], a[16])
+		+ MUL15(a[ 1], a[15])
+		+ MUL15(a[ 2], a[14])
+		+ MUL15(a[ 3], a[13])
+		+ MUL15(a[ 4], a[12])
+		+ MUL15(a[ 5], a[11])
+		+ MUL15(a[ 6], a[10])
+		+ MUL15(a[ 7], a[ 9])) << 1);
+	t[17] = ((MUL15(a[ 0], a[17])
+		+ MUL15(a[ 1], a[16])
+		+ MUL15(a[ 2], a[15])
+		+ MUL15(a[ 3], a[14])
+		+ MUL15(a[ 4], a[13])
+		+ MUL15(a[ 5], a[12])
+		+ MUL15(a[ 6], a[11])
+		+ MUL15(a[ 7], a[10])
+		+ MUL15(a[ 8], a[ 9])) << 1);
+	t[18] = MUL15(a[ 9], a[ 9])
+		+ ((MUL15(a[ 0], a[18])
+		+ MUL15(a[ 1], a[17])
+		+ MUL15(a[ 2], a[16])
+		+ MUL15(a[ 3], a[15])
+		+ MUL15(a[ 4], a[14])
+		+ MUL15(a[ 5], a[13])
+		+ MUL15(a[ 6], a[12])
+		+ MUL15(a[ 7], a[11])
+		+ MUL15(a[ 8], a[10])) << 1);
+	t[19] = ((MUL15(a[ 0], a[19])
+		+ MUL15(a[ 1], a[18])
+		+ MUL15(a[ 2], a[17])
+		+ MUL15(a[ 3], a[16])
+		+ MUL15(a[ 4], a[15])
+		+ MUL15(a[ 5], a[14])
+		+ MUL15(a[ 6], a[13])
+		+ MUL15(a[ 7], a[12])
+		+ MUL15(a[ 8], a[11])
+		+ MUL15(a[ 9], a[10])) << 1);
+	t[20] = MUL15(a[10], a[10])
+		+ ((MUL15(a[ 1], a[19])
+		+ MUL15(a[ 2], a[18])
+		+ MUL15(a[ 3], a[17])
+		+ MUL15(a[ 4], a[16])
+		+ MUL15(a[ 5], a[15])
+		+ MUL15(a[ 6], a[14])
+		+ MUL15(a[ 7], a[13])
+		+ MUL15(a[ 8], a[12])
+		+ MUL15(a[ 9], a[11])) << 1);
+	t[21] = ((MUL15(a[ 2], a[19])
+		+ MUL15(a[ 3], a[18])
+		+ MUL15(a[ 4], a[17])
+		+ MUL15(a[ 5], a[16])
+		+ MUL15(a[ 6], a[15])
+		+ MUL15(a[ 7], a[14])
+		+ MUL15(a[ 8], a[13])
+		+ MUL15(a[ 9], a[12])
+		+ MUL15(a[10], a[11])) << 1);
+	t[22] = MUL15(a[11], a[11])
+		+ ((MUL15(a[ 3], a[19])
+		+ MUL15(a[ 4], a[18])
+		+ MUL15(a[ 5], a[17])
+		+ MUL15(a[ 6], a[16])
+		+ MUL15(a[ 7], a[15])
+		+ MUL15(a[ 8], a[14])
+		+ MUL15(a[ 9], a[13])
+		+ MUL15(a[10], a[12])) << 1);
+	t[23] = ((MUL15(a[ 4], a[19])
+		+ MUL15(a[ 5], a[18])
+		+ MUL15(a[ 6], a[17])
+		+ MUL15(a[ 7], a[16])
+		+ MUL15(a[ 8], a[15])
+		+ MUL15(a[ 9], a[14])
+		+ MUL15(a[10], a[13])
+		+ MUL15(a[11], a[12])) << 1);
+	t[24] = MUL15(a[12], a[12])
+		+ ((MUL15(a[ 5], a[19])
+		+ MUL15(a[ 6], a[18])
+		+ MUL15(a[ 7], a[17])
+		+ MUL15(a[ 8], a[16])
+		+ MUL15(a[ 9], a[15])
+		+ MUL15(a[10], a[14])
+		+ MUL15(a[11], a[13])) << 1);
+	t[25] = ((MUL15(a[ 6], a[19])
+		+ MUL15(a[ 7], a[18])
+		+ MUL15(a[ 8], a[17])
+		+ MUL15(a[ 9], a[16])
+		+ MUL15(a[10], a[15])
+		+ MUL15(a[11], a[14])
+		+ MUL15(a[12], a[13])) << 1);
+	t[26] = MUL15(a[13], a[13])
+		+ ((MUL15(a[ 7], a[19])
+		+ MUL15(a[ 8], a[18])
+		+ MUL15(a[ 9], a[17])
+		+ MUL15(a[10], a[16])
+		+ MUL15(a[11], a[15])
+		+ MUL15(a[12], a[14])) << 1);
+	t[27] = ((MUL15(a[ 8], a[19])
+		+ MUL15(a[ 9], a[18])
+		+ MUL15(a[10], a[17])
+		+ MUL15(a[11], a[16])
+		+ MUL15(a[12], a[15])
+		+ MUL15(a[13], a[14])) << 1);
+	t[28] = MUL15(a[14], a[14])
+		+ ((MUL15(a[ 9], a[19])
+		+ MUL15(a[10], a[18])
+		+ MUL15(a[11], a[17])
+		+ MUL15(a[12], a[16])
+		+ MUL15(a[13], a[15])) << 1);
+	t[29] = ((MUL15(a[10], a[19])
+		+ MUL15(a[11], a[18])
+		+ MUL15(a[12], a[17])
+		+ MUL15(a[13], a[16])
+		+ MUL15(a[14], a[15])) << 1);
+	t[30] = MUL15(a[15], a[15])
+		+ ((MUL15(a[11], a[19])
+		+ MUL15(a[12], a[18])
+		+ MUL15(a[13], a[17])
+		+ MUL15(a[14], a[16])) << 1);
+	t[31] = ((MUL15(a[12], a[19])
+		+ MUL15(a[13], a[18])
+		+ MUL15(a[14], a[17])
+		+ MUL15(a[15], a[16])) << 1);
+	t[32] = MUL15(a[16], a[16])
+		+ ((MUL15(a[13], a[19])
+		+ MUL15(a[14], a[18])
+		+ MUL15(a[15], a[17])) << 1);
+	t[33] = ((MUL15(a[14], a[19])
+		+ MUL15(a[15], a[18])
+		+ MUL15(a[16], a[17])) << 1);
+	t[34] = MUL15(a[17], a[17])
+		+ ((MUL15(a[15], a[19])
+		+ MUL15(a[16], a[18])) << 1);
+	t[35] = ((MUL15(a[16], a[19])
+		+ MUL15(a[17], a[18])) << 1);
+	t[36] = MUL15(a[18], a[18])
+		+ ((MUL15(a[17], a[19])) << 1);
+	t[37] = ((MUL15(a[18], a[19])) << 1);
+	t[38] = MUL15(a[19], a[19]);
+	d[39] = norm13(d, t, 39);	/* norm13 output goes to d[]; its return value is the top word */
+}
+
+#endif
+
+/*
+ * Modulus for field F256 (P-256 coordinates), 20 little-endian 13-bit words.
+ */
+static const uint32_t F256[] = {
+	0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x1FFF, 0x001F,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0400, 0x0000,
+	0x0000, 0x1FF8, 0x1FFF, 0x01FF
+};
+
+/*
+ * The 'b' curve equation coefficient for P-256, 20 little-endian 13-bit words.
+ */
+static const uint32_t P256_B[] = {
+	0x004B, 0x1E93, 0x0F89, 0x1C78, 0x03BC, 0x187B, 0x114E, 0x1619,
+	0x1D06, 0x0328, 0x01AF, 0x0D31, 0x1557, 0x15DE, 0x1ECF, 0x127C,
+	0x0A3A, 0x0EC5, 0x118D, 0x00B5
+};
+
+/*
+ * "Short reduction" in field F256 (field for curve P-256): the bits of
+ * weight 2^256 and above are folded back with 2^256 = 2^224-2^192-2^96+1.
+ * Source should be less than 262 bits; output is at most 257 bits and < 2*p.
+ */
+static void
+reduce_f256(uint32_t *d)
+{
+	uint32_t hi;
+
+	hi = d[19] >> 9;
+	d[19] &= 0x01FF;
+	d[0] += hi;
+	d[7] -= hi << 5;
+	d[14] -= hi << 10;
+	d[17] += hi << 3;
+	norm13(d, d, 20);
+}
+
+/*
+ * "Final reduction" in field F256 (field for curve P-256). The source
+ * value must be less than twice the modulus. The modulus is subtracted
+ * into a scratch array; if no borrow comes out (value >= modulus), the
+ * difference replaces the value and 1 is returned. Otherwise the value
+ * is left untouched and 0 is returned.
+ */
+static uint32_t
+reduce_final_f256(uint32_t *d)
+{
+	uint32_t diff[20];
+	uint32_t borrow, ge;
+	int k;
+
+	borrow = 0;
+	for (k = 0; k < 20; k ++) {
+		uint32_t z;
+
+		z = d[k] - F256[k] - borrow;
+		diff[k] = z & 0x1FFF;
+		borrow = z >> 31;
+	}
+	/* Commit the difference only when the subtraction did not borrow. */
+	ge = borrow ^ 1;
+	CCOPY(ge, d, diff, sizeof diff);
+	return ge;
+}
+
+/*
+ * Perform a multiplication of two integers modulo
+ * 2^256-2^224+2^192+2^96-1 (for NIST curve P-256). Operands are arrays
+ * of 20 words, each containing 13 bits of data, in little-endian order.
+ * On input, upper word may be up to 13 bits (hence value up to 2^260-1);
+ * on output, value fits on 257 bits and is lower than twice the modulus.
+ */
+static void
+mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t t[40], cc;
+	int i;
+
+	/*
+	 * Compute raw multiplication. All result words fit in 13 bits
+	 * each.
+	 */
+	mul20(t, a, b);
+
+	/*
+	 * Modular reduction: each high word is added/subtracted where
+	 * necessary.
+	 *
+	 * The modulus is:
+	 *    p = 2^256 - 2^224 + 2^192 + 2^96 - 1
+	 * Therefore:
+	 *    2^256 = 2^224 - 2^192 - 2^96 + 1 mod p
+	 *
+	 * For a word x at bit offset n (n >= 256), we have:
+	 *    x*2^n = x*2^(n-32) - x*2^(n-64)
+	 *            - x*2^(n - 160) + x*2^(n-256) mod p
+	 *
+	 * Thus, we can nullify the high word if we reinject it at some
+	 * proper emplacements.
+	 */
+	for (i = 39; i >= 20; i --) {
+		uint32_t x;
+
+		x = t[i];
+		t[i - 2] += ARSH(x, 6);	/* + x*2^(n-32), upper part */
+		t[i - 3] += (x << 7) & 0x1FFF;
+		t[i - 4] -= ARSH(x, 12);	/* - x*2^(n-64), upper part */
+		t[i - 5] -= (x << 1) & 0x1FFF;
+		t[i - 12] -= ARSH(x, 4);	/* - x*2^(n-160), upper part */
+		t[i - 13] -= (x << 9) & 0x1FFF;
+		t[i - 19] += ARSH(x, 9);	/* + x*2^(n-256), upper part */
+		t[i - 20] += (x << 4) & 0x1FFF;
+	}
+
+	/*
+	 * Propagate carries. This is a signed propagation, and the
+	 * result may be negative. The loop above may enlarge values,
+	 * but not too much: worst case is the chain involving t[i - 3],
+	 * in which a value may be added to itself up to 7 times. Since
+	 * starting values are 13-bit each, all words fit on 20 bits
+	 * (21 to account for the sign bit).
+	 */
+	cc = norm13(t, t, 20);
+
+	/*
+	 * Perform modular reduction again for the bits beyond 256 (the carry
+	 * and the bits 256..259). Since the largest shift below is by 10
+	 * bits, and the values fit on 21 bits, values fit in 32-bit words,
+	 * thereby allowing injecting full word values.
+	 */
+	cc = (cc << 4) | (t[19] >> 9);
+	t[19] &= 0x01FF;
+	t[17] += cc << 3;
+	t[14] -= cc << 10;
+	t[7] -= cc << 5;
+	t[0] += cc;
+
+	/*
+	 * If the carry is negative, then after carry propagation, we may
+	 * end up with a value which is negative, and we don't want that.
+	 * Thus, in that case, we add the modulus. Note that the subtraction
+	 * result, when the carry is negative, is always smaller than the
+	 * modulus, so the extra addition will not make the value exceed
+	 * twice the modulus.
+	 */
+	cc >>= 31;
+	t[0] -= cc;
+	t[7] += cc << 5;
+	t[14] += cc << 10;
+	t[17] -= cc << 3;
+	t[19] += cc << 9;
+
+	norm13(d, t, 20);
+}
+
+/*
+ * Square an integer modulo 2^256-2^224+2^192+2^96-1 (for NIST curve
+ * P-256). Operand is an array of 20 words, each containing 13 bits of
+ * data, in little-endian order. On input, upper word may be up to 13
+ * bits (hence value up to 2^260-1); on output, value fits on 257 bits
+ * and is lower than twice the modulus.
+ */
+static void
+square_f256(uint32_t *d, const uint32_t *a)
+{
+	uint32_t t[40], cc;
+	int i;
+
+	/*
+	 * Compute raw square. All result words fit in 13 bits each.
+	 */
+	square20(t, a);
+
+	/*
+	 * Modular reduction: each high word is added/subtracted where
+	 * necessary.
+	 *
+	 * The modulus is:
+	 *    p = 2^256 - 2^224 + 2^192 + 2^96 - 1
+	 * Therefore:
+	 *    2^256 = 2^224 - 2^192 - 2^96 + 1 mod p
+	 *
+	 * For a word x at bit offset n (n >= 256), we have:
+	 *    x*2^n = x*2^(n-32) - x*2^(n-64)
+	 *            - x*2^(n - 160) + x*2^(n-256) mod p
+	 *
+	 * Thus, we can nullify the high word if we reinject it at some
+	 * proper emplacements.
+	 */
+	for (i = 39; i >= 20; i --) {
+		uint32_t x;
+
+		x = t[i];
+		t[i - 2] += ARSH(x, 6);	/* + x*2^(n-32), upper part */
+		t[i - 3] += (x << 7) & 0x1FFF;
+		t[i - 4] -= ARSH(x, 12);	/* - x*2^(n-64), upper part */
+		t[i - 5] -= (x << 1) & 0x1FFF;
+		t[i - 12] -= ARSH(x, 4);	/* - x*2^(n-160), upper part */
+		t[i - 13] -= (x << 9) & 0x1FFF;
+		t[i - 19] += ARSH(x, 9);	/* + x*2^(n-256), upper part */
+		t[i - 20] += (x << 4) & 0x1FFF;
+	}
+
+	/*
+	 * Propagate carries. This is a signed propagation, and the
+	 * result may be negative. The loop above may enlarge values,
+	 * but not too much: worst case is the chain involving t[i - 3],
+	 * in which a value may be added to itself up to 7 times. Since
+	 * starting values are 13-bit each, all words fit on 20 bits
+	 * (21 to account for the sign bit).
+	 */
+	cc = norm13(t, t, 20);
+
+	/*
+	 * Perform modular reduction again for the bits beyond 256 (the carry
+	 * and the bits 256..259). Since the largest shift below is by 10
+	 * bits, and the values fit on 21 bits, values fit in 32-bit words,
+	 * thereby allowing injecting full word values.
+	 */
+	cc = (cc << 4) | (t[19] >> 9);
+	t[19] &= 0x01FF;
+	t[17] += cc << 3;
+	t[14] -= cc << 10;
+	t[7] -= cc << 5;
+	t[0] += cc;
+
+	/*
+	 * If the carry is negative, then after carry propagation, we may
+	 * end up with a value which is negative, and we don't want that.
+	 * Thus, in that case, we add the modulus. Note that the subtraction
+	 * result, when the carry is negative, is always smaller than the
+	 * modulus, so the extra addition will not make the value exceed
+	 * twice the modulus.
+	 */
+	cc >>= 31;
+	t[0] -= cc;
+	t[7] += cc << 5;
+	t[14] += cc << 10;
+	t[17] -= cc << 3;
+	t[19] += cc << 9;
+
+	norm13(d, t, 20);
+}
+
+/*
+ * Jacobian coordinates for a point in P-256: affine coordinates (X,Y)
+ * are such that:
+ *   X = x / z^2
+ *   Y = y / z^3
+ * For the point at infinity, z = 0.
+ * Each point thus admits many possible representations.
+ *
+ * Coordinates are arrays of 20 32-bit integers (little-endian order), each
+ * holding 13 bits of data. Values may also be slightly greater than the
+ * modulus, but they will always be lower than twice the modulus.
+ */
+typedef struct {
+	uint32_t x[20];
+	uint32_t y[20];
+	uint32_t z[20];
+} p256_jacobian;
+
+/*
+ * Convert a point to affine coordinates (in place):
+ *  - If the point is the point at infinity, then all three coordinates
+ *    are set to 0.
+ *  - Otherwise, the 'z' coordinate is set to 1, and the 'x' and 'y'
+ *    coordinates are the 'X' and 'Y' affine coordinates.
+ * The coordinates are guaranteed to be lower than the modulus.
+ */
+static void
+p256_to_affine(p256_jacobian *P)
+{
+	uint32_t t1[20], t2[20];
+	int i;
+
+	/*
+	 * Invert z with a modular exponentiation: the modulus is
+	 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1, and the exponent is
+	 * p-2. Exponent bit pattern (from high to low) is:
+	 *  - 32 bits of value 1
+	 *  - 31 bits of value 0
+	 *  - 1 bit of value 1
+	 *  - 96 bits of value 0
+	 *  - 94 bits of value 1
+	 *  - 1 bit of value 0
+	 *  - 1 bit of value 1
+	 * Thus, we precompute z^(2^31-1) to speed things up.
+	 *
+	 * If z = 0 (point at infinity) then the modular exponentiation
+	 * will yield 0, which leads to the expected result (all three
+	 * coordinates set to 0).
+	 */
+
+	/*
+	 * A simple square-and-multiply for z^(2^31-1), left in t1. We could
+	 * save about two dozen multiplications here with an addition chain,
+	 * but this would require a bit more code, and extra stack buffers.
+	 */
+	memcpy(t1, P->z, sizeof P->z);
+	for (i = 0; i < 30; i ++) {
+		square_f256(t1, t1);
+		mul_f256(t1, t1, P->z);
+	}
+
+	/*
+	 * Square-and-multiply. Apart from the squarings, we have a few
+	 * multiplications to set bits to 1; we multiply by the original z
+	 * for setting 1 bit, and by t1 for setting 31 bits.
+	 */
+	memcpy(t2, P->z, sizeof P->z);
+	for (i = 1; i < 256; i ++) {
+		square_f256(t2, t2);
+		switch (i) {
+		case 31:
+		case 190:
+		case 221:
+		case 252:
+			mul_f256(t2, t2, t1);	/* low 31 exponent bits become ones */
+			break;
+		case 63:
+		case 253:
+		case 255:
+			mul_f256(t2, t2, P->z);	/* lowest exponent bit becomes one */
+			break;
+		}
+	}
+
+	/*
+	 * Now that we have 1/z (in t2), multiply x by 1/z^2 and y by 1/z^3.
+	 */
+	mul_f256(t1, t2, t2);
+	mul_f256(P->x, t1, P->x);
+	mul_f256(t1, t1, t2);
+	mul_f256(P->y, t1, P->y);
+	reduce_final_f256(P->x);
+	reduce_final_f256(P->y);
+
+	/*
+	 * Multiply z by 1/z. If z = 0, then this will yield 0, otherwise
+	 * this will set z to 1.
+	 */
+	mul_f256(P->z, P->z, t2);
+	reduce_final_f256(P->z);
+}
+
+/*
+ * Double a point in P-256 (in place). This function works for all valid
+ * points, including the point at infinity.
+ */
+static void
+p256_double(p256_jacobian *Q)
+{
+	/*
+	 * Doubling formulas are:
+	 *
+	 *   s = 4*x*y^2
+	 *   m = 3*(x + z^2)*(x - z^2)
+	 *   x' = m^2 - 2*s
+	 *   y' = m*(s - x') - 8*y^4
+	 *   z' = 2*y*z
+	 *
+	 * These formulas work for all points, including points of order 2
+	 * and points at infinity:
+	 *   - If y = 0 then z' = 0. But there is no such point in P-256
+	 *     anyway.
+	 *   - If z = 0 then z' = 0.
+	 */
+	uint32_t t1[20], t2[20], t3[20], t4[20];
+	int i;
+
+	/*
+	 * Compute z^2 in t1.
+	 */
+	square_f256(t1, Q->z);
+
+	/*
+	 * Compute x-z^2 in t2 (offset by 2*p before subtracting) and x+z^2 in t1.
+	 */
+	for (i = 0; i < 20; i ++) {
+		t2[i] = (F256[i] << 1) + Q->x[i] - t1[i];
+		t1[i] += Q->x[i];
+	}
+	norm13(t1, t1, 20);
+	norm13(t2, t2, 20);
+
+	/*
+	 * Compute m = 3*(x+z^2)*(x-z^2) in t1.
+	 */
+	mul_f256(t3, t1, t2);
+	for (i = 0; i < 20; i ++) {
+		t1[i] = MUL15(3, t3[i]);
+	}
+	norm13(t1, t1, 20);
+
+	/*
+	 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
+	 */
+	square_f256(t3, Q->y);
+	for (i = 0; i < 20; i ++) {
+		t3[i] <<= 1;
+	}
+	norm13(t3, t3, 20);
+	mul_f256(t2, Q->x, t3);
+	for (i = 0; i < 20; i ++) {
+		t2[i] <<= 1;
+	}
+	norm13(t2, t2, 20);
+	reduce_f256(t2);
+
+	/*
+	 * Compute x' = m^2 - 2*s (offset by 4*p before subtracting).
+	 */
+	square_f256(Q->x, t1);
+	for (i = 0; i < 20; i ++) {
+		Q->x[i] += (F256[i] << 2) - (t2[i] << 1);
+	}
+	norm13(Q->x, Q->x, 20);
+	reduce_f256(Q->x);
+
+	/*
+	 * Compute z' = 2*y*z (Q->y still holds the original y here).
+	 */
+	mul_f256(t4, Q->y, Q->z);
+	for (i = 0; i < 20; i ++) {
+		Q->z[i] = t4[i] << 1;
+	}
+	norm13(Q->z, Q->z, 20);
+	reduce_f256(Q->z);
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
+	 * 2*y^2 in t3; its square is 4*y^4, doubled below to get 8*y^4.
+	 */
+	for (i = 0; i < 20; i ++) {
+		t2[i] += (F256[i] << 1) - Q->x[i];
+	}
+	norm13(t2, t2, 20);
+	mul_f256(Q->y, t1, t2);
+	square_f256(t4, t3);
+	for (i = 0; i < 20; i ++) {
+		Q->y[i] += (F256[i] << 2) - (t4[i] << 1);
+	}
+	norm13(Q->y, Q->y, 20);
+	reduce_f256(Q->y);
+}
+
+/*
+ * Add point P2 to point P1 (result is written into P1).
+ *
+ * This function computes the wrong result in the following cases:
+ *
+ *   - If P1 == 0 but P2 != 0
+ *   - If P1 != 0 but P2 == 0
+ *   - If P1 == P2
+ *
+ * In all three cases, P1 is set to the point at infinity.
+ *
+ * Returned value is 0 if one of the following occurs:
+ *
+ *   - P1 and P2 have the same Y coordinate
+ *   - P1 == 0 and P2 == 0
+ *   - The Y coordinate of one of the points is 0 and the other point is
+ *     the point at infinity.
+ *
+ * The third case cannot actually happen with valid points, since a point
+ * with Y == 0 is a point of order 2, and there is no point of order 2 on
+ * curve P-256.
+ *
+ * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller
+ * can apply the following:
+ *
+ *   - If the result is not the point at infinity, then it is correct.
+ *   - Otherwise, if the returned value is 1, then this is a case of
+ *     P1+P2 == 0, so the result is indeed the point at infinity.
+ *   - Otherwise, P1 == P2, so a "double" operation should have been
+ *     performed.
+ */
+static uint32_t
+p256_add(p256_jacobian *P1, const p256_jacobian *P2)
+{
+	/*
+	 * Addition formulas are:
+	 *
+	 *   u1 = x1 * z2^2
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1 * z2^3
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1 * z2
+	 */
+	uint32_t t1[20], t2[20], t3[20], t4[20], t5[20], t6[20], t7[20];
+	uint32_t ret;
+	int i;
+
+	/*
+	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
+	 */
+	square_f256(t3, P2->z);
+	mul_f256(t1, P1->x, t3);
+	mul_f256(t4, P2->z, t3);
+	mul_f256(t3, P1->y, t4);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	square_f256(t4, P1->z);
+	mul_f256(t2, P2->x, t4);
+	mul_f256(t5, P1->z, t4);
+	mul_f256(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 * We need to test whether r is zero, so we fully reduce it;
+	 * ret is then 1 if r != 0, and 0 if r == 0.
+	 */
+	for (i = 0; i < 20; i ++) {
+		t2[i] += (F256[i] << 1) - t1[i];
+		t4[i] += (F256[i] << 1) - t3[i];
+	}
+	norm13(t2, t2, 20);
+	norm13(t4, t4, 20);
+	reduce_f256(t4);
+	reduce_final_f256(t4);
+	ret = 0;
+	for (i = 0; i < 20; i ++) {
+		ret |= t4[i];
+	}
+	ret = (ret | -ret) >> 31;
+
+	/*
+	 * Compute h^2 (in t7), u1*h^2 (in t6) and h^3 (in t5).
+	 */
+	square_f256(t7, t2);
+	mul_f256(t6, t1, t7);
+	mul_f256(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2 (offset by 8*p before subtracting).
+	 */
+	square_f256(P1->x, t4);
+	for (i = 0; i < 20; i ++) {
+		P1->x[i] += (F256[i] << 3) - t5[i] - (t6[i] << 1);
+	}
+	norm13(P1->x, P1->x, 20);
+	reduce_f256(P1->x);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	for (i = 0; i < 20; i ++) {
+		t6[i] += (F256[i] << 1) - P1->x[i];
+	}
+	norm13(t6, t6, 20);
+	mul_f256(P1->y, t4, t6);
+	mul_f256(t1, t5, t3);
+	for (i = 0; i < 20; i ++) {
+		P1->y[i] += (F256[i] << 1) - t1[i];
+	}
+	norm13(P1->y, P1->y, 20);
+	reduce_f256(P1->y);
+
+	/*
+	 * Compute z3 = h*z1*z2.
+	 */
+	mul_f256(t1, P1->z, P2->z);
+	mul_f256(P1->z, t1, t2);
+
+	return ret;
+}
+
+/*
+ * Add point P2 to point P1 (result is written into P1). This is a
+ * specialised function for when P2 is a non-zero point in affine coordinates.
+ *
+ * This function computes the wrong result in the following cases:
+ *
+ *   - If P1 == 0
+ *   - If P1 == P2
+ *
+ * In both cases, P1 is set to the point at infinity.
+ *
+ * Returned value is 0 if one of the following occurs:
+ *
+ *   - P1 and P2 have the same Y coordinate
+ *   - The Y coordinate of P2 is 0 and P1 is the point at infinity.
+ *
+ * The second case cannot actually happen with valid points, since a point
+ * with Y == 0 is a point of order 2, and there is no point of order 2 on
+ * curve P-256.
+ *
+ * Therefore, assuming that P1 != 0 on input, then the caller
+ * can apply the following:
+ *
+ *   - If the result is not the point at infinity, then it is correct.
+ *   - Otherwise, if the returned value is 1, then this is a case of
+ *     P1+P2 == 0, so the result is indeed the point at infinity.
+ *   - Otherwise, P1 == P2, so a "double" operation should have been
+ *     performed.
+ */
+static uint32_t
+p256_add_mixed(p256_jacobian *P1, const p256_jacobian *P2)
+{
+	/*
+	 * Addition formulas are (P2->z is not read; it is taken to be 1):
+	 *
+	 *   u1 = x1
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1
+	 */
+	uint32_t t1[20], t2[20], t3[20], t4[20], t5[20], t6[20], t7[20];
+	uint32_t ret;
+	int i;
+
+	/*
+	 * Compute u1 = x1 (in t1) and s1 = y1 (in t3).
+	 */
+	memcpy(t1, P1->x, sizeof t1);
+	memcpy(t3, P1->y, sizeof t3);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	square_f256(t4, P1->z);
+	mul_f256(t2, P2->x, t4);
+	mul_f256(t5, P1->z, t4);
+	mul_f256(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 * We need to test whether r is zero, so we fully reduce it;
+	 * ret is then 1 if r != 0, and 0 if r == 0.
+	 */
+	for (i = 0; i < 20; i ++) {
+		t2[i] += (F256[i] << 1) - t1[i];
+		t4[i] += (F256[i] << 1) - t3[i];
+	}
+	norm13(t2, t2, 20);
+	norm13(t4, t4, 20);
+	reduce_f256(t4);
+	reduce_final_f256(t4);
+	ret = 0;
+	for (i = 0; i < 20; i ++) {
+		ret |= t4[i];
+	}
+	ret = (ret | -ret) >> 31;
+
+	/*
+	 * Compute h^2 (in t7), u1*h^2 (in t6) and h^3 (in t5).
+	 */
+	square_f256(t7, t2);
+	mul_f256(t6, t1, t7);
+	mul_f256(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2 (offset by 8*p before subtracting).
+	 */
+	square_f256(P1->x, t4);
+	for (i = 0; i < 20; i ++) {
+		P1->x[i] += (F256[i] << 3) - t5[i] - (t6[i] << 1);
+	}
+	norm13(P1->x, P1->x, 20);
+	reduce_f256(P1->x);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	for (i = 0; i < 20; i ++) {
+		t6[i] += (F256[i] << 1) - P1->x[i];
+	}
+	norm13(t6, t6, 20);
+	mul_f256(P1->y, t4, t6);
+	mul_f256(t1, t5, t3);
+	for (i = 0; i < 20; i ++) {
+		P1->y[i] += (F256[i] << 1) - t1[i];
+	}
+	norm13(P1->y, P1->y, 20);
+	reduce_f256(P1->y);
+
+	/*
+	 * Compute z3 = h*z1.
+	 */
+	mul_f256(P1->z, P1->z, t2);
+
+	return ret;
+}
+
+/*
+ * Decode a P-256 point (65 bytes: 0x04, X, Y); the point at infinity is
+ * not supported. Returned value is 0 if the point is invalid, 1 otherwise.
+ */
+static uint32_t
+p256_decode(p256_jacobian *P, const void *src, size_t len)
+{
+	const unsigned char *buf;
+	uint32_t tx[20], ty[20], t1[20], t2[20];
+	uint32_t bad;
+	int i;
+
+	if (len != 65) {
+		return 0;
+	}
+	buf = src;
+
+	/*
+	 * First byte must be 0x04 (uncompressed format). We could support
+	 * "hybrid format" (first byte is 0x06 or 0x07, and encodes the
+	 * least significant bit of the Y coordinate), but it is explicitly
+	 * forbidden by RFC 5480 (section 2.2).
+	 */
+	bad = NEQ(buf[0], 0x04);
+
+	/*
+	 * Decode the coordinates, and check that they are both lower
+	 * than the modulus (reduce_final_f256 returns 1 when they are not).
+	 */
+	tx[19] = be8_to_le13(tx, buf + 1, 32);
+	ty[19] = be8_to_le13(ty, buf + 33, 32);
+	bad |= reduce_final_f256(tx);
+	bad |= reduce_final_f256(ty);
+
+	/*
+	 * Check curve equation: x^3 - 3*x + b - y^2 must be 0 modulo p.
+	 */
+	square_f256(t1, tx);
+	mul_f256(t1, tx, t1);
+	square_f256(t2, ty);
+	for (i = 0; i < 20; i ++) {
+		t1[i] += (F256[i] << 3) - MUL15(3, tx[i]) + P256_B[i] - t2[i];
+	}
+	norm13(t1, t1, 20);
+	reduce_f256(t1);
+	reduce_final_f256(t1);
+	for (i = 0; i < 20; i ++) {
+		bad |= t1[i];
+	}
+
+	/*
+	 * Copy coordinates to the point structure, with z = 1.
+	 */
+	memcpy(P->x, tx, sizeof tx);
+	memcpy(P->y, ty, sizeof ty);
+	memset(P->z, 0, sizeof P->z);
+	P->z[0] = 1;
+	return EQ(bad, 0);
+}
+
+/*
+ * Write a point to dst in uncompressed format: byte 0x04, then X and Y
+ * over 32 bytes each. The point is assumed valid, affine, and non-zero.
+ */
+static void
+p256_encode(void *dst, const p256_jacobian *P)
+{
+	unsigned char *out;
+
+	out = dst;
+	*out = 0x04;
+	le13_to_be8(&out[1], 32, P->x);
+	le13_to_be8(&out[33], 32, P->y);
+}
+
+/*
+ * Multiply curve point P (in place) by the integer x[0..xlen-1] (unsigned
+ * big-endian). The integer is assumed to be lower than the curve order,
+ * and the base point must not be the point at infinity.
+ */
+static void
+p256_mul(p256_jacobian *P, const unsigned char *x, size_t xlen)
+{
+	/*
+	 * qz is a flag that is initially 1, and remains equal to 1
+	 * as long as the accumulator Q is the point at infinity.
+	 *
+	 * We use a 2-bit window to handle multiplier bits by pairs
+	 * (most significant first); the window is P, P2 = 2P, P3 = 3P.
+	 */
+	uint32_t qz;
+	p256_jacobian P2, P3, Q, T, U;
+
+	/*
+	 * Compute window values.
+	 */
+	P2 = *P;
+	p256_double(&P2);
+	P3 = *P;
+	p256_add(&P3, &P2);
+
+	/*
+	 * We start with Q = 0. We process multiplier bits 2 by 2.
+	 */
+	memset(&Q, 0, sizeof Q);
+	qz = 1;
+	while (xlen -- > 0) {
+		int k;
+
+		for (k = 6; k >= 0; k -= 2) {
+			uint32_t bits;
+			uint32_t bnz;
+
+			p256_double(&Q);
+			p256_double(&Q);
+			T = *P;
+			U = Q;
+			bits = (*x >> k) & (uint32_t)3;
+			bnz = NEQ(bits, 0);
+			CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
+			CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
+			p256_add(&U, &T);
+			CCOPY(bnz & qz, &Q, &T, sizeof Q);	/* Q was 0: take T */
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);	/* Q was not 0: take Q + T */
+			qz &= ~bnz;
+		}
+		x ++;
+	}
+	*P = Q;
+}
+
+/*
+ * Precomputed window: Gwin[k-1] holds k*G in affine coordinates, where G
+ * is the curve generator and k ranges from 1 to 15 (inclusive). Each of
+ * X and Y is 20 words of 13 bits (little-endian order); 13-bit words are
+ * packed 2-by-2 into 32-bit words (even-indexed word in the low 16 bits),
+ * with X in entries 0..9 of a row and Y in entries 10..19.
+ */
+static const uint32_t Gwin[15][20] = {
+
+	{ 0x04C60296, 0x02721176, 0x19D00F4A, 0x102517AC,
+	  0x13B8037D, 0x0748103C, 0x1E730E56, 0x08481FE2,
+	  0x0F97012C, 0x00D605F4, 0x1DFA11F5, 0x0C801A0D,
+	  0x0F670CBB, 0x0AED0CC5, 0x115E0E33, 0x181F0785,
+	  0x13F514A7, 0x0FF30E3B, 0x17171E1A, 0x009F18D0 },
+
+	{ 0x1B341978, 0x16911F11, 0x0D9A1A60, 0x1C4E1FC8,
+	  0x1E040969, 0x096A06B0, 0x091C0030, 0x09EF1A29,
+	  0x18C40D03, 0x00F91C9E, 0x13C313D1, 0x096F0748,
+	  0x011419E0, 0x1CC713A6, 0x1DD31DAD, 0x1EE80C36,
+	  0x1ECD0C69, 0x1A0800A4, 0x08861B8E, 0x000E1DD5 },
+
+	{ 0x173F1D6C, 0x02CC06F1, 0x14C21FB4, 0x043D1EB6,
+	  0x0F3606B7, 0x1A971C59, 0x1BF71951, 0x01481323,
+	  0x068D0633, 0x00BD12F9, 0x13EA1032, 0x136209E8,
+	  0x1C1E19A7, 0x06C7013E, 0x06C10AB0, 0x14C908BB,
+	  0x05830CE1, 0x1FEF18DD, 0x00620998, 0x010E0D19 },
+
+	{ 0x18180852, 0x0604111A, 0x0B771509, 0x1B6F0156,
+	  0x00181FE2, 0x1DCC0AF4, 0x16EF0659, 0x11F70E80,
+	  0x11A912D0, 0x01C414D2, 0x027618C6, 0x05840FC6,
+	  0x100215C4, 0x187E0C3B, 0x12771C96, 0x150C0B5D,
+	  0x0FF705FD, 0x07981C67, 0x1AD20C63, 0x01C11C55 },
+
+	{ 0x1E8113ED, 0x0A940370, 0x12920215, 0x1FA31D6F,
+	  0x1F7C0C82, 0x10CD03F7, 0x02640560, 0x081A0B5E,
+	  0x1BD21151, 0x00A21642, 0x0D0B0DA4, 0x0176113F,
+	  0x04440D1D, 0x001A1360, 0x1068012F, 0x1F141E49,
+	  0x10DF136B, 0x0E4F162B, 0x0D44104A, 0x01C1105F },
+
+	{ 0x011411A9, 0x01551A4F, 0x0ADA0C6B, 0x01BD0EC8,
+	  0x18120C74, 0x112F1778, 0x099202CB, 0x0C05124B,
+	  0x195316A4, 0x01600685, 0x1E3B1FE2, 0x189014E3,
+	  0x0B5E1FD7, 0x0E0311F8, 0x08E000F7, 0x174E00DE,
+	  0x160702DF, 0x1B5A15BF, 0x03A11237, 0x01D01704 },
+
+	{ 0x0C3D12A3, 0x0C501C0C, 0x17AD1300, 0x1715003F,
+	  0x03F719F8, 0x18031ED8, 0x1D980667, 0x0F681896,
+	  0x1B7D00BF, 0x011C14CE, 0x0FA000B4, 0x1C3501B0,
+	  0x0D901C55, 0x06790C10, 0x029E0736, 0x0DEB0400,
+	  0x034F183A, 0x030619B4, 0x0DEF0033, 0x00E71AC7 },
+
+	{ 0x1B7D1393, 0x1B3B1076, 0x0BED1B4D, 0x13011F3A,
+	  0x0E0E1238, 0x156A132B, 0x013A02D3, 0x160A0D01,
+	  0x1CED1EE9, 0x00C5165D, 0x184C157E, 0x08141A83,
+	  0x153C0DA5, 0x1ED70F9D, 0x05170D51, 0x02CF13B8,
+	  0x18AE1771, 0x1B04113F, 0x05EC11E9, 0x015A16B3 },
+
+	{ 0x04A41EE0, 0x1D1412E4, 0x1C591D79, 0x118511B7,
+	  0x14F00ACB, 0x1AE31E1C, 0x049C0D51, 0x016E061E,
+	  0x1DB71EDF, 0x01D41A35, 0x0E8208FA, 0x14441293,
+	  0x011F1E85, 0x1D54137A, 0x026B114F, 0x151D0832,
+	  0x00A50964, 0x1F9C1E1C, 0x064B12C9, 0x005409D1 },
+
+	{ 0x062B123F, 0x0C0D0501, 0x183704C3, 0x08E31120,
+	  0x0A2E0A6C, 0x14440FED, 0x090A0D1E, 0x13271964,
+	  0x0B590A3A, 0x019D1D9B, 0x05780773, 0x09770A91,
+	  0x0F770CA3, 0x053F19D4, 0x02C80DED, 0x1A761304,
+	  0x091E0DD9, 0x15D201B8, 0x151109AA, 0x010F0198 },
+
+	{ 0x05E101D1, 0x072314DD, 0x045F1433, 0x1A041541,
+	  0x10B3142E, 0x01840736, 0x1C1B19DB, 0x098B0418,
+	  0x1DBC083B, 0x007D1444, 0x01511740, 0x11DD1F3A,
+	  0x04ED0E2F, 0x1B4B1A62, 0x10480D04, 0x09E911A2,
+	  0x04211AFA, 0x19140893, 0x04D60CC4, 0x01210648 },
+
+	{ 0x112703C4, 0x018B1BA1, 0x164C1D50, 0x05160BE0,
+	  0x0BCC1830, 0x01CB1554, 0x13291732, 0x1B2B1918,
+	  0x0DED0817, 0x00E80775, 0x0A2401D3, 0x0BFE08B3,
+	  0x0E531199, 0x058616E9, 0x04770B91, 0x110F0C55,
+	  0x19C11554, 0x0BFB1159, 0x03541C38, 0x000E1C2D },
+
+	{ 0x10390C01, 0x02BB0751, 0x0AC5098E, 0x096C17AB,
+	  0x03C90E28, 0x10BD18BF, 0x002E1F2D, 0x092B0986,
+	  0x1BD700AC, 0x002E1F20, 0x1E3D1FD8, 0x077718BB,
+	  0x06F919C4, 0x187407ED, 0x11370E14, 0x081E139C,
+	  0x00481ADB, 0x14AB0289, 0x066A0EBE, 0x00C70ED6 },
+
+	{ 0x0694120B, 0x124E1CC9, 0x0E2F0570, 0x17CF081A,
+	  0x078906AC, 0x066D17CF, 0x1B3207F4, 0x0C5705E9,
+	  0x10001C38, 0x00A919DE, 0x06851375, 0x0F900BD8,
+	  0x080401BA, 0x0EEE0D42, 0x1B8B11EA, 0x0B4519F0,
+	  0x090F18C0, 0x062E1508, 0x0DD909F4, 0x01EB067C },
+
+	{ 0x0CDC1D5F, 0x0D1818F9, 0x07781636, 0x125B18E8,
+	  0x0D7003AF, 0x13110099, 0x1D9B1899, 0x175C1EB7,
+	  0x0E34171A, 0x01E01153, 0x081A0F36, 0x0B391783,
+	  0x1D1F147E, 0x19CE16D7, 0x11511B21, 0x1F2C10F9,
+	  0x12CA0E51, 0x05A31D39, 0x171A192E, 0x016B0E4F }
+};
+
+/*
+ * Lookup one of the Gwin[] values, by index. This is constant-time.
+ */
+static void
+lookup_Gwin(p256_jacobian *T, uint32_t idx)
+{
+	uint32_t xy[20];	/* packed entry: 10 words of x, then 10 of y */
+	uint32_t k;
+	size_t u;
+
+	memset(xy, 0, sizeof xy);	/* stays 0 if idx matches no entry */
+	for (k = 0; k < 15; k ++) {	/* every entry is read, whatever idx */
+		uint32_t m;
+
+		m = -EQ(idx, k + 1);	/* mask selecting entry k (idx == k + 1) */
+		for (u = 0; u < 20; u ++) {
+			xy[u] |= m & Gwin[k][u];
+		}
+	}
+	for (u = 0; u < 10; u ++) {	/* low half -> even slot, high -> odd */
+		T->x[(u << 1) + 0] = xy[u] & 0xFFFF;
+		T->x[(u << 1) + 1] = xy[u] >> 16;
+		T->y[(u << 1) + 0] = xy[u + 10] & 0xFFFF;
+		T->y[(u << 1) + 1] = xy[u + 10] >> 16;
+	}
+	memset(T->z, 0, sizeof T->z);	/* z is always set to 1 */
+	T->z[0] = 1;
+}
+
+/*
+ * Multiply the generator by an integer. The integer is assumed non-zero
+ * and lower than the curve order.
+ */
+static void
+p256_mulgen(p256_jacobian *P, const unsigned char *x, size_t xlen)
+{
+	/*
+	 * qz is a flag that is initially 1, and remains equal to 1
+	 * as long as the point is the point at infinity.
+	 *
+	 * We use a 4-bit window to handle multiplier bits by groups
+	 * of 4. The precomputed window is constant static data, with
+	 * points in affine coordinates; we use a constant-time lookup.
+	 */
+	p256_jacobian Q;
+	uint32_t qz;
+
+	memset(&Q, 0, sizeof Q);
+	qz = 1;
+	while (xlen -- > 0) {
+		int k;
+		unsigned bx;
+
+		bx = *x ++;	/* bytes are consumed first to last */
+		for (k = 0; k < 2; k ++) {	/* high nibble, then low nibble */
+			uint32_t bits;
+			uint32_t bnz;
+			p256_jacobian T, U;
+
+			p256_double(&Q);	/* four doublings: Q <- 16*Q */
+			p256_double(&Q);
+			p256_double(&Q);
+			p256_double(&Q);
+			bits = (bx >> 4) & 0x0F;
+			bnz = NEQ(bits, 0);
+			lookup_Gwin(&T, bits);
+			U = Q;
+			p256_add_mixed(&U, &T);	/* U = Q + T, kept only if Q != 0 */
+			CCOPY(bnz & qz, &Q, &T, sizeof Q);	/* Q == 0: take T */
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);	/* else take Q + T */
+			qz &= ~bnz;	/* cleared by the first non-zero nibble */
+			bx <<= 4;	/* bring the low nibble up for pass 2 */
+		}
+	}
+	*P = Q;
+}
+
+static const unsigned char P256_G[] = {	/* 65 bytes; see api_generator() */
+	0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8,
+	0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D,
+	0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8,
+	0x98, 0xC2, 0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F,
+	0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E, 0x16, 0x2B,
+	0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6, 0x40,
+	0x68, 0x37, 0xBF, 0x51, 0xF5
+};
+
+static const unsigned char P256_N[] = {	/* 32 bytes; see api_order() */
+	0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD,
+	0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63,
+	0x25, 0x51
+};
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	(void)curve;	/* single-curve implementation: ID not consulted */
+	*len = sizeof P256_G;	/* length of the returned array, in bytes */
+	return P256_G;
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	(void)curve;	/* single-curve implementation: ID not consulted */
+	*len = sizeof P256_N;	/* length of the returned array, in bytes */
+	return P256_N;
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	(void)curve;	/* single-curve implementation: ID not consulted */
+	*len = 32;	/* length reported through *len */
+	return 1;	/* offset; byte 0 of P256_G is the 0x04 marker */
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	uint32_t r;
+	p256_jacobian P;
+
+	(void)curve;
+	if (Glen != 65) {	/* the only point length handled here */
+		return 0;
+	}
+	r = p256_decode(&P, G, Glen);	/* status is kept, not branched on */
+	p256_mul(&P, x, xlen);
+	p256_to_affine(&P);
+	p256_encode(G, &P);	/* result overwrites the input point in G */
+	return r;	/* decode status is the only validity signal */
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	p256_jacobian P;
+
+	(void)curve;	/* single-curve implementation: ID not consulted */
+	p256_mulgen(&P, x, xlen);
+	p256_to_affine(&P);
+	p256_encode(R, &P);	/* encoded result is written to R */
+	return 65;	/* constant, whatever x is */
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	p256_jacobian P, Q;
+	uint32_t r, t, z;
+	int i;
+
+	(void)curve;
+	if (len != 65) {	/* the only point length handled here */
+		return 0;
+	}
+	r = p256_decode(&P, A, len);
+	p256_mul(&P, x, xlen);	/* P = x*A */
+	if (B == NULL) {	/* no B: second term uses the generator */
+		p256_mulgen(&Q, y, ylen);
+	} else {
+		r &= p256_decode(&Q, B, len);
+		p256_mul(&Q, y, ylen);	/* Q = y*B */
+	}
+
+	/*
+	 * The final addition may fail in case both points are equal.
+	 */
+	t = p256_add(&P, &Q);
+	reduce_final_f256(P.z);	/* presumably so zero is all-zero words */
+	z = 0;
+	for (i = 0; i < 20; i ++) {
+		z |= P.z[i];
+	}
+	z = EQ(z, 0);	/* z is now a flag: sum has a zero z coordinate */
+	p256_double(&Q);	/* candidate result for the P == Q case */
+
+	/*
+	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
+	 * have the following:
+	 *
+	 *   z = 0, t = 0   return P (normal addition)
+	 *   z = 0, t = 1   return P (normal addition)
+	 *   z = 1, t = 0   return Q (a 'double' case)
+	 *   z = 1, t = 1   report an error (P+Q = 0)
+	 */
+	CCOPY(z & ~t, &P, &Q, sizeof Q);
+	p256_to_affine(&P);
+	p256_encode(A, &P);	/* result overwrites A */
+	r &= ~(z & t);	/* P+Q = 0 turns the status into a failure */
+	return r;
+}
+
+/* see bearssl_ec.h */
+const br_ec_impl br_ec_p256_m15 = {
+	(uint32_t)0x00800000,	/* 1 << 23; presumably a curve-ID bit mask */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
diff --git a/third_party/bearssl/src/ec_p256_m31.c b/third_party/bearssl/src/ec_p256_m31.c
new file mode 100644
index 0000000..b185937
--- /dev/null
+++ b/third_party/bearssl/src/ec_p256_m31.c
@@ -0,0 +1,1469 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * If BR_NO_ARITH_SHIFT is undefined, or defined to 0, then we _assume_
+ * that right-shifting a signed negative integer copies the sign bit
+ * (arithmetic right-shift). This is "implementation-defined behaviour",
+ * i.e. it is not undefined, but it may differ between compilers. Each
+ * compiler is supposed to document its behaviour in that respect. GCC
+ * explicitly defines that an arithmetic right shift is used. We expect
+ * all other compilers to do the same, because the underlying CPUs offer
+ * an arithmetic right shift opcode that could not be used otherwise.
+ */
+#if BR_NO_ARITH_SHIFT	/* emulation; n must be neither 0 nor the width */
+#define ARSH(x, n)    (((uint32_t)(x) >> (n)) \
+                      | ((-((uint32_t)(x) >> 31)) << (32 - (n))))
+#define ARSHW(x, n)   (((uint64_t)(x) >> (n)) \
+                      | ((-((uint64_t)(x) >> 63)) << (64 - (n))))
+#else	/* native signed shift; x must be an lvalue (its address is taken) */
+#define ARSH(x, n)    ((*(int32_t *)&(x)) >> (n))
+#define ARSHW(x, n)   ((*(int64_t *)&(x)) >> (n))
+#endif	/* BR_NO_ARITH_SHIFT */
+
+/*
+ * Convert an integer from unsigned big-endian encoding to a sequence of
+ * 30-bit words in little-endian order. The final "partial" word is
+ * returned.
+ */
+static uint32_t
+be8_to_le30(uint32_t *dst, const unsigned char *src, size_t len)
+{
+	uint32_t acc;	/* bits gathered so far for the current word */
+	int acc_len;	/* number of valid bits in acc */
+
+	acc = 0;
+	acc_len = 0;
+	while (len -- > 0) {	/* last source byte (least significant) first */
+		uint32_t b;
+
+		b = src[len];
+		if (acc_len < 22) {	/* byte fits: word stays below 30 bits */
+			acc |= b << acc_len;
+			acc_len += 8;
+		} else {	/* this byte completes a 30-bit word */
+			*dst ++ = (acc | (b << acc_len)) & 0x3FFFFFFF;
+			acc = b >> (30 - acc_len);	/* leftover high bits of b */
+			acc_len -= 22;	/* i.e. acc_len + 8 - 30 */
+		}
+	}
+	return acc;	/* bits not written to dst; caller stores them */
+}
+
+/*
+ * Convert an integer (30-bit words, little-endian) to unsigned
+ * big-endian encoding. The total encoding length is provided; all
+ * the destination bytes will be filled.
+ */
+static void
+le30_to_be8(unsigned char *dst, size_t len, const uint32_t *src)
+{
+	uint32_t acc;	/* bits read from src but not yet written out */
+	int acc_len;	/* number of valid bits in acc */
+
+	acc = 0;
+	acc_len = 0;
+	while (len -- > 0) {	/* fill dst from its last byte backwards */
+		if (acc_len < 8) {	/* not enough bits: pull the next word */
+			uint32_t w;
+
+			w = *src ++;
+			dst[len] = (unsigned char)(acc | (w << acc_len));
+			acc = w >> (8 - acc_len);	/* bits of w not yet emitted */
+			acc_len += 22;	/* i.e. acc_len + 30 - 8 */
+		} else {	/* a full byte is already buffered */
+			dst[len] = (unsigned char)acc;
+			acc >>= 8;
+			acc_len -= 8;
+		}
+	}
+}
+
+/*
+ * Multiply two integers. Source integers are represented as arrays of
+ * nine 30-bit words, for values up to 2^270-1. Result is encoded over
+ * 18 words of 30 bits each.
+ */
+static void
+mul9(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	/*
+	 * Maximum intermediate result is no more than
+	 * 10376293531797946367, which fits in 64 bits. Reason:
+	 *
+	 *   10376293531797946367 = 9 * (2^30-1)^2 + 9663676406
+	 *   10376293531797946367 < 9663676407 * 2^30
+	 *
+	 * Thus, adding together 9 products of 30-bit integers, with
+	 * a carry of at most 9663676406, yields an integer that fits
+	 * on 64 bits and generates a carry of at most 9663676406.
+	 */
+	uint64_t t[17];	/* t[k] = sum of a[i]*b[j] over i + j == k */
+	uint64_t cc;
+	int i;
+
+	t[ 0] = MUL31(a[0], b[0]);
+	t[ 1] = MUL31(a[0], b[1])
+		+ MUL31(a[1], b[0]);
+	t[ 2] = MUL31(a[0], b[2])
+		+ MUL31(a[1], b[1])
+		+ MUL31(a[2], b[0]);
+	t[ 3] = MUL31(a[0], b[3])
+		+ MUL31(a[1], b[2])
+		+ MUL31(a[2], b[1])
+		+ MUL31(a[3], b[0]);
+	t[ 4] = MUL31(a[0], b[4])
+		+ MUL31(a[1], b[3])
+		+ MUL31(a[2], b[2])
+		+ MUL31(a[3], b[1])
+		+ MUL31(a[4], b[0]);
+	t[ 5] = MUL31(a[0], b[5])
+		+ MUL31(a[1], b[4])
+		+ MUL31(a[2], b[3])
+		+ MUL31(a[3], b[2])
+		+ MUL31(a[4], b[1])
+		+ MUL31(a[5], b[0]);
+	t[ 6] = MUL31(a[0], b[6])
+		+ MUL31(a[1], b[5])
+		+ MUL31(a[2], b[4])
+		+ MUL31(a[3], b[3])
+		+ MUL31(a[4], b[2])
+		+ MUL31(a[5], b[1])
+		+ MUL31(a[6], b[0]);
+	t[ 7] = MUL31(a[0], b[7])
+		+ MUL31(a[1], b[6])
+		+ MUL31(a[2], b[5])
+		+ MUL31(a[3], b[4])
+		+ MUL31(a[4], b[3])
+		+ MUL31(a[5], b[2])
+		+ MUL31(a[6], b[1])
+		+ MUL31(a[7], b[0]);
+	t[ 8] = MUL31(a[0], b[8])
+		+ MUL31(a[1], b[7])
+		+ MUL31(a[2], b[6])
+		+ MUL31(a[3], b[5])
+		+ MUL31(a[4], b[4])
+		+ MUL31(a[5], b[3])
+		+ MUL31(a[6], b[2])
+		+ MUL31(a[7], b[1])
+		+ MUL31(a[8], b[0]);
+	t[ 9] = MUL31(a[1], b[8])
+		+ MUL31(a[2], b[7])
+		+ MUL31(a[3], b[6])
+		+ MUL31(a[4], b[5])
+		+ MUL31(a[5], b[4])
+		+ MUL31(a[6], b[3])
+		+ MUL31(a[7], b[2])
+		+ MUL31(a[8], b[1]);
+	t[10] = MUL31(a[2], b[8])
+		+ MUL31(a[3], b[7])
+		+ MUL31(a[4], b[6])
+		+ MUL31(a[5], b[5])
+		+ MUL31(a[6], b[4])
+		+ MUL31(a[7], b[3])
+		+ MUL31(a[8], b[2]);
+	t[11] = MUL31(a[3], b[8])
+		+ MUL31(a[4], b[7])
+		+ MUL31(a[5], b[6])
+		+ MUL31(a[6], b[5])
+		+ MUL31(a[7], b[4])
+		+ MUL31(a[8], b[3]);
+	t[12] = MUL31(a[4], b[8])
+		+ MUL31(a[5], b[7])
+		+ MUL31(a[6], b[6])
+		+ MUL31(a[7], b[5])
+		+ MUL31(a[8], b[4]);
+	t[13] = MUL31(a[5], b[8])
+		+ MUL31(a[6], b[7])
+		+ MUL31(a[7], b[6])
+		+ MUL31(a[8], b[5]);
+	t[14] = MUL31(a[6], b[8])
+		+ MUL31(a[7], b[7])
+		+ MUL31(a[8], b[6]);
+	t[15] = MUL31(a[7], b[8])
+		+ MUL31(a[8], b[7]);
+	t[16] = MUL31(a[8], b[8]);
+
+	/*
+	 * Propagate carries.
+	 */
+	cc = 0;
+	for (i = 0; i < 17; i ++) {
+		uint64_t w;
+
+		w = t[i] + cc;
+		d[i] = (uint32_t)w & 0x3FFFFFFF;	/* keep the low 30 bits */
+		cc = w >> 30;	/* the rest moves to the next word */
+	}
+	d[17] = (uint32_t)cc;	/* final carry becomes the 18th word */
+}
+
+/*
+ * Square a 270-bit integer, represented as an array of nine 30-bit words.
+ * Result uses 18 words of 30 bits each.
+ */
+static void
+square9(uint32_t *d, const uint32_t *a)
+{
+	uint64_t t[17];	/* t[k] = sum of a[i]*a[j] over i + j == k */
+	uint64_t cc;
+	int i;
+
+	t[ 0] = MUL31(a[0], a[0]);	/* a[i]*a[i] terms are counted once */
+	t[ 1] = ((MUL31(a[0], a[1])) << 1);	/* i != j terms are doubled */
+	t[ 2] = MUL31(a[1], a[1])
+		+ ((MUL31(a[0], a[2])) << 1);
+	t[ 3] = ((MUL31(a[0], a[3])
+		+ MUL31(a[1], a[2])) << 1);
+	t[ 4] = MUL31(a[2], a[2])
+		+ ((MUL31(a[0], a[4])
+		+ MUL31(a[1], a[3])) << 1);
+	t[ 5] = ((MUL31(a[0], a[5])
+		+ MUL31(a[1], a[4])
+		+ MUL31(a[2], a[3])) << 1);
+	t[ 6] = MUL31(a[3], a[3])
+		+ ((MUL31(a[0], a[6])
+		+ MUL31(a[1], a[5])
+		+ MUL31(a[2], a[4])) << 1);
+	t[ 7] = ((MUL31(a[0], a[7])
+		+ MUL31(a[1], a[6])
+		+ MUL31(a[2], a[5])
+		+ MUL31(a[3], a[4])) << 1);
+	t[ 8] = MUL31(a[4], a[4])
+		+ ((MUL31(a[0], a[8])
+		+ MUL31(a[1], a[7])
+		+ MUL31(a[2], a[6])
+		+ MUL31(a[3], a[5])) << 1);
+	t[ 9] = ((MUL31(a[1], a[8])
+		+ MUL31(a[2], a[7])
+		+ MUL31(a[3], a[6])
+		+ MUL31(a[4], a[5])) << 1);
+	t[10] = MUL31(a[5], a[5])
+		+ ((MUL31(a[2], a[8])
+		+ MUL31(a[3], a[7])
+		+ MUL31(a[4], a[6])) << 1);
+	t[11] = ((MUL31(a[3], a[8])
+		+ MUL31(a[4], a[7])
+		+ MUL31(a[5], a[6])) << 1);
+	t[12] = MUL31(a[6], a[6])
+		+ ((MUL31(a[4], a[8])
+		+ MUL31(a[5], a[7])) << 1);
+	t[13] = ((MUL31(a[5], a[8])
+		+ MUL31(a[6], a[7])) << 1);
+	t[14] = MUL31(a[7], a[7])
+		+ ((MUL31(a[6], a[8])) << 1);
+	t[15] = ((MUL31(a[7], a[8])) << 1);
+	t[16] = MUL31(a[8], a[8]);
+
+	/*
+	 * Propagate carries.
+	 */
+	cc = 0;
+	for (i = 0; i < 17; i ++) {
+		uint64_t w;
+
+		w = t[i] + cc;
+		d[i] = (uint32_t)w & 0x3FFFFFFF;	/* keep the low 30 bits */
+		cc = w >> 30;	/* the rest moves to the next word */
+	}
+	d[17] = (uint32_t)cc;	/* final carry becomes the 18th word */
+}
+
+/*
+ * Base field modulus for P-256.
+ */
+static const uint32_t F256[] = {	/* nine words, least significant first */
+
+	0x3FFFFFFF, 0x3FFFFFFF, 0x3FFFFFFF, 0x0000003F, 0x00000000,
+	0x00000000, 0x00001000, 0x3FFFC000, 0x0000FFFF
+};
+
+/*
+ * The 'b' curve equation coefficient for P-256.
+ */
+static const uint32_t P256_B[] = {	/* nine words, same layout as F256 */
+
+	0x27D2604B, 0x2F38F0F8, 0x053B0F63, 0x0741AC33, 0x1886BC65,
+	0x2EF555DA, 0x293E7B3E, 0x0D762A8E, 0x00005AC6
+};
+
+/*
+ * Addition in the field. Source operands shall fit on 257 bits; output
+ * will be lower than twice the modulus.
+ */
+static void
+add_f256(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t w, cc;
+	int i;
+
+	cc = 0;
+	for (i = 0; i < 9; i ++) {	/* plain word-wise add with carry */
+		w = a[i] + b[i] + cc;
+		d[i] = w & 0x3FFFFFFF;
+		cc = w >> 30;
+	}
+	w >>= 16;	/* w: part of the sum at or above 2^256 (8*30 + 16) */
+	d[8] &= 0xFFFF;
+	d[3] -= w << 6;	/* - w*2^96  (3*30 + 6) */
+	d[6] -= w << 12;	/* - w*2^192 (6*30 + 12) */
+	d[7] += w << 14;	/* + w*2^224 (7*30 + 14) */
+	cc = w;	/* + w, entering at word 0 */
+	for (i = 0; i < 9; i ++) {	/* signed carry propagation */
+		w = d[i] + cc;
+		d[i] = w & 0x3FFFFFFF;
+		cc = ARSH(w, 30);
+	}
+}
+
+/*
+ * Subtraction in the field. Source operands shall be smaller than twice
+ * the modulus; the result will fulfil the same property.
+ */
+static void
+sub_f256(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t w, cc;
+	int i;
+
+	/*
+	 * We really compute a - b + 2*p to make sure that the result is
+	 * positive.
+	 */
+	w = a[0] - b[0] - 0x00002;	/* 2*p contributes -2 here */
+	d[0] = w & 0x3FFFFFFF;
+	w = a[1] - b[1] + ARSH(w, 30);
+	d[1] = w & 0x3FFFFFFF;
+	w = a[2] - b[2] + ARSH(w, 30);
+	d[2] = w & 0x3FFFFFFF;
+	w = a[3] - b[3] + ARSH(w, 30) + 0x00080;	/* +2^97 */
+	d[3] = w & 0x3FFFFFFF;
+	w = a[4] - b[4] + ARSH(w, 30);
+	d[4] = w & 0x3FFFFFFF;
+	w = a[5] - b[5] + ARSH(w, 30);
+	d[5] = w & 0x3FFFFFFF;
+	w = a[6] - b[6] + ARSH(w, 30) + 0x02000;	/* +2^193 */
+	d[6] = w & 0x3FFFFFFF;
+	w = a[7] - b[7] + ARSH(w, 30) - 0x08000;	/* -2^225 */
+	d[7] = w & 0x3FFFFFFF;
+	w = a[8] - b[8] + ARSH(w, 30) + 0x20000;	/* +2^257 */
+	d[8] = w & 0xFFFF;
+	w >>= 16;	/* w: part of the result at or above 2^256 */
+	d[8] &= 0xFFFF;	/* no effect: d[8] was masked two lines up */
+	d[3] -= w << 6;	/* - w*2^96 */
+	d[6] -= w << 12;	/* - w*2^192 */
+	d[7] += w << 14;	/* + w*2^224 */
+	cc = w;	/* + w, entering at word 0 */
+	for (i = 0; i < 9; i ++) {	/* signed carry propagation */
+		w = d[i] + cc;
+		d[i] = w & 0x3FFFFFFF;
+		cc = ARSH(w, 30);
+	}
+}
+
+/*
+ * Compute a multiplication in F256. Source operands shall be less than
+ * twice the modulus.
+ */
+static void
+mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	uint32_t t[18];	/* plain 18-word product from mul9() */
+	uint64_t s[18];	/* same words, widened for the reduction */
+	uint64_t cc, x;
+	uint32_t z, c;
+	int i;
+
+	mul9(t, a, b);
+
+	/*
+	 * Modular reduction: each high word is added/subtracted where
+	 * necessary.
+	 *
+	 * The modulus is:
+	 *    p = 2^256 - 2^224 + 2^192 + 2^96 - 1
+	 * Therefore:
+	 *    2^256 = 2^224 - 2^192 - 2^96 + 1 mod p
+	 *
+	 * For a word x at bit offset n (n >= 256), we have:
+	 *    x*2^n = x*2^(n-32) - x*2^(n-64)
+	 *            - x*2^(n - 160) + x*2^(n-256) mod p
+	 *
+	 * Thus, we can nullify the high word if we reinject it at the
+	 * appropriate positions.
+	 *
+	 * We use 64-bit intermediate words to allow for carries to
+	 * accumulate easily, before performing the final propagation.
+	 */
+	for (i = 0; i < 18; i ++) {
+		s[i] = t[i];
+	}
+
+	for (i = 17; i >= 9; i --) {	/* fold words 17 down to 9 */
+		uint64_t y;
+
+		y = s[i];
+		s[i - 1] += ARSHW(y, 2);	/* + y*2^(n-32), upper part */
+		s[i - 2] += (y << 28) & 0x3FFFFFFF;	/* lower 2 bits */
+		s[i - 2] -= ARSHW(y, 4);	/* - y*2^(n-64), upper part */
+		s[i - 3] -= (y << 26) & 0x3FFFFFFF;	/* lower 4 bits */
+		s[i - 5] -= ARSHW(y, 10);	/* - y*2^(n-160), upper part */
+		s[i - 6] -= (y << 20) & 0x3FFFFFFF;	/* lower 10 bits */
+		s[i - 8] += ARSHW(y, 16);	/* + y*2^(n-256), upper part */
+		s[i - 9] += (y << 14) & 0x3FFFFFFF;	/* lower 16 bits */
+	}
+
+	/*
+	 * Carry propagation must be signed. Moreover, we may have overdone
+	 * it a bit, and obtain a negative result.
+	 *
+	 * The loop above ran 9 times; each time, each word was augmented
+	 * by at most one extra word (in absolute value). Thus, the top
+	 * word must in fine fit in 39 bits, so the carry below will fit
+	 * on 9 bits.
+	 */
+	cc = 0;
+	for (i = 0; i < 9; i ++) {
+		x = s[i] + cc;
+		d[i] = (uint32_t)x & 0x3FFFFFFF;
+		cc = ARSHW(x, 30);
+	}
+
+	/*
+	 * All nine words fit on 30 bits, but there may be an extra
+	 * carry for a few bits (at most 9), and that carry may be
+	 * negative. Moreover, we want the result to fit on 257 bits.
+	 * The two lines below ensure that the word in d[] has length
+	 * 256 bits, and the (signed) carry (beyond 2^256) is in cc. The
+	 * significant length of cc is less than 24 bits, so we will be
+	 * able to switch to 32-bit operations.
+	 */
+	cc = ARSHW(x, 16);	/* x still holds the unmasked top word */
+	d[8] &= 0xFFFF;
+
+	/*
+	 * One extra round of reduction, for cc*2^256, which means
+	 * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative)
+	 * value. If cc is negative, then it may happen (rarely, but
+	 * not negligibly so) that the result would be negative. In
+	 * order to avoid that, if cc is negative, then we add the
+	 * modulus once. Note that if cc is negative, then propagating
+	 * that carry must yield a value lower than the modulus, so
+	 * adding the modulus once will keep the final result under
+	 * twice the modulus.
+	 */
+	z = (uint32_t)cc;
+	d[3] -= z << 6;	/* - cc*2^96 */
+	d[6] -= (z << 12) & 0x3FFFFFFF;	/* - cc*2^192, part in word 6 */
+	d[7] -= ARSH(z, 18);	/* - cc*2^192, part in word 7 */
+	d[7] += (z << 14) & 0x3FFFFFFF;	/* + cc*2^224, part in word 7 */
+	d[8] += ARSH(z, 16);	/* + cc*2^224, part in word 8 */
+	c = z >> 31;	/* 1 if cc is negative: add the modulus below */
+	d[0] -= c;	/* - 1 */
+	d[3] += c << 6;	/* + 2^96 */
+	d[6] += c << 12;	/* + 2^192 */
+	d[7] -= c << 14;	/* - 2^224 */
+	d[8] += c << 16;	/* + 2^256 */
+	for (i = 0; i < 9; i ++) {	/* + cc at word 0, then signed carries */
+		uint32_t w;
+
+		w = d[i] + z;
+		d[i] = w & 0x3FFFFFFF;
+		z = ARSH(w, 30);
+	}
+}
+
+/*
+ * Compute a square in F256. Source operand shall be less than
+ * twice the modulus.
+ */
+static void
+square_f256(uint32_t *d, const uint32_t *a)
+{
+	uint32_t t[18];	/* plain 18-word square from square9() */
+	uint64_t s[18];	/* same words, widened for the reduction */
+	uint64_t cc, x;
+	uint32_t z, c;
+	int i;
+
+	square9(t, a);
+
+	/*
+	 * Modular reduction: each high word is added/subtracted where
+	 * necessary.
+	 *
+	 * The modulus is:
+	 *    p = 2^256 - 2^224 + 2^192 + 2^96 - 1
+	 * Therefore:
+	 *    2^256 = 2^224 - 2^192 - 2^96 + 1 mod p
+	 *
+	 * For a word x at bit offset n (n >= 256), we have:
+	 *    x*2^n = x*2^(n-32) - x*2^(n-64)
+	 *            - x*2^(n - 160) + x*2^(n-256) mod p
+	 *
+	 * Thus, we can nullify the high word if we reinject it at the
+	 * appropriate positions.
+	 *
+	 * We use 64-bit intermediate words to allow for carries to
+	 * accumulate easily, before performing the final propagation.
+	 */
+	for (i = 0; i < 18; i ++) {
+		s[i] = t[i];
+	}
+
+	for (i = 17; i >= 9; i --) {	/* fold words 17 down to 9 */
+		uint64_t y;
+
+		y = s[i];
+		s[i - 1] += ARSHW(y, 2);	/* + y*2^(n-32), upper part */
+		s[i - 2] += (y << 28) & 0x3FFFFFFF;	/* lower 2 bits */
+		s[i - 2] -= ARSHW(y, 4);	/* - y*2^(n-64), upper part */
+		s[i - 3] -= (y << 26) & 0x3FFFFFFF;	/* lower 4 bits */
+		s[i - 5] -= ARSHW(y, 10);	/* - y*2^(n-160), upper part */
+		s[i - 6] -= (y << 20) & 0x3FFFFFFF;	/* lower 10 bits */
+		s[i - 8] += ARSHW(y, 16);	/* + y*2^(n-256), upper part */
+		s[i - 9] += (y << 14) & 0x3FFFFFFF;	/* lower 16 bits */
+	}
+
+	/*
+	 * Carry propagation must be signed. Moreover, we may have overdone
+	 * it a bit, and obtain a negative result.
+	 *
+	 * The loop above ran 9 times; each time, each word was augmented
+	 * by at most one extra word (in absolute value). Thus, the top
+	 * word must in fine fit in 39 bits, so the carry below will fit
+	 * on 9 bits.
+	 */
+	cc = 0;
+	for (i = 0; i < 9; i ++) {
+		x = s[i] + cc;
+		d[i] = (uint32_t)x & 0x3FFFFFFF;
+		cc = ARSHW(x, 30);
+	}
+
+	/*
+	 * All nine words fit on 30 bits, but there may be an extra
+	 * carry for a few bits (at most 9), and that carry may be
+	 * negative. Moreover, we want the result to fit on 257 bits.
+	 * The two lines below ensure that the word in d[] has length
+	 * 256 bits, and the (signed) carry (beyond 2^256) is in cc. The
+	 * significant length of cc is less than 24 bits, so we will be
+	 * able to switch to 32-bit operations.
+	 */
+	cc = ARSHW(x, 16);	/* x still holds the unmasked top word */
+	d[8] &= 0xFFFF;
+
+	/*
+	 * One extra round of reduction, for cc*2^256, which means
+	 * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative)
+	 * value. If cc is negative, then it may happen (rarely, but
+	 * not negligibly so) that the result would be negative. In
+	 * order to avoid that, if cc is negative, then we add the
+	 * modulus once. Note that if cc is negative, then propagating
+	 * that carry must yield a value lower than the modulus, so
+	 * adding the modulus once will keep the final result under
+	 * twice the modulus.
+	 */
+	z = (uint32_t)cc;
+	d[3] -= z << 6;	/* - cc*2^96 */
+	d[6] -= (z << 12) & 0x3FFFFFFF;	/* - cc*2^192, part in word 6 */
+	d[7] -= ARSH(z, 18);	/* - cc*2^192, part in word 7 */
+	d[7] += (z << 14) & 0x3FFFFFFF;	/* + cc*2^224, part in word 7 */
+	d[8] += ARSH(z, 16);	/* + cc*2^224, part in word 8 */
+	c = z >> 31;	/* 1 if cc is negative: add the modulus below */
+	d[0] -= c;	/* - 1 */
+	d[3] += c << 6;	/* + 2^96 */
+	d[6] += c << 12;	/* + 2^192 */
+	d[7] -= c << 14;	/* - 2^224 */
+	d[8] += c << 16;	/* + 2^256 */
+	for (i = 0; i < 9; i ++) {	/* + cc at word 0, then signed carries */
+		uint32_t w;
+
+		w = d[i] + z;
+		d[i] = w & 0x3FFFFFFF;
+		z = ARSH(w, 30);
+	}
+}
+
+/*
+ * Perform a "final reduction" in field F256 (field for curve P-256).
+ * The source value must be less than twice the modulus. If the value
+ * is not lower than the modulus, then the modulus is subtracted and
+ * this function returns 1; otherwise, it leaves it untouched and it
+ * returns 0.
+ */
+static uint32_t
+reduce_final_f256(uint32_t *d)
+{
+	uint32_t t[9];	/* receives d - modulus */
+	uint32_t cc;
+	int i;
+
+	cc = 0;
+	for (i = 0; i < 9; i ++) {	/* word-wise subtraction with borrow */
+		uint32_t w;
+
+		w = d[i] - F256[i] - cc;
+		cc = w >> 31;	/* borrow out of this word */
+		t[i] = w & 0x3FFFFFFF;
+	}
+	cc ^= 1;	/* now 1 iff there was no final borrow (d >= modulus) */
+	CCOPY(cc, d, t, sizeof t);	/* keep d - modulus only in that case */
+	return cc;
+}
+
+/*
+ * Jacobian coordinates for a point in P-256: affine coordinates (X,Y)
+ * are such that:
+ *   X = x / z^2
+ *   Y = y / z^3
+ * For the point at infinity, z = 0.
+ * Each point thus admits many possible representations.
+ *
+ * Coordinates are represented in arrays of 32-bit integers, each holding
+ * 30 bits of data. Values may also be slightly greater than the modulus,
+ * but they will always be lower than twice the modulus.
+ */
+typedef struct {
+	uint32_t x[9];	/* nine words per coordinate, 30 data bits each */
+	uint32_t y[9];
+	uint32_t z[9];	/* zero for the point at infinity */
+} p256_jacobian;
+
+/*
+ * Convert a point to affine coordinates:
+ *  - If the point is the point at infinity, then all three coordinates
+ *    are set to 0.
+ *  - Otherwise, the 'z' coordinate is set to 1, and the 'x' and 'y'
+ *    coordinates are the 'X' and 'Y' affine coordinates.
+ * The coordinates are guaranteed to be lower than the modulus.
+ */
+static void
+p256_to_affine(p256_jacobian *P)
+{
+	uint32_t t1[9], t2[9];
+	int i;
+
+	/*
+	 * Invert z with a modular exponentiation: the modulus is
+	 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1, and the exponent is
+	 * p-2. Exponent bit pattern (from high to low) is:
+	 *  - 32 bits of value 1
+	 *  - 31 bits of value 0
+	 *  - 1 bit of value 1
+	 *  - 96 bits of value 0
+	 *  - 94 bits of value 1
+	 *  - 1 bit of value 0
+	 *  - 1 bit of value 1
+	 * Thus, we precompute z^(2^31-1) to speed things up.
+	 *
+	 * If z = 0 (point at infinity) then the modular exponentiation
+	 * will yield 0, which leads to the expected result (all three
+	 * coordinates set to 0).
+	 */
+
+	/*
+	 * A simple square-and-multiply for z^(2^31-1). We could save about
+	 * two dozen multiplications here with an addition chain, but
+	 * this would require a bit more code, and extra stack buffers.
+	 */
+	memcpy(t1, P->z, sizeof P->z);
+	for (i = 0; i < 30; i ++) {	/* each pass appends one 1 bit */
+		square_f256(t1, t1);
+		mul_f256(t1, t1, P->z);
+	}
+
+	/*
+	 * Square-and-multiply. Apart from the squarings, we have a few
+	 * multiplications to set bits to 1; we multiply by the original z
+	 * for setting 1 bit, and by t1 for setting 31 bits.
+	 */
+	memcpy(t2, P->z, sizeof P->z);	/* top exponent bit is already in */
+	for (i = 1; i < 256; i ++) {
+		square_f256(t2, t2);
+		switch (i) {	/* any other i: squaring only (zero bit) */
+		case 31:
+		case 190:
+		case 221:
+		case 252:
+			mul_f256(t2, t2, t1);	/* sets the 31 lowest bits so far */
+			break;
+		case 63:
+		case 253:
+		case 255:
+			mul_f256(t2, t2, P->z);	/* sets the lowest bit so far */
+			break;
+		}
+	}
+
+	/*
+	 * Now that we have 1/z, multiply x by 1/z^2 and y by 1/z^3.
+	 */
+	mul_f256(t1, t2, t2);	/* t1 = 1/z^2 */
+	mul_f256(P->x, t1, P->x);
+	mul_f256(t1, t1, t2);	/* t1 = 1/z^3 */
+	mul_f256(P->y, t1, P->y);
+	reduce_final_f256(P->x);
+	reduce_final_f256(P->y);
+
+	/*
+	 * Multiply z by 1/z. If z = 0, then this will yield 0, otherwise
+	 * this will set z to 1.
+	 */
+	mul_f256(P->z, P->z, t2);
+	reduce_final_f256(P->z);
+}
+
+/*
+ * Double a point in P-256. This function works for all valid points,
+ * including the point at infinity.
+ */
+static void
+p256_double(p256_jacobian *Q)
+{
+	/*
+	 * Doubling formulas are:
+	 *
+	 *   s = 4*x*y^2
+	 *   m = 3*(x + z^2)*(x - z^2)
+	 *   x' = m^2 - 2*s
+	 *   y' = m*(s - x') - 8*y^4
+	 *   z' = 2*y*z
+	 *
+	 * These formulas work for all points, including points of order 2
+	 * and points at infinity:
+	 *   - If y = 0 then z' = 0. But there is no such point in P-256
+	 *     anyway.
+	 *   - If z = 0 then z' = 0.
+	 */
+	uint32_t t1[9], t2[9], t3[9], t4[9];
+
+	/*
+	 * Compute z^2 in t1.
+	 */
+	square_f256(t1, Q->z);
+
+	/*
+	 * Compute x+z^2 in t2 and x-z^2 in t1.
+	 */
+	add_f256(t2, Q->x, t1);
+	sub_f256(t1, Q->x, t1);
+
+	/*
+	 * Compute 3*(x+z^2)*(x-z^2) in t1.
+	 */
+	mul_f256(t3, t1, t2);
+	add_f256(t1, t3, t3);	/* 2*t3 */
+	add_f256(t1, t3, t1);	/* 3*t3: this is m */
+
+	/*
+	 * Compute 4*x*y^2 (in t2) and 2*y^2 (in t3).
+	 */
+	square_f256(t3, Q->y);
+	add_f256(t3, t3, t3);
+	mul_f256(t2, Q->x, t3);
+	add_f256(t2, t2, t2);	/* t2 is now s */
+
+	/*
+	 * Compute x' = m^2 - 2*s.
+	 */
+	square_f256(Q->x, t1);
+	sub_f256(Q->x, Q->x, t2);
+	sub_f256(Q->x, Q->x, t2);
+
+	/*
+	 * Compute z' = 2*y*z.
+	 */
+	mul_f256(t4, Q->y, Q->z);	/* must precede the update of Q->y */
+	add_f256(Q->z, t4, t4);
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
+	 * 2*y^2 in t3.
+	 */
+	sub_f256(t2, t2, Q->x);
+	mul_f256(Q->y, t1, t2);
+	square_f256(t4, t3);	/* (2*y^2)^2 = 4*y^4 */
+	add_f256(t4, t4, t4);	/* 8*y^4 */
+	sub_f256(Q->y, Q->y, t4);
+}
+
+/*
+ * Add point P2 to point P1.
+ *
+ * This function computes the wrong result in the following cases:
+ *
+ *   - If P1 == 0 but P2 != 0
+ *   - If P1 != 0 but P2 == 0
+ *   - If P1 == P2
+ *
+ * In all three cases, P1 is set to the point at infinity.
+ *
+ * Returned value is 0 if one of the following occurs:
+ *
+ *   - P1 and P2 have the same Y coordinate
+ *   - P1 == 0 and P2 == 0
+ *   - The Y coordinate of one of the points is 0 and the other point is
+ *     the point at infinity.
+ *
+ * The third case cannot actually happen with valid points, since a point
+ * with Y == 0 is a point of order 2, and there is no point of order 2 on
+ * curve P-256.
+ *
+ * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller
+ * can apply the following:
+ *
+ *   - If the result is not the point at infinity, then it is correct.
+ *   - Otherwise, if the returned value is 1, then this is a case of
+ *     P1+P2 == 0, so the result is indeed the point at infinity.
+ *   - Otherwise, P1 == P2, so a "double" operation should have been
+ *     performed.
+ */
+static uint32_t
+p256_add(p256_jacobian *P1, const p256_jacobian *P2)
+{
+	/*
+	 * Addition formulas are:
+	 *
+	 *   u1 = x1 * z2^2
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1 * z2^3
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1 * z2
+	 */
+	uint32_t t1[9], t2[9], t3[9], t4[9], t5[9], t6[9], t7[9];
+	uint32_t ret;
+	int i;
+
+	/*
+	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
+	 */
+	square_f256(t3, P2->z);
+	mul_f256(t1, P1->x, t3);
+	mul_f256(t4, P2->z, t3);
+	mul_f256(t3, P1->y, t4);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	square_f256(t4, P1->z);
+	mul_f256(t2, P2->x, t4);
+	mul_f256(t5, P1->z, t4);
+	mul_f256(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 * We need to test whether r is zero, so we will do some extra
+	 * reduce.
+	 */
+	sub_f256(t2, t2, t1);
+	sub_f256(t4, t4, t3);
+	reduce_final_f256(t4);
+	ret = 0;
+	for (i = 0; i < 9; i ++) {
+		ret |= t4[i];
+	}
+	ret = (ret | -ret) >> 31;	/* 1 if r != 0, else 0 */
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5);
+	 */
+	square_f256(t7, t2);
+	mul_f256(t6, t1, t7);
+	mul_f256(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	square_f256(P1->x, t4);
+	sub_f256(P1->x, P1->x, t5);
+	sub_f256(P1->x, P1->x, t6);
+	sub_f256(P1->x, P1->x, t6);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	sub_f256(t6, t6, P1->x);
+	mul_f256(P1->y, t4, t6);
+	mul_f256(t1, t5, t3);
+	sub_f256(P1->y, P1->y, t1);
+
+	/*
+	 * Compute z3 = h*z1*z2.
+	 */
+	mul_f256(t1, P1->z, P2->z);
+	mul_f256(P1->z, t1, t2);
+
+	return ret;
+}
+
+/*
+ * Add point P2 to point P1. This is a specialised function for the
+ * case when P2 is a non-zero point in affine coordinates.
+ *
+ * This function computes the wrong result in the following cases:
+ *
+ *   - If P1 == 0
+ *   - If P1 == P2
+ *
+ * In both cases, P1 is set to the point at infinity.
+ *
+ * Returned value is 0 if one of the following occurs:
+ *
+ *   - P1 and P2 have the same Y coordinate
+ *   - The Y coordinate of P2 is 0 and P1 is the point at infinity.
+ *
+ * The second case cannot actually happen with valid points, since a point
+ * with Y == 0 is a point of order 2, and there is no point of order 2 on
+ * curve P-256.
+ *
+ * Therefore, assuming that P1 != 0 on input, then the caller
+ * can apply the following:
+ *
+ *   - If the result is not the point at infinity, then it is correct.
+ *   - Otherwise, if the returned value is 1, then this is a case of
+ *     P1+P2 == 0, so the result is indeed the point at infinity.
+ *   - Otherwise, P1 == P2, so a "double" operation should have been
+ *     performed.
+ */
+static uint32_t
+p256_add_mixed(p256_jacobian *P1, const p256_jacobian *P2)
+{
+	/*
+	 * Addition formulas are:
+	 *
+	 *   u1 = x1
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1
+	 */
+	uint32_t t1[9], t2[9], t3[9], t4[9], t5[9], t6[9], t7[9];
+	uint32_t ret;
+	int i;
+
+	/*
+	 * Compute u1 = x1 (in t1) and s1 = y1 (in t3).
+	 */
+	memcpy(t1, P1->x, sizeof t1);
+	memcpy(t3, P1->y, sizeof t3);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	square_f256(t4, P1->z);
+	mul_f256(t2, P2->x, t4);
+	mul_f256(t5, P1->z, t4);
+	mul_f256(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 * We need to test whether r is zero, so we will do some extra
+	 * reduce.
+	 */
+	sub_f256(t2, t2, t1);
+	sub_f256(t4, t4, t3);
+	reduce_final_f256(t4);
+	ret = 0;
+	for (i = 0; i < 9; i ++) {
+		ret |= t4[i];
+	}
+	ret = (ret | -ret) >> 31;	/* 1 if r != 0, else 0 */
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5);
+	 */
+	square_f256(t7, t2);
+	mul_f256(t6, t1, t7);
+	mul_f256(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	square_f256(P1->x, t4);
+	sub_f256(P1->x, P1->x, t5);
+	sub_f256(P1->x, P1->x, t6);
+	sub_f256(P1->x, P1->x, t6);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	sub_f256(t6, t6, P1->x);
+	mul_f256(P1->y, t4, t6);
+	mul_f256(t1, t5, t3);
+	sub_f256(P1->y, P1->y, t1);
+
+	/*
+	 * Compute z3 = h*z1 (P2->z is never read: P2 is affine).
+	 */
+	mul_f256(P1->z, P1->z, t2);
+
+	return ret;
+}
+
+/*
+ * Decode a P-256 point. This function does not support the point at
+ * infinity. Returned value is 0 if the point is invalid, 1 otherwise.
+ */
+static uint32_t
+p256_decode(p256_jacobian *P, const void *src, size_t len)
+{
+	const unsigned char *buf;
+	uint32_t tx[9], ty[9], t1[9], t2[9];
+	uint32_t bad;	/* non-zero as soon as any check fails */
+	int i;
+
+	if (len != 65) {	/* early exit: P is left untouched */
+		return 0;
+	}
+	buf = src;
+
+	/*
+	 * First byte must be 0x04 (uncompressed format). We could support
+	 * "hybrid format" (first byte is 0x06 or 0x07, and encodes the
+	 * least significant bit of the Y coordinate), but it is explicitly
+	 * forbidden by RFC 5480 (section 2.2).
+	 */
+	bad = NEQ(buf[0], 0x04);
+
+	/*
+	 * Decode the coordinates, and check that they are both lower
+	 * than the modulus.
+	 */
+	tx[8] = be8_to_le30(tx, buf + 1, 32);	/* returned partial word */
+	ty[8] = be8_to_le30(ty, buf + 33, 32);
+	bad |= reduce_final_f256(tx);	/* 1 means tx was >= modulus */
+	bad |= reduce_final_f256(ty);
+
+	/*
+	 * Check curve equation.
+	 */
+	square_f256(t1, tx);
+	mul_f256(t1, tx, t1);	/* t1 = x^3 */
+	square_f256(t2, ty);	/* t2 = y^2 */
+	sub_f256(t1, t1, tx);	/* three subtractions: x^3 - 3*x */
+	sub_f256(t1, t1, tx);
+	sub_f256(t1, t1, tx);
+	add_f256(t1, t1, P256_B);	/* + b */
+	sub_f256(t1, t1, t2);	/* - y^2 */
+	reduce_final_f256(t1);	/* all words are zero iff equation holds */
+	for (i = 0; i < 9; i ++) {
+		bad |= t1[i];
+	}
+
+	/*
+	 * Copy coordinates to the point structure.
+	 */
+	memcpy(P->x, tx, sizeof tx);	/* done even when bad != 0 */
+	memcpy(P->y, ty, sizeof ty);
+	memset(P->z, 0, sizeof P->z);
+	P->z[0] = 1;
+	return EQ(bad, 0);
+}
+
+/*
+ * Encode a point into dst (65 bytes: 0x04 header, then X and Y, 32 bytes
+ * each). The point must be valid, affine, and not the point at infinity.
+ */
+static void
+p256_encode(void *dst, const p256_jacobian *P)
+{
+	unsigned char *buf;
+
+	buf = dst;
+	buf[0] = 0x04;	/* uncompressed format marker */
+	le30_to_be8(buf + 1, 32, P->x);
+	le30_to_be8(buf + 33, 32, P->y);
+}
+
+/*
+ * Multiply a curve point by an integer x (xlen bytes, processed from the
+ * first byte, most significant bits first). The integer is assumed to be
+ * lower than the curve order; the base point must not be at infinity.
+ */
+static void
+p256_mul(p256_jacobian *P, const unsigned char *x, size_t xlen)
+{
+	/*
+	 * qz is a flag that is initially 1, and remains equal to 1
+	 * as long as the point is the point at infinity.
+	 *
+	 * We use a 2-bit window to handle multiplier bits by pairs.
+	 * The precomputed window really is the points P2 and P3.
+	 */
+	uint32_t qz;
+	p256_jacobian P2, P3, Q, T, U;
+
+	/*
+	 * Compute window values: P2 = 2*P and P3 = P + P2 = 3*P.
+	 */
+	P2 = *P;
+	p256_double(&P2);
+	P3 = *P;
+	p256_add(&P3, &P2);
+
+	/*
+	 * We start with Q = 0. We process multiplier bits 2 by 2.
+	 */
+	memset(&Q, 0, sizeof Q);
+	qz = 1;
+	while (xlen -- > 0) {
+		int k;
+
+		for (k = 6; k >= 0; k -= 2) {
+			uint32_t bits;
+			uint32_t bnz;
+
+			p256_double(&Q);
+			p256_double(&Q);	/* Q = 4*Q */
+			T = *P;	/* default window value; replaced below if bits is 2 or 3 */
+			U = Q;
+			bits = (*x >> k) & (uint32_t)3;
+			bnz = NEQ(bits, 0);
+			CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
+			CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
+			p256_add(&U, &T);	/* U = Q + T, always computed */
+			CCOPY(bnz & qz, &Q, &T, sizeof Q);	/* Q was 0: Q = T */
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);	/* Q was not 0: Q = U */
+			qz &= ~bnz;	/* cleared at the first nonzero window */
+		}
+		x ++;
+	}
+	*P = Q;
+}
+
+/*
+ * Precomputed window: row k-1 holds k*G, where G is the curve generator
+ * and k is an integer from 1 to 15 (inclusive). Each row is the affine X
+ * coordinate followed by the Y coordinate, each encoded as 9 words of 30
+ * bits (little-endian order).
+ */
+static const uint32_t Gwin[15][18] = {
+
+	{ 0x1898C296, 0x1284E517, 0x1EB33A0F, 0x00DF604B,
+	  0x2440F277, 0x339B958E, 0x04247F8B, 0x347CB84B,
+	  0x00006B17, 0x37BF51F5, 0x2ED901A0, 0x3315ECEC,
+	  0x338CD5DA, 0x0F9E162B, 0x1FAD29F0, 0x27F9B8EE,
+	  0x10B8BF86, 0x00004FE3 },
+
+	{ 0x07669978, 0x182D23F1, 0x3F21B35A, 0x225A789D,
+	  0x351AC3C0, 0x08E00C12, 0x34F7E8A5, 0x1EC62340,
+	  0x00007CF2, 0x227873D1, 0x3812DE74, 0x0E982299,
+	  0x1F6B798F, 0x3430DBBA, 0x366B1A7D, 0x2D040293,
+	  0x154436E3, 0x00000777 },
+
+	{ 0x06E7FD6C, 0x2D05986F, 0x3ADA985F, 0x31ADC87B,
+	  0x0BF165E6, 0x1FBE5475, 0x30A44C8F, 0x3934698C,
+	  0x00005ECB, 0x227D5032, 0x29E6C49E, 0x04FB83D9,
+	  0x0AAC0D8E, 0x24A2ECD8, 0x2C1B3869, 0x0FF7E374,
+	  0x19031266, 0x00008734 },
+
+	{ 0x2B030852, 0x024C0911, 0x05596EF5, 0x07F8B6DE,
+	  0x262BD003, 0x3779967B, 0x08FBBA02, 0x128D4CB4,
+	  0x0000E253, 0x184ED8C6, 0x310B08FC, 0x30EE0055,
+	  0x3F25B0FC, 0x062D764E, 0x3FB97F6A, 0x33CC719D,
+	  0x15D69318, 0x0000E0F1 },
+
+	{ 0x03D033ED, 0x05552837, 0x35BE5242, 0x2320BF47,
+	  0x268FDFEF, 0x13215821, 0x140D2D78, 0x02DE9454,
+	  0x00005159, 0x3DA16DA4, 0x0742ED13, 0x0D80888D,
+	  0x004BC035, 0x0A79260D, 0x06FCDAFE, 0x2727D8AE,
+	  0x1F6A2412, 0x0000E0C1 },
+
+	{ 0x3C2291A9, 0x1AC2ABA4, 0x3B215B4C, 0x131D037A,
+	  0x17DDE302, 0x0C90B2E2, 0x0602C92D, 0x05CA9DA9,
+	  0x0000B01A, 0x0FC77FE2, 0x35F1214E, 0x07E16BDF,
+	  0x003DDC07, 0x2703791C, 0x3038B7EE, 0x3DAD56FE,
+	  0x041D0C8D, 0x0000E85C },
+
+	{ 0x3187B2A3, 0x0018A1C0, 0x00FEF5B3, 0x3E7E2E2A,
+	  0x01FB607E, 0x2CC199F0, 0x37B4625B, 0x0EDBE82F,
+	  0x00008E53, 0x01F400B4, 0x15786A1B, 0x3041B21C,
+	  0x31CD8CF2, 0x35900053, 0x1A7E0E9B, 0x318366D0,
+	  0x076F780C, 0x000073EB },
+
+	{ 0x1B6FB393, 0x13767707, 0x3CE97DBB, 0x348E2603,
+	  0x354CADC1, 0x09D0B4EA, 0x1B053404, 0x1DE76FBA,
+	  0x000062D9, 0x0F09957E, 0x295029A8, 0x3E76A78D,
+	  0x3B547DAE, 0x27CEE0A2, 0x0575DC45, 0x1D8244FF,
+	  0x332F647A, 0x0000AD5A },
+
+	{ 0x10949EE0, 0x1E7A292E, 0x06DF8B3D, 0x02B2E30B,
+	  0x31F8729E, 0x24E35475, 0x30B71878, 0x35EDBFB7,
+	  0x0000EA68, 0x0DD048FA, 0x21688929, 0x0DE823FE,
+	  0x1C53FAA9, 0x0EA0C84D, 0x052A592A, 0x1FCE7870,
+	  0x11325CB2, 0x00002A27 },
+
+	{ 0x04C5723F, 0x30D81A50, 0x048306E4, 0x329B11C7,
+	  0x223FB545, 0x085347A8, 0x2993E591, 0x1B5ACA8E,
+	  0x0000CEF6, 0x04AF0773, 0x28D2EEA9, 0x2751EEEC,
+	  0x037B4A7F, 0x3B4C1059, 0x08F37674, 0x2AE906E1,
+	  0x18A88A6A, 0x00008786 },
+
+	{ 0x34BC21D1, 0x0CCE474D, 0x15048BF4, 0x1D0BB409,
+	  0x021CDA16, 0x20DE76C3, 0x34C59063, 0x04EDE20E,
+	  0x00003ED1, 0x282A3740, 0x0BE3BBF3, 0x29889DAE,
+	  0x03413697, 0x34C68A09, 0x210EBE93, 0x0C8A224C,
+	  0x0826B331, 0x00009099 },
+
+	{ 0x0624E3C4, 0x140317BA, 0x2F82C99D, 0x260C0A2C,
+	  0x25D55179, 0x194DCC83, 0x3D95E462, 0x356F6A05,
+	  0x0000741D, 0x0D4481D3, 0x2657FC8B, 0x1BA5CA71,
+	  0x3AE44B0D, 0x07B1548E, 0x0E0D5522, 0x05FDC567,
+	  0x2D1AA70E, 0x00000770 },
+
+	{ 0x06072C01, 0x23857675, 0x1EAD58A9, 0x0B8A12D9,
+	  0x1EE2FC79, 0x0177CB61, 0x0495A618, 0x20DEB82B,
+	  0x0000177C, 0x2FC7BFD8, 0x310EEF8B, 0x1FB4DF39,
+	  0x3B8530E8, 0x0F4E7226, 0x0246B6D0, 0x2A558A24,
+	  0x163353AF, 0x000063BB },
+
+	{ 0x24D2920B, 0x1C249DCC, 0x2069C5E5, 0x09AB2F9E,
+	  0x36DF3CF1, 0x1991FD0C, 0x062B97A7, 0x1E80070E,
+	  0x000054E7, 0x20D0B375, 0x2E9F20BD, 0x35090081,
+	  0x1C7A9DDC, 0x22E7C371, 0x087E3016, 0x03175421,
+	  0x3C6ECA7D, 0x0000F599 },
+
+	{ 0x259B9D5F, 0x0D9A318F, 0x23A0EF16, 0x00EBE4B7,
+	  0x088265AE, 0x2CDE2666, 0x2BAE7ADF, 0x1371A5C6,
+	  0x0000F045, 0x0D034F36, 0x1F967378, 0x1B5FA3F4,
+	  0x0EC8739D, 0x1643E62A, 0x1653947E, 0x22D1F4E6,
+	  0x0FB8D64B, 0x0000B5B9 }
+};
+
+/*
+ * Constant-time lookup of Gwin[idx - 1] into T (idx from 1 to 15).
+ */
+static void
+lookup_Gwin(p256_jacobian *T, uint32_t idx)
+{
+	uint32_t xy[18];
+	uint32_t k;
+	size_t u;
+
+	memset(xy, 0, sizeof xy);
+	for (k = 0; k < 15; k ++) {	/* every row is read, whatever idx is */
+		uint32_t m;
+
+		m = -EQ(idx, k + 1);	/* all-ones for the wanted row, else 0 */
+		for (u = 0; u < 18; u ++) {
+			xy[u] |= m & Gwin[k][u];
+		}
+	}
+	memcpy(T->x, &xy[0], sizeof T->x);	/* words 0..8: X */
+	memcpy(T->y, &xy[9], sizeof T->y);	/* words 9..17: Y */
+	memset(T->z, 0, sizeof T->z);
+	T->z[0] = 1;	/* z = 1 */
+}
+
+/*
+ * Multiply the generator by an integer x (xlen bytes, most significant
+ * first). The integer is assumed non-zero and lower than the curve order.
+ */
+static void
+p256_mulgen(p256_jacobian *P, const unsigned char *x, size_t xlen)
+{
+	/*
+	 * qz is a flag that is initially 1, and remains equal to 1
+	 * as long as the point is the point at infinity.
+	 *
+	 * We use a 4-bit window to handle multiplier bits by groups
+	 * of 4. The precomputed window is constant static data, with
+	 * points in affine coordinates; we use a constant-time lookup.
+	 */
+	p256_jacobian Q;
+	uint32_t qz;
+
+	memset(&Q, 0, sizeof Q);
+	qz = 1;
+	while (xlen -- > 0) {
+		int k;
+		unsigned bx;
+
+		bx = *x ++;
+		for (k = 0; k < 2; k ++) {	/* high nibble, then low nibble */
+			uint32_t bits;
+			uint32_t bnz;
+			p256_jacobian T, U;
+
+			p256_double(&Q);
+			p256_double(&Q);
+			p256_double(&Q);
+			p256_double(&Q);	/* Q = 16*Q */
+			bits = (bx >> 4) & 0x0F;
+			bnz = NEQ(bits, 0);
+			lookup_Gwin(&T, bits);
+			U = Q;
+			p256_add_mixed(&U, &T);	/* U = Q + T, always computed */
+			CCOPY(bnz & qz, &Q, &T, sizeof Q);	/* Q was 0: Q = T */
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);	/* Q was not 0: Q = U */
+			qz &= ~bnz;	/* cleared at the first nonzero window */
+			bx <<= 4;	/* bring the low nibble into position */
+		}
+	}
+	*P = Q;
+}
+
+static const unsigned char P256_G[] = {	/* generator: 0x04 || X || Y (65 bytes) */
+	0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8,
+	0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D,
+	0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8,
+	0x98, 0xC2, 0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F,
+	0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E, 0x16, 0x2B,
+	0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6, 0x40,
+	0x68, 0x37, 0xBF, 0x51, 0xF5
+};
+
+static const unsigned char P256_N[] = {	/* curve order (32 bytes, big-endian) */
+	0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD,
+	0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63,
+	0x25, 0x51
+};
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	(void)curve;	/* ignored: this implementation always answers for P-256 */
+	*len = sizeof P256_G;	/* 65 bytes */
+	return P256_G;	/* static data; callers must not modify it */
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	(void)curve;	/* ignored: this implementation always answers for P-256 */
+	*len = sizeof P256_N;	/* 32 bytes */
+	return P256_N;	/* static data; callers must not modify it */
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	(void)curve;	/* ignored: this implementation always answers for P-256 */
+	*len = 32;	/* length of the X coordinate, in bytes */
+	return 1;	/* X starts right after the 0x04 header byte */
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	uint32_t r;
+	p256_jacobian P;
+	/* G <- x*G in place; returns 1 if G decoded as a valid point, else 0. */
+	(void)curve;
+	if (Glen != 65) {
+		return 0;
+	}
+	r = p256_decode(&P, G, Glen);
+	p256_mul(&P, x, xlen);	/* performed even if decoding failed */
+	p256_to_affine(&P);
+	p256_encode(G, &P);	/* result overwrites the input point */
+	return r;
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	p256_jacobian P;
+	/* R <- x*G (65-byte uncompressed point); returns the length, 65. */
+	(void)curve;
+	p256_mulgen(&P, x, xlen);
+	p256_to_affine(&P);
+	p256_encode(R, &P);
+	return 65;
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	p256_jacobian P, Q;
+	uint32_t r, t, z;
+	int i;
+	/* A <- x*A + y*B (y*G if B is NULL); returns 1 on success, 0 on error. */
+	(void)curve;
+	if (len != 65) {
+		return 0;
+	}
+	r = p256_decode(&P, A, len);
+	p256_mul(&P, x, xlen);	/* P = x*A */
+	if (B == NULL) {
+		p256_mulgen(&Q, y, ylen);	/* Q = y*G */
+	} else {
+		r &= p256_decode(&Q, B, len);
+		p256_mul(&Q, y, ylen);	/* Q = y*B */
+	}
+
+	/*
+	 * The final addition may fail in case both points are equal.
+	 */
+	t = p256_add(&P, &Q);
+	reduce_final_f256(P.z);
+	z = 0;
+	for (i = 0; i < 9; i ++) {
+		z |= P.z[i];
+	}
+	z = EQ(z, 0);	/* z = 1 if the computed sum has Z = 0 */
+	p256_double(&Q);	/* always computed; used only in the P = Q case */
+
+	/*
+	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
+	 * have the following:
+	 *
+	 *   z = 0, t = 0   return P (normal addition)
+	 *   z = 0, t = 1   return P (normal addition)
+	 *   z = 1, t = 0   return Q (a 'double' case)
+	 *   z = 1, t = 1   report an error (P+Q = 0)
+	 */
+	CCOPY(z & ~t, &P, &Q, sizeof Q);
+	p256_to_affine(&P);
+	p256_encode(A, &P);
+	r &= ~(z & t);	/* clear the status when the sum is the point at infinity */
+	return r;
+}
+
+/* see bearssl_ec.h (function table for this P-256 implementation) */
+const br_ec_impl br_ec_p256_m31 = {
+	(uint32_t)0x00800000,	/* only bit 23 set; presumably the supported-curves mask -- confirm in bearssl_ec.h */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
diff --git a/third_party/bearssl/src/ec_p256_m62.c b/third_party/bearssl/src/ec_p256_m62.c
new file mode 100644
index 0000000..a431790
--- /dev/null
+++ b/third_party/bearssl/src/ec_p256_m62.c
@@ -0,0 +1,1765 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+#if BR_UMUL128
+#include <intrin.h>
+#endif
+
+static const unsigned char P256_G[] = {	/* generator: 0x04 || X || Y (65 bytes) */
+	0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8,
+	0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D,
+	0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8,
+	0x98, 0xC2, 0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F,
+	0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E, 0x16, 0x2B,
+	0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6, 0x40,
+	0x68, 0x37, 0xBF, 0x51, 0xF5
+};
+
+static const unsigned char P256_N[] = {	/* curve order (32 bytes, big-endian) */
+	0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD,
+	0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63,
+	0x25, 0x51
+};
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	(void)curve;	/* ignored: this implementation always answers for P-256 */
+	*len = sizeof P256_G;	/* 65 bytes */
+	return P256_G;	/* static data; callers must not modify it */
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	(void)curve;	/* ignored: this implementation always answers for P-256 */
+	*len = sizeof P256_N;	/* 32 bytes */
+	return P256_N;	/* static data; callers must not modify it */
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	(void)curve;	/* ignored: this implementation always answers for P-256 */
+	*len = 32;	/* length of the X coordinate, in bytes */
+	return 1;	/* X starts right after the 0x04 header byte */
+}
+
+/*
+ * A field element is encoded as five 64-bit integers, in basis 2^52.
+ * Limbs may occasionally exceed 2^52.
+ *
+ * A _partially reduced_ value is such that the following hold:
+ *   - top limb is less than 2^48 + 2^30
+ *   - the other limbs fit on 53 bits each
+ * In particular, such a value is less than twice the modulus p.
+ */
+
+#define BIT(n)   ((uint64_t)1 << (n))	/* 2^n as a uint64_t; n must be below 64 */
+#define MASK48   (BIT(48) - BIT(0))	/* 2^48 - 1 */
+#define MASK52   (BIT(52) - BIT(0))	/* 2^52 - 1: one full limb */
+
+/* R = 2^260 mod p, i.e. the Montgomery representation of 1 */
+static const uint64_t F256_R[] = {
+	0x0000000000010, 0xF000000000000, 0xFFFFFFFFFFFFF,
+	0xFFEFFFFFFFFFF, 0x00000000FFFFF
+};
+
+/* Curve equation is y^2 = x^3 - 3*x + B. This constant is B*R mod p
+   (Montgomery representation of B), as five limbs in basis 2^52. */
+static const uint64_t P256_B_MONTY[] = {
+	0xDF6229C4BDDFD, 0xCA8843090D89C, 0x212ED6ACF005C,
+	0x83415A220ABF7, 0x0C30061DD4874
+};
+
+/*
+ * Limb-wise addition d = a + b; d may alias a and/or b. No carry
+ * propagation or modular reduction is performed: if input limbs fit on
+ * n bits (n <= 63), output limbs fit on n+1 bits.
+ */
+static inline void
+f256_add(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+	d[0] = a[0] + b[0];
+	d[1] = a[1] + b[1];
+	d[2] = a[2] + b[2];
+	d[3] = a[3] + b[3];
+	d[4] = a[4] + b[4];
+}
+
+/*
+ * Partially reduce the provided value, in place.
+ * Input: limbs can go up to 61 bits each.
+ * Output: partially reduced.
+ */
+static inline void
+f256_partial_reduce(uint64_t *a)
+{
+	uint64_t w, cc, s;
+
+	/*
+	 * Propagate carries.
+	 */
+	w = a[0];
+	a[0] = w & MASK52;
+	cc = w >> 52;
+	w = a[1] + cc;
+	a[1] = w & MASK52;
+	cc = w >> 52;
+	w = a[2] + cc;
+	a[2] = w & MASK52;
+	cc = w >> 52;
+	w = a[3] + cc;
+	a[3] = w & MASK52;
+	cc = w >> 52;
+	a[4] += cc;
+	/* Fold s = a[4] >> 48 using 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p. */
+	s = a[4] >> 48;             /* s < 2^14 */
+	a[0] += s;                  /* a[0] < 2^52 + 2^14 */
+	w = a[1] - (s << 44);
+	a[1] = w & MASK52;          /* a[1] < 2^52 */
+	cc = -(w >> 52) & 0xFFF;    /* cc <= 64 */
+	w = a[2] - cc;
+	a[2] = w & MASK52;          /* a[2] < 2^52 */
+	cc = w >> 63;               /* cc = 0 or 1 */
+	w = a[3] - cc - (s << 36);
+	a[3] = w & MASK52;          /* a[3] < 2^52 */
+	cc = w >> 63;               /* cc = 0 or 1 */
+	w = a[4] & MASK48;
+	a[4] = w + (s << 16) - cc;  /* a[4] < 2^48 + 2^30 */
+}
+
+/*
+ * Subtraction in the field: d = a - b mod p (d may alias a or b).
+ * Input: limbs must fit on 60 bits each; in particular, the complete
+ * integer will be less than 2^268 + 2^217.
+ * Output: partially reduced.
+ */
+static inline void
+f256_sub(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+	uint64_t t[5], w, s, cc;
+
+	/*
+	 * We compute d = 2^13*p + a - b; this ensures a positive
+	 * intermediate value.
+	 *
+	 * Each individual addition/subtraction may yield a positive or
+	 * negative result; thus, we need to handle a signed carry, thus
+	 * with sign extension. We prefer not to use signed types (int64_t)
+	 * because conversion from unsigned to signed is cumbersome (a
+	 * direct cast with the top bit set is undefined behavior; instead,
+	 * we have to use pointer aliasing, using the guaranteed properties
+	 * of exact-width types, but this requires the compiler to optimize
+	 * away the writes and reads from RAM), and right-shifting a
+	 * signed negative value is implementation-defined. Therefore,
+	 * we use a custom sign extension.
+	 */
+
+	w = a[0] - b[0] - BIT(13);
+	t[0] = w & MASK52;
+	cc = w >> 52;
+	cc |= -(cc & BIT(11));	/* sign-extend the 12-bit carry to 64 bits */
+	w = a[1] - b[1] + cc;
+	t[1] = w & MASK52;
+	cc = w >> 52;
+	cc |= -(cc & BIT(11));
+	w = a[2] - b[2] + cc;
+	t[2] = (w & MASK52) + BIT(5);
+	cc = w >> 52;
+	cc |= -(cc & BIT(11));
+	w = a[3] - b[3] + cc;
+	t[3] = (w & MASK52) + BIT(49);
+	cc = w >> 52;
+	cc |= -(cc & BIT(11));
+	t[4] = (BIT(61) - BIT(29)) + a[4] - b[4] + cc;
+
+	/*
+	 * Perform partial reduction. Rule is:
+	 *  2^256 = 2^224 - 2^192 - 2^96 + 1 mod p
+	 *
+	 * At that point:
+	 *    0 <= t[0] <= 2^52 - 1
+	 *    0 <= t[1] <= 2^52 - 1
+	 *    2^5 <= t[2] <= 2^52 + 2^5 - 1
+	 *    2^49 <= t[3] <= 2^52 + 2^49 - 1
+	 *    2^59 < t[4] <= 2^61 + 2^60 - 2^29
+	 *
+	 * Thus, the value 's' (t[4] / 2^48) will be necessarily
+	 * greater than 2048, and less than 12288.
+	 */
+	s = t[4] >> 48;
+
+	d[0] = t[0] + s;             /* d[0] <= 2^52 + 12287 */
+	w = t[1] - (s << 44);
+	d[1] = w & MASK52;           /* d[1] <= 2^52 - 1 */
+	cc = -(w >> 52) & 0xFFF;     /* cc <= 48 */
+	w = t[2] - cc;
+	cc = w >> 63;                /* cc = 0 or 1 */
+	d[2] = w + (cc << 52);       /* d[2] <= 2^52 + 31 */
+	w = t[3] - cc - (s << 36);
+	cc = w >> 63;                /* cc = 0 or 1 */
+	d[3] = w + (cc << 52);       /* d[3] <= 2^52 + 2^49 - 1 */
+	d[4] = (t[4] & MASK48) + (s << 16) - cc;  /* d[4] < 2^48 + 2^30 */
+
+	/*
+	 * If s = 0, then none of the limbs is modified, and there cannot
+	 * be an overflow; if s != 0, then (s << 16) > cc, and there is
+	 * no overflow either.
+	 */
+}
+
+/*
+ * Montgomery multiplication in the field: d = a*b/2^260 mod p.
+ * Input: limbs must fit on 56 bits each.
+ * Output: partially reduced.
+ */
+static void
+f256_montymul(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+#if BR_INT128
+
+	int i;
+	uint64_t t[5];
+
+	t[0] = 0;
+	t[1] = 0;
+	t[2] = 0;
+	t[3] = 0;
+	t[4] = 0;
+	for (i = 0; i < 5; i ++) {
+		uint64_t x, f, cc, w, s;
+		unsigned __int128 z;
+
+		/*
+		 * Since limbs of a[] and b[] fit on 56 bits each,
+		 * each individual product fits on 112 bits. Also,
+		 * the factor f fits on 52 bits, so f<<48 fits on
+		 * 112 bits too. This guarantees that carries (cc)
+		 * will fit on 62 bits, thus no overflow.
+		 *
+		 * The operations below compute:
+		 *   t <- (t + x*b + f*p) / 2^52
+		 */
+		x = a[i];
+		z = (unsigned __int128)b[0] * (unsigned __int128)x
+			+ (unsigned __int128)t[0];
+		f = (uint64_t)z & MASK52;
+		cc = (uint64_t)(z >> 52);
+		z = (unsigned __int128)b[1] * (unsigned __int128)x
+			+ (unsigned __int128)t[1] + cc
+			+ ((unsigned __int128)f << 44);
+		t[0] = (uint64_t)z & MASK52;
+		cc = (uint64_t)(z >> 52);
+		z = (unsigned __int128)b[2] * (unsigned __int128)x
+			+ (unsigned __int128)t[2] + cc;
+		t[1] = (uint64_t)z & MASK52;
+		cc = (uint64_t)(z >> 52);
+		z = (unsigned __int128)b[3] * (unsigned __int128)x
+			+ (unsigned __int128)t[3] + cc
+			+ ((unsigned __int128)f << 36);
+		t[2] = (uint64_t)z & MASK52;
+		cc = (uint64_t)(z >> 52);
+		z = (unsigned __int128)b[4] * (unsigned __int128)x
+			+ (unsigned __int128)t[4] + cc
+			+ ((unsigned __int128)f << 48)
+			- ((unsigned __int128)f << 16);
+		t[3] = (uint64_t)z & MASK52;
+		t[4] = (uint64_t)(z >> 52);
+
+		/*
+		 * t[4] may be up to 62 bits here; we need to do a
+		 * partial reduction. Note that limbs t[0] to t[3]
+		 * fit on 52 bits each.
+		 */
+		s = t[4] >> 48;             /* s < 2^14 */
+		t[0] += s;                  /* t[0] < 2^52 + 2^14 */
+		w = t[1] - (s << 44);
+		t[1] = w & MASK52;          /* t[1] < 2^52 */
+		cc = -(w >> 52) & 0xFFF;    /* cc <= 64 */
+		w = t[2] - cc;
+		t[2] = w & MASK52;          /* t[2] < 2^52 */
+		cc = w >> 63;               /* cc = 0 or 1 */
+		w = t[3] - cc - (s << 36);
+		t[3] = w & MASK52;          /* t[3] < 2^52 */
+		cc = w >> 63;               /* cc = 0 or 1 */
+		w = t[4] & MASK48;
+		t[4] = w + (s << 16) - cc;  /* t[4] < 2^48 + 2^30 */
+
+		/*
+		 * The final t[4] cannot overflow because cc is 0 or 1,
+		 * and cc can be 1 only if s != 0.
+		 */
+	}
+
+	d[0] = t[0];
+	d[1] = t[1];
+	d[2] = t[2];
+	d[3] = t[3];
+	d[4] = t[4];
+
+#elif BR_UMUL128
+
+	int i;
+	uint64_t t[5];
+
+	t[0] = 0;
+	t[1] = 0;
+	t[2] = 0;
+	t[3] = 0;
+	t[4] = 0;
+	for (i = 0; i < 5; i ++) {
+		uint64_t x, f, cc, w, s, zh, zl;
+		unsigned char k;
+
+		/*
+		 * Since limbs of a[] and b[] fit on 56 bits each,
+		 * each individual product fits on 112 bits. Also,
+		 * the factor f fits on 52 bits, so f<<48 fits on
+		 * 112 bits too. This guarantees that carries (cc)
+		 * will fit on 62 bits, thus no overflow.
+		 *
+		 * The operations below compute:
+		 *   t <- (t + x*b + f*p) / 2^52
+		 */
+		x = a[i];
+		zl = _umul128(b[0], x, &zh);
+		k = _addcarry_u64(0, t[0], zl, &zl);
+		(void)_addcarry_u64(k, 0, zh, &zh);
+		f = zl & MASK52;
+		cc = (zl >> 52) | (zh << 12);
+
+		zl = _umul128(b[1], x, &zh);
+		k = _addcarry_u64(0, t[1], zl, &zl);
+		(void)_addcarry_u64(k, 0, zh, &zh);
+		k = _addcarry_u64(0, cc, zl, &zl);
+		(void)_addcarry_u64(k, 0, zh, &zh);
+		k = _addcarry_u64(0, f << 44, zl, &zl);
+		(void)_addcarry_u64(k, f >> 20, zh, &zh);
+		t[0] = zl & MASK52;
+		cc = (zl >> 52) | (zh << 12);
+
+		zl = _umul128(b[2], x, &zh);
+		k = _addcarry_u64(0, t[2], zl, &zl);
+		(void)_addcarry_u64(k, 0, zh, &zh);
+		k = _addcarry_u64(0, cc, zl, &zl);
+		(void)_addcarry_u64(k, 0, zh, &zh);
+		t[1] = zl & MASK52;
+		cc = (zl >> 52) | (zh << 12);
+
+		zl = _umul128(b[3], x, &zh);
+		k = _addcarry_u64(0, t[3], zl, &zl);
+		(void)_addcarry_u64(k, 0, zh, &zh);
+		k = _addcarry_u64(0, cc, zl, &zl);
+		(void)_addcarry_u64(k, 0, zh, &zh);
+		k = _addcarry_u64(0, f << 36, zl, &zl);
+		(void)_addcarry_u64(k, f >> 28, zh, &zh);
+		t[2] = zl & MASK52;
+		cc = (zl >> 52) | (zh << 12);
+
+		zl = _umul128(b[4], x, &zh);
+		k = _addcarry_u64(0, t[4], zl, &zl);
+		(void)_addcarry_u64(k, 0, zh, &zh);
+		k = _addcarry_u64(0, cc, zl, &zl);
+		(void)_addcarry_u64(k, 0, zh, &zh);
+		k = _addcarry_u64(0, f << 48, zl, &zl);
+		(void)_addcarry_u64(k, f >> 16, zh, &zh);
+		k = _subborrow_u64(0, zl, f << 16, &zl);
+		(void)_subborrow_u64(k, zh, f >> 48, &zh);
+		t[3] = zl & MASK52;
+		t[4] = (zl >> 52) | (zh << 12);
+
+		/*
+		 * t[4] may be up to 62 bits here; we need to do a
+		 * partial reduction. Note that limbs t[0] to t[3]
+		 * fit on 52 bits each.
+		 */
+		s = t[4] >> 48;             /* s < 2^14 */
+		t[0] += s;                  /* t[0] < 2^52 + 2^14 */
+		w = t[1] - (s << 44);
+		t[1] = w & MASK52;          /* t[1] < 2^52 */
+		cc = -(w >> 52) & 0xFFF;    /* cc <= 64 */
+		w = t[2] - cc;
+		t[2] = w & MASK52;          /* t[2] < 2^52 */
+		cc = w >> 63;               /* cc = 0 or 1 */
+		w = t[3] - cc - (s << 36);
+		t[3] = w & MASK52;          /* t[3] < 2^52 */
+		cc = w >> 63;               /* cc = 0 or 1 */
+		w = t[4] & MASK48;
+		t[4] = w + (s << 16) - cc;  /* t[4] < 2^48 + 2^30 */
+
+		/*
+		 * The final t[4] cannot overflow because cc is 0 or 1,
+		 * and cc can be 1 only if s != 0.
+		 */
+	}
+
+	d[0] = t[0];
+	d[1] = t[1];
+	d[2] = t[2];
+	d[3] = t[3];
+	d[4] = t[4];
+
+#endif
+}
+
+/*
+ * Montgomery squaring in the field (d may alias a); currently a basic
+ * wrapper around multiplication (inline, should be optimized away).
+ * TODO: see if some extra speed can be gained here.
+ */
+static inline void
+f256_montysquare(uint64_t *d, const uint64_t *a)
+{
+	f256_montymul(d, a, a);
+}
+
+/*
+ * Convert to Montgomery representation: d = a*R mod p (R = 2^260).
+ */
+static void
+f256_tomonty(uint64_t *d, const uint64_t *a)
+{
+	/*
+	 * R2 = 2^520 mod p.
+	 * If R = 2^260 mod p, then R2 = R^2 mod p; and the Montgomery
+	 * multiplication of a by R2 is: a*R2/R = a*R mod p, i.e. the
+	 * conversion to Montgomery representation.
+	 */
+	static const uint64_t R2[] = {
+		0x0000000000300, 0xFFFFFFFF00000, 0xFFFFEFFFFFFFB,
+		0xFDFFFFFFFFFFF, 0x0000004FFFFFF
+	};
+
+	f256_montymul(d, a, R2);
+}
+
+/*
+ * Convert from Montgomery representation: d = a/R mod p (R = 2^260).
+ */
+static void
+f256_frommonty(uint64_t *d, const uint64_t *a)
+{
+	/*
+	 * Montgomery multiplication by 1 is division by 2^260 modulo p.
+	 */
+	static const uint64_t one[] = { 1, 0, 0, 0, 0 };
+
+	f256_montymul(d, a, one);
+}
+
+/*
+ * Inversion in the field: d = 1/a, with a and d in Montgomery
+ * representation. If a is 0 modulo p, then the result is 0 or p.
+ */
+static void
+f256_invert(uint64_t *d, const uint64_t *a)
+{
+	/*
+	 * We compute a^(p-2) mod p. The exponent pattern (from high to
+	 * low) is:
+	 *  - 32 bits of value 1
+	 *  - 31 bits of value 0
+	 *  - 1 bit of value 1
+	 *  - 96 bits of value 0
+	 *  - 94 bits of value 1
+	 *  - 1 bit of value 0
+	 *  - 1 bit of value 1
+	 * To speed up the square-and-multiply algorithm, we precompute
+	 * a^(2^31-1).
+	 */
+
+	uint64_t r[5], t[5];
+	int i;
+	/* t <- a^(2^31-1): each round maps a^(2^k-1) to a^(2^(k+1)-1). */
+	memcpy(t, a, sizeof t);
+	for (i = 0; i < 30; i ++) {
+		f256_montysquare(t, t);
+		f256_montymul(t, t, a);
+	}
+	/* r starts as the top 31 exponent bits; i is the current bit index. */
+	memcpy(r, t, sizeof t);
+	for (i = 224; i >= 0; i --) {
+		f256_montysquare(r, r);
+		switch (i) {
+		case 0:
+		case 2:
+		case 192:
+		case 224:	/* exponent bit i is 1 */
+			f256_montymul(r, r, a);
+			break;
+		case 3:
+		case 34:
+		case 65:	/* exponent bits i to i+30 are all 1 */
+			f256_montymul(r, r, t);
+			break;
+		}
+	}
+	memcpy(d, r, sizeof r);
+}
+
+/*
+ * Finalize reduction, in place.
+ * Input value should be partially reduced.
+ * On output, limbs a[0] to a[3] fit on 52 bits each, limb a[4] fits
+ * on 48 bits, and the integer is less than p.
+ */
+static inline void
+f256_final_reduce(uint64_t *a)
+{
+	uint64_t r[5], t[5], w, cc;
+	int i;
+
+	/*
+	 * Propagate carries to ensure that limbs 0 to 3 fit on 52 bits.
+	 */
+	cc = 0;
+	for (i = 0; i < 5; i ++) {
+		w = a[i] + cc;
+		r[i] = w & MASK52;
+		cc = w >> 52;
+	}
+
+	/*
+	 * We compute t = r + (2^256 - p) = r + 2^224 - 2^192 - 2^96 + 1.
+	 * If t < 2^256, then r < p, and we return r. Otherwise, we
+	 * want to return r - p = t - 2^256.
+	 */
+
+	/*
+	 * Add 2^224 + 1, and propagate carries to ensure that limbs
+	 * t[0] to t[3] fit in 52 bits each.
+	 */
+	w = r[0] + 1;
+	t[0] = w & MASK52;
+	cc = w >> 52;
+	w = r[1] + cc;
+	t[1] = w & MASK52;
+	cc = w >> 52;
+	w = r[2] + cc;
+	t[2] = w & MASK52;
+	cc = w >> 52;
+	w = r[3] + cc;
+	t[3] = w & MASK52;
+	cc = w >> 52;
+	t[4] = r[4] + cc + BIT(16);	/* BIT(16) in limb 4 is 2^224 */
+
+	/*
+	 * Subtract 2^192 + 2^96. Since we just added 2^224 + 1, the
+	 * result cannot be negative.
+	 */
+	w = t[1] - BIT(44);	/* BIT(44) in limb 1 is 2^96 */
+	t[1] = w & MASK52;
+	cc = w >> 63;
+	w = t[2] - cc;
+	t[2] = w & MASK52;
+	cc = w >> 63;
+	w = t[3] - BIT(36) - cc;	/* BIT(36) in limb 3 is 2^192 */
+	t[3] = w & MASK52;
+	cc = w >> 63;
+	t[4] -= cc;
+
+	/*
+	 * If the top limb t[4] fits on 48 bits, then r[] is already
+	 * in the proper range. Otherwise, t[] is the value to return
+	 * (truncated to 256 bits).
+	 */
+	cc = -(t[4] >> 48);	/* selection mask; all-ones when t >= 2^256 on a partially reduced input */
+	t[4] &= MASK48;
+	for (i = 0; i < 5; i ++) {
+		a[i] = r[i] ^ (cc & (r[i] ^ t[i]));	/* branch-free select of r or t */
+	}
+}
+
+/*
+ * Points in affine and Jacobian coordinates.
+ *
+ *  - In affine coordinates, the point-at-infinity cannot be encoded.
+ *  - Jacobian coordinates (X,Y,Z) correspond to affine (X/Z^2,Y/Z^3);
+ *    if Z = 0 then this is the point-at-infinity.
+ */
+typedef struct {
+	uint64_t x[5];	/* X coordinate, five limbs */
+	uint64_t y[5];	/* Y coordinate, five limbs */
+} p256_affine;
+
+typedef struct {
+	uint64_t x[5];	/* X; affine x is X/Z^2 */
+	uint64_t y[5];	/* Y; affine y is Y/Z^3 */
+	uint64_t z[5];	/* Z; zero means the point-at-infinity */
+} p256_jacobian;
+
+/*
+ * Decode a field element from 32 bytes (unsigned big endian notation).
+ */
+static void
+f256_decode(uint64_t *a, const unsigned char *buf)
+{
+	uint64_t w0, w1, w2, w3;
+
+	w3 = br_dec64be(buf +  0);	/* most significant 64 bits */
+	w2 = br_dec64be(buf +  8);
+	w1 = br_dec64be(buf + 16);
+	w0 = br_dec64be(buf + 24);	/* least significant 64 bits */
+	a[0] = w0 & MASK52;
+	a[1] = ((w0 >> 52) | (w1 << 12)) & MASK52;
+	a[2] = ((w1 >> 40) | (w2 << 24)) & MASK52;
+	a[3] = ((w2 >> 28) | (w3 << 36)) & MASK52;
+	a[4] = w3 >> 16;	/* top 48 bits of the value */
+}
+
+/*
+ * Encode a field element into 32 bytes (unsigned big endian notation).
+ * The field element MUST be fully reduced.
+ */
+static void
+f256_encode(unsigned char *buf, const uint64_t *a)
+{
+	uint64_t w0, w1, w2, w3;
+
+	w0 = a[0] | (a[1] << 52);	/* least significant 64 bits */
+	w1 = (a[1] >> 12) | (a[2] << 40);
+	w2 = (a[2] >> 24) | (a[3] << 28);
+	w3 = (a[3] >> 36) | (a[4] << 16);	/* most significant 64 bits */
+	br_enc64be(buf +  0, w3);
+	br_enc64be(buf +  8, w2);
+	br_enc64be(buf + 16, w1);
+	br_enc64be(buf + 24, w0);
+}
+
+/*
+ * Decode a point. The returned point is in Jacobian coordinates, but
+ * with z = 1. If the encoding is invalid, or encodes a point which is
+ * not on the curve, or encodes the point at infinity, then this function
+ * returns 0. Otherwise, 1 is returned.
+ *
+ * The buffer is assumed to have length exactly 65 bytes.
+ */
+static uint32_t
+point_decode(p256_jacobian *P, const unsigned char *buf)
+{
+	uint64_t x[5], y[5], t[5], x3[5], tt;
+	uint32_t r;
+
+	/*
+	 * Header byte shall be 0x04.
+	 */
+	r = EQ(buf[0], 0x04);
+
+	/*
+	 * Decode X and Y coordinates, and convert them into
+	 * Montgomery representation.
+	 */
+	f256_decode(x, buf +  1);
+	f256_decode(y, buf + 33);
+	f256_tomonty(x, x);
+	f256_tomonty(y, y);
+
+	/*
+	 * Verify y^2 = x^3 + A*x + B. In curve P-256, A = -3.
+	 * Note that the Montgomery representation of 0 is 0. We must
+	 * take care to apply the final reduction to make sure we have
+	 * 0 and not p.
+	 */
+	f256_montysquare(t, y);
+	f256_montysquare(x3, x);
+	f256_montymul(x3, x3, x);
+	f256_sub(t, t, x3);
+	f256_add(t, t, x);
+	f256_add(t, t, x);
+	f256_add(t, t, x);
+	f256_sub(t, t, P256_B_MONTY);	/* t = y^2 - x^3 + 3*x - B */
+	f256_final_reduce(t);
+	tt = t[0] | t[1] | t[2] | t[3] | t[4];
+	r &= EQ((uint32_t)(tt | (tt >> 32)), 0);	/* fold 64 bits into 32 for EQ */
+
+	/*
+	 * Return the point in Jacobian coordinates (and Montgomery
+	 * representation).
+	 */
+	memcpy(P->x, x, sizeof x);
+	memcpy(P->y, y, sizeof y);
+	memcpy(P->z, F256_R, sizeof F256_R);	/* z = 1 in Montgomery form */
+	return r;
+}
+
+/*
+ * Final conversion for a point:
+ *  - The point is converted back to affine coordinates.
+ *  - Final reduction is performed.
+ *  - The point is encoded into the provided buffer (65 bytes).
+ *
+ * If the point is the point-at-infinity, all operations are performed,
+ * but the buffer contents are indeterminate, and 0 is returned. Otherwise,
+ * the encoded point is written in the buffer, and 1 is returned.
+ */
+static uint32_t
+point_encode(unsigned char *buf, const p256_jacobian *P)
+{
+	uint64_t t1[5], t2[5], z;
+
+	/* Set t1 = 1/z^2 and t2 = 1/z^3. */
+	f256_invert(t2, P->z);	/* t2 = 1/z */
+	f256_montysquare(t1, t2);
+	f256_montymul(t2, t2, t1);
+
+	/* Compute affine coordinates x (in t1) and y (in t2). */
+	f256_montymul(t1, P->x, t1);
+	f256_montymul(t2, P->y, t2);
+
+	/* Convert back from Montgomery representation, and finalize
+	   reductions. */
+	f256_frommonty(t1, t1);
+	f256_frommonty(t2, t2);
+	f256_final_reduce(t1);
+	f256_final_reduce(t2);
+
+	/* Encode. */
+	buf[0] = 0x04;
+	f256_encode(buf +  1, t1);
+	f256_encode(buf + 33, t2);
+
+	/* Return success if and only if P->z != 0. */
+	z = P->z[0] | P->z[1] | P->z[2] | P->z[3] | P->z[4];
+	return NEQ((uint32_t)(z | z >> 32), 0);	/* fold 64 bits into 32 for NEQ */
+}
+
+/*
+ * Point doubling in Jacobian coordinates: point P is doubled in place.
+ * Note: if the source point is the point-at-infinity, then the result is
+ * still the point-at-infinity, which is correct. Moreover, if the three
+ * coordinates were zero, then they still are zero in the returned value.
+ */
+static void
+p256_double(p256_jacobian *P)
+{
+	/*
+	 * Doubling formulas are:
+	 *
+	 *   s = 4*x*y^2
+	 *   m = 3*(x + z^2)*(x - z^2)
+	 *   x' = m^2 - 2*s
+	 *   y' = m*(s - x') - 8*y^4
+	 *   z' = 2*y*z
+	 *
+	 * These formulas work for all points, including points of order 2
+	 * and points at infinity:
+	 *   - If y = 0 then z' = 0. But there is no such point in P-256
+	 *     anyway.
+	 *   - If z = 0 then z' = 0.
+	 */
+	uint64_t t1[5], t2[5], t3[5], t4[5];
+
+	/*
+	 * Compute z^2 in t1.
+	 */
+	f256_montysquare(t1, P->z);
+
+	/*
+	 * Compute x+z^2 in t2 and x-z^2 in t1.
+	 */
+	f256_add(t2, P->x, t1);
+	f256_sub(t1, P->x, t1);
+
+	/*
+	 * Compute 3*(x+z^2)*(x-z^2) in t1.
+	 */
+	f256_montymul(t3, t1, t2);
+	f256_add(t1, t3, t3);
+	f256_add(t1, t3, t1);	/* t1 = m */
+
+	/*
+	 * Compute 4*x*y^2 (in t2) and 2*y^2 (in t3).
+	 */
+	f256_montysquare(t3, P->y);
+	f256_add(t3, t3, t3);
+	f256_montymul(t2, P->x, t3);
+	f256_add(t2, t2, t2);	/* t2 = s */
+
+	/*
+	 * Compute x' = m^2 - 2*s.
+	 */
+	f256_montysquare(P->x, t1);
+	f256_sub(P->x, P->x, t2);
+	f256_sub(P->x, P->x, t2);
+
+	/*
+	 * Compute z' = 2*y*z.
+	 */
+	f256_montymul(t4, P->y, P->z);
+	f256_add(P->z, t4, t4);
+	f256_partial_reduce(P->z);
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
+	 * 2*y^2 in t3.
+	 */
+	f256_sub(t2, t2, P->x);
+	f256_montymul(P->y, t1, t2);
+	f256_montysquare(t4, t3);	/* t4 = 4*y^4 */
+	f256_add(t4, t4, t4);	/* t4 = 8*y^4 */
+	f256_sub(P->y, P->y, t4);
+}
+
+/*
+ * Point addition (Jacobian coordinates): P1 is replaced with P1+P2.
+ * This function computes the wrong result in the following cases:
+ *
+ *   - If P1 == 0 but P2 != 0
+ *   - If P1 != 0 but P2 == 0
+ *   - If P1 == P2
+ *
+ * In all three cases, P1 is set to the point at infinity.
+ *
+ * Returned value is 0 if one of the following occurs:
+ *
+ *   - P1 and P2 have the same Y coordinate.
+ *   - P1 == 0 and P2 == 0.
+ *   - The Y coordinate of one of the points is 0 and the other point is
+ *     the point at infinity.
+ *
+ * The third case cannot actually happen with valid points, since a point
+ * with Y == 0 is a point of order 2, and there is no point of order 2 on
+ * curve P-256.
+ *
+ * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller
+ * can apply the following:
+ *
+ *   - If the result is not the point at infinity, then it is correct.
+ *   - Otherwise, if the returned value is 1, then this is a case of
+ *     P1+P2 == 0, so the result is indeed the point at infinity.
+ *   - Otherwise, P1 == P2, so a "double" operation should have been
+ *     performed.
+ *
+ * Note that you can get a returned value of 0 with a correct result,
+ * e.g. if P1 and P2 have the same Y coordinate, but distinct X coordinates.
+ */
+static uint32_t
+p256_add(p256_jacobian *P1, const p256_jacobian *P2)
+{
+	/*
+	 * Addition formulas are:
+	 *
+	 *   u1 = x1 * z2^2
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1 * z2^3
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1 * z2
+	 */
+	uint64_t t1[5], t2[5], t3[5], t4[5], t5[5], t6[5], t7[5], tt;
+	uint32_t ret;
+
+	/*
+	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
+	 */
+	f256_montysquare(t3, P2->z);
+	f256_montymul(t1, P1->x, t3);
+	f256_montymul(t4, P2->z, t3);
+	f256_montymul(t3, P1->y, t4);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	f256_montysquare(t4, P1->z);
+	f256_montymul(t2, P2->x, t4);
+	f256_montymul(t5, P1->z, t4);
+	f256_montymul(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 * We need to test whether r is zero, so we perform an extra
+	 * (final) reduction on r.
+	 */
+	f256_sub(t2, t2, t1);
+	f256_sub(t4, t4, t3);
+	f256_final_reduce(t4);
+	tt = t4[0] | t4[1] | t4[2] | t4[3] | t4[4];
+	ret = (uint32_t)(tt | (tt >> 32));
+	ret = (ret | -ret) >> 31;	/* 1 if r != 0, 0 if r == 0 */
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5).
+	 */
+	f256_montysquare(t7, t2);
+	f256_montymul(t6, t1, t7);
+	f256_montymul(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	f256_montysquare(P1->x, t4);
+	f256_sub(P1->x, P1->x, t5);
+	f256_sub(P1->x, P1->x, t6);
+	f256_sub(P1->x, P1->x, t6);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	f256_sub(t6, t6, P1->x);
+	f256_montymul(P1->y, t4, t6);
+	f256_montymul(t1, t5, t3);
+	f256_sub(P1->y, P1->y, t1);
+
+	/*
+	 * Compute z3 = h*z1*z2.
+	 */
+	f256_montymul(t1, P1->z, P2->z);
+	f256_montymul(P1->z, t1, t2);
+
+	return ret;
+}
+
+/*
+ * Point addition (mixed coordinates): P1 is replaced with P1+P2.
+ * This is a specialised function for the case when P2 is a non-zero point
+ * in affine coordinates.
+ *
+ * This function computes the wrong result in the following cases:
+ *
+ *   - If P1 == 0
+ *   - If P1 == P2
+ *
+ * In both cases, P1 is set to the point at infinity.
+ *
+ * Returned value is 0 if one of the following occurs:
+ *
+ *   - P1 and P2 have the same Y (affine) coordinate.
+ *   - The Y coordinate of P2 is 0 and P1 is the point at infinity.
+ *
+ * The second case cannot actually happen with valid points, since a point
+ * with Y == 0 is a point of order 2, and there is no point of order 2 on
+ * curve P-256.
+ *
+ * Therefore, assuming that P1 != 0 on input, then the caller
+ * can apply the following:
+ *
+ *   - If the result is not the point at infinity, then it is correct.
+ *   - Otherwise, if the returned value is 1, then this is a case of
+ *     P1+P2 == 0, so the result is indeed the point at infinity.
+ *   - Otherwise, P1 == P2, so a "double" operation should have been
+ *     performed.
+ *
+ * Again, a value of 0 may be returned in some cases where the addition
+ * result is correct.
+ */
+static uint32_t
+p256_add_mixed(p256_jacobian *P1, const p256_affine *P2)
+{
+	/*
+	 * Addition formulas are:
+	 *
+	 *   u1 = x1
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1
+	 */
+	uint64_t t1[5], t2[5], t3[5], t4[5], t5[5], t6[5], t7[5], tt;
+	uint32_t ret;
+
+	/*
+	 * Compute u1 = x1 (in t1) and s1 = y1 (in t3).
+	 */
+	memcpy(t1, P1->x, sizeof t1);
+	memcpy(t3, P1->y, sizeof t3);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	f256_montysquare(t4, P1->z);
+	f256_montymul(t2, P2->x, t4);
+	f256_montymul(t5, P1->z, t4);
+	f256_montymul(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 * We need to test whether r is zero, so we perform an extra
+	 * (final) reduction on r.
+	 */
+	f256_sub(t2, t2, t1);
+	f256_sub(t4, t4, t3);
+	f256_final_reduce(t4);
+	tt = t4[0] | t4[1] | t4[2] | t4[3] | t4[4];
+	ret = (uint32_t)(tt | (tt >> 32));
+	ret = (ret | -ret) >> 31;	/* 1 if r != 0, 0 if r == 0 */
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5).
+	 */
+	f256_montysquare(t7, t2);
+	f256_montymul(t6, t1, t7);
+	f256_montymul(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	f256_montysquare(P1->x, t4);
+	f256_sub(P1->x, P1->x, t5);
+	f256_sub(P1->x, P1->x, t6);
+	f256_sub(P1->x, P1->x, t6);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	f256_sub(t6, t6, P1->x);
+	f256_montymul(P1->y, t4, t6);
+	f256_montymul(t1, t5, t3);
+	f256_sub(P1->y, P1->y, t1);
+
+	/*
+	 * Compute z3 = h*z1 (P2 is affine, so z2 is implicitly 1).
+	 */
+	f256_montymul(P1->z, P1->z, t2);
+
+	return ret;
+}
+
+#if 0
+/* unused -- NOTE(review): needs fixes before being enabled, see notes below */
+/*
+ * Point addition (mixed coordinates, complete): P1 is replaced with P1+P2.
+ * This is a specialised function for the case when P2 is a non-zero point
+ * in affine coordinates.
+ *
+ * This function returns the correct result in all cases.
+ */
+static uint32_t
+p256_add_complete_mixed(p256_jacobian *P1, const p256_affine *P2)
+{
+	/*
+	 * Addition formulas, in the general case, are:
+	 *
+	 *   u1 = x1
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1
+	 *
+	 * These formulas mishandle the two following cases:
+	 *
+	 *  - If P1 is the point-at-infinity (z1 = 0), then z3 is
+	 *    incorrectly set to 0.
+	 *
+	 *  - If P1 = P2, then u1 = u2 and s1 = s2, and x3, y3 and z3
+	 *    are all set to 0.
+	 *
+	 * However, if P1 + P2 = 0, then u1 = u2 but s1 != s2, and then
+	 * we correctly get z3 = 0 (the point-at-infinity).
+	 *
+	 * To fix the case P1 = 0, we perform at the end a copy of P2
+	 * over P1, conditional to z1 = 0.
+	 *
+	 * For P1 = P2: in that case, both h and r are set to 0, and
+	 * we get x3, y3 and z3 equal to 0. We can test for that
+	 * occurrence to make a mask which will be all-one if P1 = P2,
+	 * or all-zero otherwise; then we can compute the double of P2
+	 * and add it, combined with the mask, to (x3,y3,z3).
+	 *
+	 * Using the doubling formulas in p256_double() on (x2,y2),
+	 * simplifying since P2 is affine (i.e. z2 = 1, implicitly),
+	 * we get:
+	 *   s = 4*x2*y2^2
+	 *   m = 3*(x2 + 1)*(x2 - 1)
+	 *   x' = m^2 - 2*s
+	 *   y' = m*(s - x') - 8*y2^4
+	 *   z' = 2*y2
+	 * which requires only 6 multiplications. Added to the 11
+	 * multiplications of the normal mixed addition in Jacobian
+	 * coordinates, we get a cost of 17 multiplications in total.
+	 */
+	uint64_t t1[5], t2[5], t3[5], t4[5], t5[5], t6[5], t7[5], tt, zz;
+	int i;
+
+	/*
+	 * Set zz to -1 if P1 is the point at infinity, 0 otherwise.
+	 */
+	zz = P1->z[0] | P1->z[1] | P1->z[2] | P1->z[3] | P1->z[4];
+	zz = ((zz | -zz) >> 63) - (uint64_t)1;
+
+	/*
+	 * Compute u1 = x1 (in t1) and s1 = y1 (in t3).
+	 */
+	memcpy(t1, P1->x, sizeof t1);
+	memcpy(t3, P1->y, sizeof t3);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	f256_montysquare(t4, P1->z);
+	f256_montymul(t2, P2->x, t4);
+	f256_montymul(t5, P1->z, t4);
+	f256_montymul(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4); the
+	 * final reductions are done below, just before the zero tests.
+	 */
+	f256_sub(t2, t2, t1);
+	f256_sub(t4, t4, t3);
+
+	/*
+	 * If both h = 0 and r = 0, then P1 = P2, and we want to set
+	 * the mask tt to -1; otherwise, the mask will be 0.
+	 */
+	f256_final_reduce(t2);
+	f256_final_reduce(t4);
+	tt = t2[0] | t2[1] | t2[2] | t2[3] | t2[4]
+		| t4[0] | t4[1] | t4[2] | t4[3] | t4[4];
+	tt = ((tt | -tt) >> 63) - (uint64_t)1;
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5).
+	 */
+	f256_montysquare(t7, t2);
+	f256_montymul(t6, t1, t7);
+	f256_montymul(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	f256_montysquare(P1->x, t4);
+	f256_sub(P1->x, P1->x, t5);
+	f256_sub(P1->x, P1->x, t6);
+	f256_sub(P1->x, P1->x, t6);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	f256_sub(t6, t6, P1->x);
+	f256_montymul(P1->y, t4, t6);
+	f256_montymul(t1, t5, t3);
+	f256_sub(P1->y, P1->y, t1);
+
+	/*
+	 * Compute z3 = h*z1.
+	 */
+	f256_montymul(P1->z, P1->z, t2);
+
+	/*
+	 * The "double" result, in case P1 = P2.
+	 */
+
+	/*
+	 * Compute z' = 2*y2 (in t1).
+	 */
+	f256_add(t1, P2->y, P2->y);
+	f256_partial_reduce(t1);
+
+	/*
+	 * Compute 2*(y2^2) (in t2) and s = 4*x2*(y2^2) (in t3).
+	 */
+	f256_montysquare(t2, P2->y);
+	f256_add(t2, t2, t2);
+	f256_add(t3, t2, t2);
+	f256_montymul(t3, P2->x, t3);
+
+	/*
+	 * Compute m = 3*(x2^2 - 1) (in t4).
+	 */
+	f256_montysquare(t4, P2->x);
+	f256_sub(t4, t4, F256_R);
+	f256_add(t5, t4, t4);
+	f256_add(t4, t4, t5);
+
+	/*
+	 * Compute x' = m^2 - 2*s (in t5).
+	 */
+	f256_montysquare(t5, t4);
+	f256_sub(t5, t3);	/* NOTE(review): only 2 arguments (3 elsewhere) */
+	f256_sub(t5, t3);	/* NOTE(review): only 2 arguments (3 elsewhere) */
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y2^4 (in t6).
+	 */
+	f256_sub(t6, t3, t5);
+	f256_montymul(t6, t6, t4);
+	f256_montysquare(t7, t2);
+	f256_sub(t6, t6, t7);
+	f256_sub(t6, t6, t7);
+
+	/*
+	 * We now have the alternate (doubling) coordinates in (t5,t6,t1).
+	 * We combine them with (x3,y3,z3).
+	 */
+	for (i = 0; i < 5; i ++) {
+		P1->x[i] |= tt & t5[i];
+		P1->y[i] |= tt & t6[i];
+		P1->z[i] |= tt & t1[i];
+	}
+
+	/*
+	 * If P1 = 0, then we get z3 = 0 (which is invalid); if z1 is 0,
+	 * then we want to replace the result with a copy of P2. The
+	 * test on z1 was done at the start, in the zz mask.
+	 */
+	for (i = 0; i < 5; i ++) {
+		P1->x[i] ^= zz & (P1->x[i] ^ P2->x[i]);
+		P1->y[i] ^= zz & (P1->y[i] ^ P2->y[i]);
+		P1->z[i] ^= zz & (P1->z[i] ^ F256_R[i]);
+	}
+} /* NOTE(review): declared as returning uint32_t but has no return statement */
+#endif
+
+/*
+ * Inner function for computing a point multiplication. A window is
+ * provided, with points 1*P to 15*P in affine coordinates.
+ *
+ * Assumptions:
+ *  - All provided points are valid points on the curve.
+ *  - Multiplier is non-zero, and smaller than the curve order.
+ *  - Everything is in Montgomery representation.
+ */
+static void
+point_mul_inner(p256_jacobian *R, const p256_affine *W,
+	const unsigned char *k, size_t klen)
+{
+	p256_jacobian Q;
+	uint32_t qz;
+
+	memset(&Q, 0, sizeof Q);
+	qz = 1;	/* stays 1 until the first non-zero 4-bit digit is seen */
+	while (klen -- > 0) {
+		int i;
+		unsigned bk;
+
+		bk = *k ++;
+		for (i = 0; i < 2; i ++) {
+			uint32_t bits;
+			uint32_t bnz;
+			p256_affine T;
+			p256_jacobian U;
+			uint32_t n;
+			int j;
+			uint64_t m;
+
+			p256_double(&Q);
+			p256_double(&Q);
+			p256_double(&Q);
+			p256_double(&Q);
+			bits = (bk >> 4) & 0x0F;	/* high nibble first */
+			bnz = NEQ(bits, 0);
+
+			/*
+			 * Lookup point in window. If the bits are 0,
+			 * we get something invalid, which is not a
+			 * problem because we will use it only if the
+			 * bits are non-zero.
+			 */
+			memset(&T, 0, sizeof T);
+			for (n = 0; n < 15; n ++) {
+				m = -(uint64_t)EQ(bits, n + 1);	/* mask selecting W[n] */
+				T.x[0] |= m & W[n].x[0];
+				T.x[1] |= m & W[n].x[1];
+				T.x[2] |= m & W[n].x[2];
+				T.x[3] |= m & W[n].x[3];
+				T.x[4] |= m & W[n].x[4];
+				T.y[0] |= m & W[n].y[0];
+				T.y[1] |= m & W[n].y[1];
+				T.y[2] |= m & W[n].y[2];
+				T.y[3] |= m & W[n].y[3];
+				T.y[4] |= m & W[n].y[4];
+			}
+
+			U = Q;
+			p256_add_mixed(&U, &T);	/* U = Q + T, used only if Q != 0 */
+
+			/*
+			 * While qz is 1, Q is all-zeros (p256_double() keeps
+			 * it so); on a non-zero digit, Q is loaded with T.
+			 */
+			m = -(uint64_t)(bnz & qz);
+			for (j = 0; j < 5; j ++) {
+				Q.x[j] ^= m & (Q.x[j] ^ T.x[j]);
+				Q.y[j] ^= m & (Q.y[j] ^ T.y[j]);
+				Q.z[j] ^= m & (Q.z[j] ^ F256_R[j]);
+			}
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);	/* otherwise Q <- U */
+			qz &= ~bnz;
+			bk <<= 4;	/* bring the low nibble up for the next pass */
+		}
+	}
+	*R = Q;
+}
+
+/*
+ * Convert a window from Jacobian to affine coordinates. A single
+ * field inversion is used. This function works for windows up to
+ * 32 elements.
+ *
+ * The destination array (aff[]) and the source array (jac[]) may
+ * overlap, provided that the start of aff[] is not after the start of
+ * jac[]. Even if the arrays do _not_ overlap, the source array is
+ * modified.
+ */
+static void
+window_to_affine(p256_affine *aff, p256_jacobian *jac, int num)
+{
+	/*
+	 * Convert the window points to affine coordinates. We use the
+	 * following trick to mutualize the inversion computation: if
+	 * we have z1, z2, z3, and z4, and want to invert all of them,
+	 * we compute u = 1/(z1*z2*z3*z4), and then we have:
+	 *   1/z1 = u*z2*z3*z4
+	 *   1/z2 = u*z1*z3*z4
+	 *   1/z3 = u*z1*z2*z4
+	 *   1/z4 = u*z1*z2*z3
+	 *
+	 * The partial products are computed recursively:
+	 *
+	 *  - on input (z_1,z_2), return (z_2,z_1) and z_1*z_2
+	 *  - on input (z_1,z_2,... z_n):
+	 *       recurse on (z_1,z_2,... z_(n/2)) -> r1 and m1
+	 *       recurse on (z_(n/2+1),z_(n/2+2)... z_n) -> r2 and m2
+	 *       multiply elements of r1 by m2 -> s1
+	 *       multiply elements of r2 by m1 -> s2
+	 *       return r1||r2 and m1*m2
+	 *
+	 * In the example below, we suppose that we have 14 elements.
+	 * Let z1, z2,... zE be the 14 values to invert (index noted in
+	 * hexadecimal, starting at 1).
+	 *
+	 *  - Depth 1:
+	 *      swap(z1, z2); z12 = z1*z2
+	 *      swap(z3, z4); z34 = z3*z4
+	 *      swap(z5, z6); z56 = z5*z6
+	 *      swap(z7, z8); z78 = z7*z8
+	 *      swap(z9, zA); z9A = z9*zA
+	 *      swap(zB, zC); zBC = zB*zC
+	 *      swap(zD, zE); zDE = zD*zE
+	 *
+	 *  - Depth 2:
+	 *      z1 <- z1*z34, z2 <- z2*z34, z3 <- z3*z12, z4 <- z4*z12
+	 *      z1234 = z12*z34
+	 *      z5 <- z5*z78, z6 <- z6*z78, z7 <- z7*z56, z8 <- z8*z56
+	 *      z5678 = z56*z78
+	 *      z9 <- z9*zBC, zA <- zA*zBC, zB <- zB*z9A, zC <- zC*z9A
+	 *      z9ABC = z9A*zBC
+	 *
+	 *  - Depth 3:
+	 *      z1 <- z1*z5678, z2 <- z2*z5678, z3 <- z3*z5678, z4 <- z4*z5678
+	 *      z5 <- z5*z1234, z6 <- z6*z1234, z7 <- z7*z1234, z8 <- z8*z1234
+	 *      z12345678 = z1234*z5678
+	 *      z9 <- z9*zDE, zA <- zA*zDE, zB <- zB*zDE, zC <- zC*zDE
+	 *      zD <- zD*z9ABC, zE <- zE*z9ABC
+	 *      z9ABCDE = z9ABC*zDE
+	 *
+	 *  - Depth 4:
+	 *      multiply z1..z8 by z9ABCDE
+	 *      multiply z9..zE by z12345678
+	 *      final z = z12345678*z9ABCDE
+	 */
+
+	uint64_t z[16][5];
+	int i, k, s;
+#define zt   (z[15])
+#define zu   (z[14])
+#define zv   (z[13])
+
+	/*
+	 * First recursion step (pairwise swapping and multiplication).
+	 * If there is an odd number of elements, then we "invent" an
+	 * extra one with coordinate Z = 1 (in Montgomery representation).
+	 */
+	for (i = 0; (i + 1) < num; i += 2) {
+		memcpy(zt, jac[i].z, sizeof zt);
+		memcpy(jac[i].z, jac[i + 1].z, sizeof zt);
+		memcpy(jac[i + 1].z, zt, sizeof zt);
+		f256_montymul(z[i >> 1], jac[i].z, jac[i + 1].z);
+	}
+	if ((num & 1) != 0) {
+		memcpy(z[num >> 1], jac[num - 1].z, sizeof zt);
+		memcpy(jac[num - 1].z, F256_R, sizeof F256_R);
+	}
+
+	/*
+	 * Perform further recursion steps. At the entry of each step,
+	 * the process has been done for groups of 's' points. The
+	 * integer k is the log2 of s.
+	 */
+	for (k = 1, s = 2; s < num; k ++, s <<= 1) {
+		int n;
+
+		for (i = 0; i < num; i ++) {
+			f256_montymul(jac[i].z, jac[i].z, z[(i >> k) ^ 1]);
+		}
+		n = (num + s - 1) >> k;	/* number of group products on entry */
+		for (i = 0; i < (n >> 1); i ++) {
+			f256_montymul(z[i], z[i << 1], z[(i << 1) + 1]);
+		}
+		if ((n & 1) != 0) {
+			memmove(z[n >> 1], z[n], sizeof zt); /* NOTE(review): leftover looks like z[n - 1]; not reached for num = 15 -- confirm */
+		}
+	}
+
+	/*
+	 * Invert the full product; per point: zv = 1/z, zu = 1/z^2, zv = 1/z^3.
+	 */
+	f256_invert(zt, z[0]);
+	for (i = 0; i < num; i ++) {
+		f256_montymul(zv, jac[i].z, zt);
+		f256_montysquare(zu, zv);
+		f256_montymul(zv, zv, zu);
+		f256_montymul(aff[i].x, jac[i].x, zu);
+		f256_montymul(aff[i].y, jac[i].y, zv);
+	}
+}
+
+/*
+ * Multiply the provided point by an integer.
+ * Assumptions:
+ *  - Source point is a valid curve point.
+ *  - Source point is not the point-at-infinity.
+ *  - Integer is not 0, and is lower than the curve order.
+ * If these conditions are not met, then the result is indeterminate
+ * (but the process is still constant-time).
+ */
+static void
+p256_mul(p256_jacobian *P, const unsigned char *k, size_t klen)
+{
+	union {
+		p256_affine aff[15];
+		p256_jacobian jac[15];
+	} window;	/* aff[] and jac[] share the same storage */
+	int i;
+
+	/*
+	 * Compute window (jac[i - 1] = i*P), in Jacobian coordinates.
+	 */
+	window.jac[0] = *P;
+	for (i = 2; i < 16; i ++) {
+		window.jac[i - 1] = window.jac[(i >> 1) - 1];	/* (i/2)*P */
+		if ((i & 1) == 0) {
+			p256_double(&window.jac[i - 1]);	/* 2*(i/2)*P */
+		} else {
+			p256_add(&window.jac[i - 1], &window.jac[i >> 1]);	/* + (i/2+1)*P */
+		}
+	}
+
+	/*
+	 * Convert the window points to affine coordinates. Point
+	 * window[0] is the source point, already in affine coordinates.
+	 */
+	window_to_affine(window.aff, window.jac, 15);
+
+	/*
+	 * Perform point multiplication.
+	 */
+	point_mul_inner(P, window.aff, k, klen);
+}
+
+/*
+ * Precomputed window for the conventional generator: P256_Gwin[n] holds
+ * (n+1)*G for n = 0..14 (affine, Montgomery representation, 5 limbs).
+ */
+static const p256_affine P256_Gwin[] = {
+	{
+		{ 0x30D418A9143C1, 0xC4FEDB60179E7, 0x62251075BA95F,
+		  0x5C669FB732B77, 0x08905F76B5375 },
+		{ 0x5357CE95560A8, 0x43A19E45CDDF2, 0x21F3258B4AB8E,
+		  0xD8552E88688DD, 0x0571FF18A5885 }
+	},
+	{
+		{ 0x46D410DDD64DF, 0x0B433827D8500, 0x1490D9AA6AE3C,
+		  0xA3A832205038D, 0x06BB32E52DCF3 },
+		{ 0x48D361BEE1A57, 0xB7B236FF82F36, 0x042DBE152CD7C,
+		  0xA3AA9A8FB0E92, 0x08C577517A5B8 }
+	},
+	{
+		{ 0x3F904EEBC1272, 0x9E87D81FBFFAC, 0xCBBC98B027F84,
+		  0x47E46AD77DD87, 0x06936A3FD6FF7 },
+		{ 0x5C1FC983A7EBD, 0xC3861FE1AB04C, 0x2EE98E583E47A,
+		  0xC06A88208311A, 0x05F06A2AB587C }
+	},
+	{
+		{ 0xB50D46918DCC5, 0xD7623C17374B0, 0x100AF24650A6E,
+		  0x76ABCDAACACE8, 0x077362F591B01 },
+		{ 0xF24CE4CBABA68, 0x17AD6F4472D96, 0xDDD22E1762847,
+		  0x862EB6C36DEE5, 0x04B14C39CC5AB }
+	},
+	{
+		{ 0x8AAEC45C61F5C, 0x9D4B9537DBE1B, 0x76C20C90EC649,
+		  0x3C7D41CB5AAD0, 0x0907960649052 },
+		{ 0x9B4AE7BA4F107, 0xF75EB882BEB30, 0x7A1F6873C568E,
+		  0x915C540A9877E, 0x03A076BB9DD1E }
+	},
+	{
+		{ 0x47373E77664A1, 0xF246CEE3E4039, 0x17A3AD55AE744,
+		  0x673C50A961A5B, 0x03074B5964213 },
+		{ 0x6220D377E44BA, 0x30DFF14B593D3, 0x639F11299C2B5,
+		  0x75F5424D44CEF, 0x04C9916DEA07F }
+	},
+	{
+		{ 0x354EA0173B4F1, 0x3C23C00F70746, 0x23BB082BD2021,
+		  0xE03E43EAAB50C, 0x03BA5119D3123 },
+		{ 0xD0303F5B9D4DE, 0x17DA67BDD2847, 0xC941956742F2F,
+		  0x8670F933BDC77, 0x0AEDD9164E240 }
+	},
+	{
+		{ 0x4CD19499A78FB, 0x4BF9B345527F1, 0x2CFC6B462AB5C,
+		  0x30CDF90F02AF0, 0x0763891F62652 },
+		{ 0xA3A9532D49775, 0xD7F9EBA15F59D, 0x60BBF021E3327,
+		  0xF75C23C7B84BE, 0x06EC12F2C706D }
+	},
+	{
+		{ 0x6E8F264E20E8E, 0xC79A7A84175C9, 0xC8EB00ABE6BFE,
+		  0x16A4CC09C0444, 0x005B3081D0C4E },
+		{ 0x777AA45F33140, 0xDCE5D45E31EB7, 0xB12F1A56AF7BE,
+		  0xF9B2B6E019A88, 0x086659CDFD835 }
+	},
+	{
+		{ 0xDBD19DC21EC8C, 0x94FCF81392C18, 0x250B4998F9868,
+		  0x28EB37D2CD648, 0x0C61C947E4B34 },
+		{ 0x407880DD9E767, 0x0C83FBE080C2B, 0x9BE5D2C43A899,
+		  0xAB4EF7D2D6577, 0x08719A555B3B4 }
+	},
+	{
+		{ 0x260A6245E4043, 0x53E7FDFE0EA7D, 0xAC1AB59DE4079,
+		  0x072EFF3A4158D, 0x0E7090F1949C9 },
+		{ 0x85612B944E886, 0xE857F61C81A76, 0xAD643D250F939,
+		  0x88DAC0DAA891E, 0x089300244125B }
+	},
+	{
+		{ 0x1AA7D26977684, 0x58A345A3304B7, 0x37385EABDEDEF,
+		  0x155E409D29DEE, 0x0EE1DF780B83E },
+		{ 0x12D91CBB5B437, 0x65A8956370CAC, 0xDE6D66170ED2F,
+		  0xAC9B8228CFA8A, 0x0FF57C95C3238 }
+	},
+	{
+		{ 0x25634B2ED7097, 0x9156FD30DCCC4, 0x9E98110E35676,
+		  0x7594CBCD43F55, 0x038477ACC395B },
+		{ 0x2B90C00EE17FF, 0xF842ED2E33575, 0x1F5BC16874838,
+		  0x7968CD06422BD, 0x0BC0876AB9E7B }
+	},
+	{
+		{ 0xA35BB0CF664AF, 0x68F9707E3A242, 0x832660126E48F,
+		  0x72D2717BF54C6, 0x0AAE7333ED12C },
+		{ 0x2DB7995D586B1, 0xE732237C227B5, 0x65E7DBBE29569,
+		  0xBBBD8E4193E2A, 0x052706DC3EAA1 }
+	},
+	{
+		{ 0xD8B7BC60055BE, 0xD76E27E4B72BC, 0x81937003CC23E,
+		  0xA090E337424E4, 0x02AA0E43EAD3D },
+		{ 0x524F6383C45D2, 0x422A41B2540B8, 0x8A4797D766355,
+		  0xDF444EFA6DE77, 0x0042170A9079A }
+	},
+};
+
+/*
+ * Multiply the conventional generator of the curve by the provided
+ * integer. Return is written in *P.
+ *
+ * Assumptions:
+ *  - Integer is not 0, and is lower than the curve order.
+ * If this condition is not met, then the result is indeterminate
+ * (but the process is still constant-time).
+ */
+static void
+p256_mulgen(p256_jacobian *P, const unsigned char *k, size_t klen)
+{
+	point_mul_inner(P, P256_Gwin, k, klen);	/* precomputed 1*G..15*G window */
+}
+
+/*
+ * Return 1 if all of the following hold:
+ *  - klen <= 32
+ *  - k != 0
+ *  - k is lower than the curve order
+ * Otherwise, return 0.
+ *
+ * Constant-time behaviour: only klen may be observable.
+ */
+static uint32_t
+check_scalar(const unsigned char *k, size_t klen)
+{
+	uint32_t z;
+	int32_t c;
+	size_t u;
+
+	if (klen > 32) {
+		return 0;
+	}
+	z = 0;
+	for (u = 0; u < klen; u ++) {
+		z |= k[u];	/* z != 0 iff some byte of k is non-zero */
+	}
+	if (klen == 32) {
+		c = 0;
+		for (u = 0; u < klen; u ++) {
+			c |= -(int32_t)EQ0(c) & CMP(k[u], P256_N[u]);	/* keep first non-zero result */
+		}
+	} else {
+		c = -1;	/* fewer than 32 bytes: treated as lower than the order */
+	}
+	return NEQ(z, 0) & LT0(c);
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *k, size_t klen, int curve)
+{
+	uint32_t r;
+	p256_jacobian P;
+	/* Decode G, multiply it by k, re-encode into G; r accumulates validity. */
+	(void)curve;
+	if (Glen != 65) {	/* 65 = length returned by api_mulgen() */
+		return 0;
+	}
+	r = check_scalar(k, klen);
+	r &= point_decode(&P, G);
+	p256_mul(&P, k, klen);	/* always executed, whatever the value of r */
+	r &= point_encode(G, &P);
+	return r;
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *k, size_t klen, int curve)
+{
+	p256_jacobian P;
+	/* Writes k*G into R using point_encode(); always reports 65 bytes. */
+	(void)curve;
+	p256_mulgen(&P, k, klen);	/* NOTE(review): k is not validated here */
+	point_encode(R, &P);	/* returned status is ignored */
+	return 65;
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/*
+	 * We might want to use Shamir's trick here: make a composite
+	 * window of u*P+v*Q points, to merge the two doubling-ladders
+	 * into one. This, however, has some complications:
+	 *
+	 *  - During the computation, we may hit the point-at-infinity.
+	 *    Thus, we would need p256_add_complete_mixed() (complete
+	 *    formulas for point addition), with a higher cost (17 muls
+	 *    instead of 11).
+	 *
+	 *  - A 4-bit window would be too large, since it would involve
+	 *    16*16-1 = 255 points. For the same window size as in the
+	 *    p256_mul() case, we would need to reduce the window size
+	 *    to 2 bits, and thus perform twice as many non-doubling
+	 *    point additions.
+	 *
+	 *  - The window may itself contain the point-at-infinity, and
+	 *    thus cannot, in all generality, be made of affine points.
+	 *    Instead, we would need to make it a window of points in
+	 *    Jacobian coordinates. Even p256_add_complete_mixed() would
+	 *    be inappropriate.
+	 *
+	 * For these reasons, the code below performs two separate
+	 * point multiplications, then computes the final point addition
+	 * (which is both a "normal" addition, and a doubling, to handle
+	 * all cases).
+	 */
+	/* Computes x*A + y*B (x*A + y*G when B is NULL); result goes into A. */
+	p256_jacobian P, Q;
+	uint32_t r, t, s;
+	uint64_t z;
+
+	(void)curve;
+	if (len != 65) {
+		return 0;
+	}
+	r = point_decode(&P, A);
+	p256_mul(&P, x, xlen);
+	if (B == NULL) {
+		p256_mulgen(&Q, y, ylen);
+	} else {
+		r &= point_decode(&Q, B);
+		p256_mul(&Q, y, ylen);
+	}
+
+	/*
+	 * The final addition may fail in case both points are equal.
+	 */
+	t = p256_add(&P, &Q);
+	f256_final_reduce(P.z);
+	z = P.z[0] | P.z[1] | P.z[2] | P.z[3] | P.z[4];
+	s = EQ((uint32_t)(z | (z >> 32)), 0);	/* s = 1 iff the sum has z == 0 */
+	p256_double(&Q);	/* candidate result for the P == Q case */
+
+	/*
+	 * If s is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
+	 * have the following:
+	 *
+	 *   s = 0, t = 0   return P (normal addition)
+	 *   s = 0, t = 1   return P (normal addition)
+	 *   s = 1, t = 0   return Q (a 'double' case)
+	 *   s = 1, t = 1   report an error (P+Q = 0)
+	 */
+	CCOPY(s & ~t, &P, &Q, sizeof Q);
+	point_encode(A, &P);
+	r &= ~(s & t);
+	return r;
+}
+
+/* see bearssl_ec.h -- table of entry points for this implementation */
+const br_ec_impl br_ec_p256_m62 = {
+	(uint32_t)0x00800000,	/* only bit 23 set; presumably the supported-curve mask -- confirm in bearssl_ec.h */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
+
+/* see bearssl_ec.h -- returns the address of the br_ec_p256_m62 table */
+const br_ec_impl *
+br_ec_p256_m62_get(void)
+{
+	return &br_ec_p256_m62;
+}
+
+#else
+
+/* see bearssl_ec.h -- fallback used when the #if branch above is disabled */
+const br_ec_impl *
+br_ec_p256_m62_get(void)
+{
+	return 0;	/* null pointer: no implementation to hand out */
+}
+
+#endif
diff --git a/third_party/bearssl/src/ec_p256_m64.c b/third_party/bearssl/src/ec_p256_m64.c
new file mode 100644
index 0000000..71a527c
--- /dev/null
+++ b/third_party/bearssl/src/ec_p256_m64.c
@@ -0,0 +1,1781 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+#if BR_UMUL128
+#include <intrin.h>
+#endif
+
+static const unsigned char P256_G[] = {	/* returned by api_generator(); 65 bytes: 0x04 header, then 64 bytes (the X, Y layout read by point_decode()) */
+	0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8,
+	0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D,
+	0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8,
+	0x98, 0xC2, 0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F,
+	0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E, 0x16, 0x2B,
+	0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6, 0x40,
+	0x68, 0x37, 0xBF, 0x51, 0xF5
+};
+
+static const unsigned char P256_N[] = {	/* returned by api_order(); 32 bytes (presumably the curve order, big-endian -- confirm) */
+	0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD,
+	0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63,
+	0x25, 0x51
+};
+
+static const unsigned char *
+api_generator(int curve, size_t *len)	/* returns P256_G and stores its size in *len */
+{
+	(void)curve;	/* parameter is not used by this implementation */
+	*len = sizeof P256_G;
+	return P256_G;
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)	/* returns P256_N and stores its size in *len */
+{
+	(void)curve;	/* parameter is not used by this implementation */
+	*len = sizeof P256_N;
+	return P256_N;
+}
+
+static size_t
+api_xoff(int curve, size_t *len)	/* offset (returned) and length (*len) of X in an encoded point */
+{
+	(void)curve;	/* parameter is not used by this implementation */
+	*len = 32;	/* X occupies 32 bytes (see point_decode()) */
+	return 1;	/* X starts right after the one-byte 0x04 header */
+}
+
+/*
+ * A field element is encoded as four 64-bit integers, in basis 2^64.
+ * Values may reach up to 2^256-1. Montgomery multiplication is used.
+ */
+
+/* R = 2^256 mod p, i.e. the Montgomery representation of 1 (least significant limb first) */
+static const uint64_t F256_R[] = {
+	0x0000000000000001, 0xFFFFFFFF00000000,
+	0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFE
+};
+
+/* Curve equation is y^2 = x^3 - 3*x + B. This constant is B*R mod p
+   (Montgomery representation of B), least significant limb first. */
+static const uint64_t P256_B_MONTY[] = {
+	0xD89CDF6229C4BDDF, 0xACF005CD78843090,
+	0xE5A220ABF7212ED6, 0xDC30061D04874834
+};
+
+/*
+ * Addition in the field: d <- a + b mod p (256-bit result, may be >= p).
+ */
+static inline void
+f256_add(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+#if BR_INT128
+	unsigned __int128 w;
+	uint64_t t;
+
+	/*
+	 * Do the addition, with an extra carry in t.
+	 */
+	w = (unsigned __int128)a[0] + b[0];
+	d[0] = (uint64_t)w;
+	w = (unsigned __int128)a[1] + b[1] + (w >> 64);
+	d[1] = (uint64_t)w;
+	w = (unsigned __int128)a[2] + b[2] + (w >> 64);
+	d[2] = (uint64_t)w;
+	w = (unsigned __int128)a[3] + b[3] + (w >> 64);
+	d[3] = (uint64_t)w;
+	t = (uint64_t)(w >> 64);
+
+	/*
+	 * Fold carry t, using: 2^256 = 2^224 - 2^192 - 2^96 + 1 mod p.
+	 */
+	w = (unsigned __int128)d[0] + t;
+	d[0] = (uint64_t)w;
+	w = (unsigned __int128)d[1] + (w >> 64) - (t << 32);
+	d[1] = (uint64_t)w;
+	/* Here, carry "w >> 64" can only be 0 or -1 */
+	w = (unsigned __int128)d[2] - ((w >> 64) & 1);
+	d[2] = (uint64_t)w;
+	/* Again, carry is 0 or -1. But there can be carry only if t = 1,
+	   in which case the addition of (t << 32) - t is positive. */
+	w = (unsigned __int128)d[3] - ((w >> 64) & 1) + (t << 32) - t;
+	d[3] = (uint64_t)w;
+	t = (uint64_t)(w >> 64);
+
+	/*
+	 * There can be an extra carry here, which we must fold again.
+	 */
+	w = (unsigned __int128)d[0] + t;
+	d[0] = (uint64_t)w;
+	w = (unsigned __int128)d[1] + (w >> 64) - (t << 32);
+	d[1] = (uint64_t)w;
+	w = (unsigned __int128)d[2] - ((w >> 64) & 1);
+	d[2] = (uint64_t)w;
+	d[3] += (t << 32) - t - (uint64_t)((w >> 64) & 1);
+
+#elif BR_UMUL128
+
+	unsigned char cc;
+	uint64_t t;
+
+	cc = _addcarry_u64(0, a[0], b[0], &d[0]);
+	cc = _addcarry_u64(cc, a[1], b[1], &d[1]);
+	cc = _addcarry_u64(cc, a[2], b[2], &d[2]);
+	cc = _addcarry_u64(cc, a[3], b[3], &d[3]);
+
+	/*
+	 * If there is a carry, then we want to subtract p, which we
+	 * do by adding 2^256 - p (its limbs are those of F256_R).
+	 */
+	t = cc;
+	cc = _addcarry_u64(cc, d[0], 0, &d[0]);
+	cc = _addcarry_u64(cc, d[1], -(t << 32), &d[1]);
+	cc = _addcarry_u64(cc, d[2], -t, &d[2]);
+	cc = _addcarry_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);
+
+	/*
+	 * We have to do it again if there still is a carry.
+	 */
+	t = cc;
+	cc = _addcarry_u64(cc, d[0], 0, &d[0]);
+	cc = _addcarry_u64(cc, d[1], -(t << 32), &d[1]);
+	cc = _addcarry_u64(cc, d[2], -t, &d[2]);
+	(void)_addcarry_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);
+
+#endif
+}
+
+/*
+ * Subtraction in the field: d <- a - b mod p (256-bit result, may be >= p).
+ */
+static inline void
+f256_sub(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+#if BR_INT128
+
+	unsigned __int128 w;
+	uint64_t t;
+
+	w = (unsigned __int128)a[0] - b[0];
+	d[0] = (uint64_t)w;
+	w = (unsigned __int128)a[1] - b[1] - ((w >> 64) & 1);
+	d[1] = (uint64_t)w;
+	w = (unsigned __int128)a[2] - b[2] - ((w >> 64) & 1);
+	d[2] = (uint64_t)w;
+	w = (unsigned __int128)a[3] - b[3] - ((w >> 64) & 1);
+	d[3] = (uint64_t)w;
+	t = (uint64_t)(w >> 64) & 1;
+
+	/*
+	 * If there is a borrow (t = 1), then we must add the modulus
+	 * p = 2^256 - 2^224 + 2^192 + 2^96 - 1.
+	 */
+	w = (unsigned __int128)d[0] - t;
+	d[0] = (uint64_t)w;
+	w = (unsigned __int128)d[1] + (t << 32) - ((w >> 64) & 1);
+	d[1] = (uint64_t)w;
+	/* Here, carry "w >> 64" can only be 0 or +1 */
+	w = (unsigned __int128)d[2] + (w >> 64);
+	d[2] = (uint64_t)w;
+	/* Again, carry is 0 or +1 */
+	w = (unsigned __int128)d[3] + (w >> 64) - (t << 32) + t;
+	d[3] = (uint64_t)w;
+	t = (uint64_t)(w >> 64) & 1;
+
+	/*
+	 * There may be again a borrow, in which case we must add the
+	 * modulus again.
+	 */
+	w = (unsigned __int128)d[0] - t;
+	d[0] = (uint64_t)w;
+	w = (unsigned __int128)d[1] + (t << 32) - ((w >> 64) & 1);
+	d[1] = (uint64_t)w;
+	w = (unsigned __int128)d[2] + (w >> 64);
+	d[2] = (uint64_t)w;
+	d[3] += (uint64_t)(w >> 64) - (t << 32) + t;
+
+#elif BR_UMUL128
+
+	unsigned char cc;
+	uint64_t t;
+
+	cc = _subborrow_u64(0, a[0], b[0], &d[0]);
+	cc = _subborrow_u64(cc, a[1], b[1], &d[1]);
+	cc = _subborrow_u64(cc, a[2], b[2], &d[2]);
+	cc = _subborrow_u64(cc, a[3], b[3], &d[3]);
+
+	/*
+	 * If there is a borrow, then we need to add p. We (virtually)
+	 * add 2^256, then subtract 2^256 - p.
+	 */
+	t = cc;
+	cc = _subborrow_u64(0, d[0], t, &d[0]);
+	cc = _subborrow_u64(cc, d[1], -(t << 32), &d[1]);
+	cc = _subborrow_u64(cc, d[2], -t, &d[2]);
+	cc = _subborrow_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);
+
+	/*
+	 * If there still is a borrow, then we need to add p again.
+	 */
+	t = cc;
+	cc = _subborrow_u64(0, d[0], t, &d[0]);
+	cc = _subborrow_u64(cc, d[1], -(t << 32), &d[1]);
+	cc = _subborrow_u64(cc, d[2], -t, &d[2]);
+	(void)_subborrow_u64(cc, d[3], (t << 32) - (t << 1), &d[3]);
+
+#endif
+}
+
+/*
+ * Montgomery multiplication in the field: d <- a*b/2^256 mod p.
+ */
+static void
+f256_montymul(uint64_t *d, const uint64_t *a, const uint64_t *b)
+{
+#if BR_INT128
+
+	uint64_t x, f, t0, t1, t2, t3, t4;
+	unsigned __int128 z, ff;
+	int i;
+
+	/*
+	 * When computing d <- d + a[u]*b, we also add f*p such
+	 * that d + a[u]*b + f*p is a multiple of 2^64. Since
+	 * p = -1 mod 2^64, we can compute f = d[0] + a[u]*b[0] mod 2^64.
+	 */
+
+	/*
+	 * Step 1: t <- (a[0]*b + f*p) / 2^64
+	 * We have f = a[0]*b[0] mod 2^64. Since p = -1 mod 2^64, this
+	 * ensures that (a[0]*b + f*p) is a multiple of 2^64.
+	 *
+	 * We also have: f*p = f*2^256 - f*2^224 + f*2^192 + f*2^96 - f.
+	 */
+	x = a[0];
+	z = (unsigned __int128)b[0] * x;
+	f = (uint64_t)z;
+	z = (unsigned __int128)b[1] * x + (z >> 64) + (uint64_t)(f << 32);
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)b[2] * x + (z >> 64) + (uint64_t)(f >> 32);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)b[3] * x + (z >> 64) + f;
+	t2 = (uint64_t)z;
+	t3 = (uint64_t)(z >> 64);
+	ff = ((unsigned __int128)f << 64) - ((unsigned __int128)f << 32);
+	z = (unsigned __int128)t2 + (uint64_t)ff;
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)t3 + (z >> 64) + (ff >> 64);
+	t3 = (uint64_t)z;
+	t4 = (uint64_t)(z >> 64);
+
+	/*
+	 * Steps 2 to 4: t <- (t + a[i]*b + f*p) / 2^64
+	 */
+	for (i = 1; i < 4; i ++) {
+		x = a[i];
+
+		/* t <- (t + x*b - f) / 2^64 */
+		z = (unsigned __int128)b[0] * x + t0;
+		f = (uint64_t)z;
+		z = (unsigned __int128)b[1] * x + t1 + (z >> 64);
+		t0 = (uint64_t)z;
+		z = (unsigned __int128)b[2] * x + t2 + (z >> 64);
+		t1 = (uint64_t)z;
+		z = (unsigned __int128)b[3] * x + t3 + (z >> 64);
+		t2 = (uint64_t)z;
+		z = t4 + (z >> 64);
+		t3 = (uint64_t)z;
+		t4 = (uint64_t)(z >> 64);
+
+		/* t <- t + f*2^32, carry in the upper half of z */
+		z = (unsigned __int128)t0 + (uint64_t)(f << 32);
+		t0 = (uint64_t)z;
+		z = (z >> 64) + (unsigned __int128)t1 + (uint64_t)(f >> 32);
+		t1 = (uint64_t)z;
+
+		/* t <- t + f*2^192 - f*2^160 + f*2^128 */
+		ff = ((unsigned __int128)f << 64) 
+			- ((unsigned __int128)f << 32) + f;
+		z = (z >> 64) + (unsigned __int128)t2 + (uint64_t)ff;
+		t2 = (uint64_t)z;
+		z = (unsigned __int128)t3 + (z >> 64) + (ff >> 64);
+		t3 = (uint64_t)z;
+		t4 += (uint64_t)(z >> 64);
+	}
+
+	/*
+	 * At that point, we have computed t = (a*b + F*p) / 2^256, where
+	 * F is a 256-bit integer whose limbs are the "f" coefficients
+	 * in the steps above. We have:
+	 *   a <= 2^256-1
+	 *   b <= 2^256-1
+	 *   F <= 2^256-1
+	 * Hence:
+	 *   a*b + F*p <= (2^256-1)*(2^256-1) + p*(2^256-1)
+	 *   a*b + F*p <= 2^256*(2^256 - 2 + p) + 1 - p
+	 * Therefore:
+	 *   t < 2^256 + p - 2
+	 * Since p < 2^256, it follows that:
+	 *   t4 can be only 0 or 1
+	 *   t - p < 2^256
+	 * We can therefore subtract p from t, conditionally on t4, to
+	 * get a nonnegative result that fits on 256 bits.
+	 */
+	z = (unsigned __int128)t0 + t4;
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)t1 - (t4 << 32) + (z >> 64);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)t2 - (z >> 127);
+	t2 = (uint64_t)z;
+	t3 = t3 - (uint64_t)(z >> 127) - t4 + (t4 << 32);
+
+	d[0] = t0;
+	d[1] = t1;
+	d[2] = t2;
+	d[3] = t3;
+
+#elif BR_UMUL128
+
+	uint64_t x, f, t0, t1, t2, t3, t4;
+	uint64_t zl, zh, ffl, ffh;
+	unsigned char k, m;
+	int i;
+
+	/*
+	 * When computing d <- d + a[u]*b, we also add f*p such
+	 * that d + a[u]*b + f*p is a multiple of 2^64. Since
+	 * p = -1 mod 2^64, we can compute f = d[0] + a[u]*b[0] mod 2^64.
+	 */
+
+	/*
+	 * Step 1: t <- (a[0]*b + f*p) / 2^64
+	 * We have f = a[0]*b[0] mod 2^64. Since p = -1 mod 2^64, this
+	 * ensures that (a[0]*b + f*p) is a multiple of 2^64.
+	 *
+	 * We also have: f*p = f*2^256 - f*2^224 + f*2^192 + f*2^96 - f.
+	 */
+	x = a[0];
+
+	zl = _umul128(b[0], x, &zh);
+	f = zl;
+	t0 = zh;
+
+	zl = _umul128(b[1], x, &zh);
+	k = _addcarry_u64(0, zl, t0, &zl);
+	(void)_addcarry_u64(k, zh, 0, &zh);
+	k = _addcarry_u64(0, zl, f << 32, &zl);
+	(void)_addcarry_u64(k, zh, 0, &zh);
+	t0 = zl;
+	t1 = zh;
+
+	zl = _umul128(b[2], x, &zh);
+	k = _addcarry_u64(0, zl, t1, &zl);
+	(void)_addcarry_u64(k, zh, 0, &zh);
+	k = _addcarry_u64(0, zl, f >> 32, &zl);
+	(void)_addcarry_u64(k, zh, 0, &zh);
+	t1 = zl;
+	t2 = zh;
+
+	zl = _umul128(b[3], x, &zh);
+	k = _addcarry_u64(0, zl, t2, &zl);
+	(void)_addcarry_u64(k, zh, 0, &zh);
+	k = _addcarry_u64(0, zl, f, &zl);
+	(void)_addcarry_u64(k, zh, 0, &zh);
+	t2 = zl;
+	t3 = zh;
+
+	t4 = _addcarry_u64(0, t3, f, &t3);
+	k = _subborrow_u64(0, t2, f << 32, &t2);
+	k = _subborrow_u64(k, t3, f >> 32, &t3);
+	(void)_subborrow_u64(k, t4, 0, &t4);
+
+	/*
+	 * Steps 2 to 4: t <- (t + a[i]*b + f*p) / 2^64
+	 */
+	for (i = 1; i < 4; i ++) {
+		x = a[i];
+		/* f = t0 + x * b[0]; -- computed below */
+
+		/* t <- (t + x*b - f) / 2^64 */
+		zl = _umul128(b[0], x, &zh);
+		k = _addcarry_u64(0, zl, t0, &f);
+		(void)_addcarry_u64(k, zh, 0, &t0);
+
+		zl = _umul128(b[1], x, &zh);
+		k = _addcarry_u64(0, zl, t0, &zl);
+		(void)_addcarry_u64(k, zh, 0, &zh);
+		k = _addcarry_u64(0, zl, t1, &t0);
+		(void)_addcarry_u64(k, zh, 0, &t1);
+
+		zl = _umul128(b[2], x, &zh);
+		k = _addcarry_u64(0, zl, t1, &zl);
+		(void)_addcarry_u64(k, zh, 0, &zh);
+		k = _addcarry_u64(0, zl, t2, &t1);
+		(void)_addcarry_u64(k, zh, 0, &t2);
+
+		zl = _umul128(b[3], x, &zh);
+		k = _addcarry_u64(0, zl, t2, &zl);
+		(void)_addcarry_u64(k, zh, 0, &zh);
+		k = _addcarry_u64(0, zl, t3, &t2);
+		(void)_addcarry_u64(k, zh, 0, &t3);
+
+		t4 = _addcarry_u64(0, t3, t4, &t3);
+
+		/* t <- t + f*2^32, carry in k */
+		k = _addcarry_u64(0, t0, f << 32, &t0);
+		k = _addcarry_u64(k, t1, f >> 32, &t1);
+
+		/* t <- t + f*2^192 - f*2^160 + f*2^128 */
+		m = _subborrow_u64(0, f, f << 32, &ffl);
+		(void)_subborrow_u64(m, f, f >> 32, &ffh);
+		k = _addcarry_u64(k, t2, ffl, &t2);
+		k = _addcarry_u64(k, t3, ffh, &t3);
+		(void)_addcarry_u64(k, t4, 0, &t4);
+	}
+
+	/*
+	 * At that point, we have computed t = (a*b + F*p) / 2^256, where
+	 * F is a 256-bit integer whose limbs are the "f" coefficients
+	 * in the steps above. We have:
+	 *   a <= 2^256-1
+	 *   b <= 2^256-1
+	 *   F <= 2^256-1
+	 * Hence:
+	 *   a*b + F*p <= (2^256-1)*(2^256-1) + p*(2^256-1)
+	 *   a*b + F*p <= 2^256*(2^256 - 2 + p) + 1 - p
+	 * Therefore:
+	 *   t < 2^256 + p - 2
+	 * Since p < 2^256, it follows that:
+	 *   t4 can be only 0 or 1
+	 *   t - p < 2^256
+	 * We can therefore subtract p from t, conditionally on t4, to
+	 * get a nonnegative result that fits on 256 bits.
+	 */
+	k = _addcarry_u64(0, t0, t4, &t0);
+	k = _addcarry_u64(k, t1, -(t4 << 32), &t1);
+	k = _addcarry_u64(k, t2, -t4, &t2);
+	(void)_addcarry_u64(k, t3, (t4 << 32) - (t4 << 1), &t3);
+
+	d[0] = t0;
+	d[1] = t1;
+	d[2] = t2;
+	d[3] = t3;
+
+#endif
+}
+
+/*
+ * Montgomery squaring in the field: d <- a*a/2^256 mod p. Currently a
+ * basic wrapper around multiplication (inline, should be optimized away).
+ * TODO: see if some extra speed can be gained here.
+ */
+static inline void
+f256_montysquare(uint64_t *d, const uint64_t *a)
+{
+	f256_montymul(d, a, a);
+}
+
+/*
+ * Convert to Montgomery representation: d <- a*R mod p, with R = 2^256.
+ */
+static void
+f256_tomonty(uint64_t *d, const uint64_t *a)
+{
+	/*
+	 * R2 = 2^512 mod p.
+	 * If R = 2^256 mod p, then R2 = R^2 mod p; and the Montgomery
+	 * multiplication of a by R2 is: a*R2/R = a*R mod p, i.e. the
+	 * conversion to Montgomery representation.
+	 */
+	static const uint64_t R2[] = {
+		0x0000000000000003,
+		0xFFFFFFFBFFFFFFFF,
+		0xFFFFFFFFFFFFFFFE,
+		0x00000004FFFFFFFD
+	};
+
+	f256_montymul(d, a, R2);
+}
+
+/*
+ * Convert from Montgomery representation: d <- a/R mod p, with R = 2^256.
+ */
+static void
+f256_frommonty(uint64_t *d, const uint64_t *a)
+{
+	/*
+	 * Montgomery multiplication by 1 is division by 2^256 modulo p.
+	 */
+	static const uint64_t one[] = { 1, 0, 0, 0 };
+
+	f256_montymul(d, a, one);
+}
+
+/*
+ * Inversion in the field. If the source value is 0 modulo p, then this
+ * returns 0 or p. Source and result both use Montgomery representation.
+ */
+static void
+f256_invert(uint64_t *d, const uint64_t *a)
+{
+	/*
+	 * We compute a^(p-2) mod p. The exponent pattern (from high to
+	 * low) is:
+	 *  - 32 bits of value 1
+	 *  - 31 bits of value 0
+	 *  - 1 bit of value 1
+	 *  - 96 bits of value 0
+	 *  - 94 bits of value 1
+	 *  - 1 bit of value 0
+	 *  - 1 bit of value 1
+	 * To speed up the square-and-multiply algorithm, we precompute
+	 * a^(2^31-1) (in t), which stands for a run of 31 one bits.
+	 */
+
+	uint64_t r[4], t[4];
+	int i;
+
+	memcpy(t, a, sizeof t);
+	for (i = 0; i < 30; i ++) {
+		f256_montysquare(t, t);
+		f256_montymul(t, t, a);
+	}
+
+	memcpy(r, t, sizeof t);
+	for (i = 224; i >= 0; i --) {
+		f256_montysquare(r, r);
+		switch (i) {
+		case 0:
+		case 2:
+		case 192:
+		case 224:
+			f256_montymul(r, r, a);	/* set exponent bit i */
+			break;
+		case 3:
+		case 34:
+		case 65:
+			f256_montymul(r, r, t);	/* set exponent bits i+30 .. i */
+			break;
+		}
+	}
+	memcpy(d, r, sizeof r);
+}
+
+/*
+ * Finalize reduction (in place, without data-dependent branches).
+ * Input value fits on 256 bits. This function subtracts p if and only
+ * if the input is greater than or equal to p.
+ */
+static inline void
+f256_final_reduce(uint64_t *a)
+{
+#if BR_INT128
+
+	uint64_t t0, t1, t2, t3, cc;
+	unsigned __int128 z;
+
+	/*
+	 * We add 2^224 - 2^192 - 2^96 + 1 to a. If there is no carry,
+	 * then a < p; otherwise, the addition result we computed is
+	 * the value we must return.
+	 */
+	z = (unsigned __int128)a[0] + 1;
+	t0 = (uint64_t)z;
+	z = (unsigned __int128)a[1] + (z >> 64) - ((uint64_t)1 << 32);
+	t1 = (uint64_t)z;
+	z = (unsigned __int128)a[2] - (z >> 127);
+	t2 = (uint64_t)z;
+	z = (unsigned __int128)a[3] - (z >> 127) + 0xFFFFFFFF;
+	t3 = (uint64_t)z;
+	cc = -(uint64_t)(z >> 64);	/* all-ones mask if there was a carry, else 0 */
+
+	a[0] ^= cc & (a[0] ^ t0);	/* masked copy: a <- t when cc is all-ones */
+	a[1] ^= cc & (a[1] ^ t1);
+	a[2] ^= cc & (a[2] ^ t2);
+	a[3] ^= cc & (a[3] ^ t3);
+
+#elif BR_UMUL128
+
+	uint64_t t0, t1, t2, t3, m;
+	unsigned char k;
+
+	k = _addcarry_u64(0, a[0], (uint64_t)1, &t0);
+	k = _addcarry_u64(k, a[1], -((uint64_t)1 << 32), &t1);
+	k = _addcarry_u64(k, a[2], -(uint64_t)1, &t2);
+	k = _addcarry_u64(k, a[3], ((uint64_t)1 << 32) - 2, &t3);
+	m = -(uint64_t)k;	/* all-ones mask if there was a carry, else 0 */
+
+	a[0] ^= m & (a[0] ^ t0);	/* masked copy: a <- t when m is all-ones */
+	a[1] ^= m & (a[1] ^ t1);
+	a[2] ^= m & (a[2] ^ t2);
+	a[3] ^= m & (a[3] ^ t3);
+
+#endif
+}
+
+/*
+ * Points in affine and Jacobian coordinates (four 64-bit limbs each).
+ *
+ *  - In affine coordinates, the point-at-infinity cannot be encoded.
+ *  - Jacobian coordinates (X,Y,Z) correspond to affine (X/Z^2,Y/Z^3);
+ *    if Z = 0 then this is the point-at-infinity.
+ */
+typedef struct {
+	uint64_t x[4];
+	uint64_t y[4];
+} p256_affine;
+
+typedef struct {
+	uint64_t x[4];
+	uint64_t y[4];
+	uint64_t z[4];	/* z = 0 denotes the point-at-infinity */
+} p256_jacobian;
+
+/*
+ * Decode a point. The returned point is in Jacobian coordinates, but
+ * with z = 1. If the encoding is invalid, or encodes a point which is
+ * not on the curve, or encodes the point at infinity, then this function
+ * returns 0. Otherwise, 1 is returned. *P is written in both cases.
+ *
+ * The buffer is assumed to have length exactly 65 bytes.
+ */
+static uint32_t
+point_decode(p256_jacobian *P, const unsigned char *buf)
+{
+	uint64_t x[4], y[4], t[4], x3[4], tt;
+	uint32_t r;
+
+	/*
+	 * Header byte shall be 0x04.
+	 */
+	r = EQ(buf[0], 0x04);
+
+	/*
+	 * Decode X and Y coordinates (big-endian in the buffer, least
+	 * significant limb first in memory), and convert them into
+	 * Montgomery representation.
+	 */
+	x[3] = br_dec64be(buf +  1);
+	x[2] = br_dec64be(buf +  9);
+	x[1] = br_dec64be(buf + 17);
+	x[0] = br_dec64be(buf + 25);
+	y[3] = br_dec64be(buf + 33);
+	y[2] = br_dec64be(buf + 41);
+	y[1] = br_dec64be(buf + 49);
+	y[0] = br_dec64be(buf + 57);
+	f256_tomonty(x, x);
+	f256_tomonty(y, y);
+
+	/*
+	 * Verify y^2 = x^3 + A*x + B. In curve P-256, A = -3.
+	 * Note that the Montgomery representation of 0 is 0. We must
+	 * take care to apply the final reduction to make sure we have
+	 * 0 and not p.
+	 */
+	f256_montysquare(t, y);
+	f256_montysquare(x3, x);
+	f256_montymul(x3, x3, x);
+	f256_sub(t, t, x3);
+	f256_add(t, t, x);
+	f256_add(t, t, x);
+	f256_add(t, t, x);
+	f256_sub(t, t, P256_B_MONTY);
+	f256_final_reduce(t);
+	tt = t[0] | t[1] | t[2] | t[3];
+	r &= EQ((uint32_t)(tt | (tt >> 32)), 0);
+
+	/*
+	 * Return the point in Jacobian coordinates (and Montgomery
+	 * representation).
+	 */
+	memcpy(P->x, x, sizeof x);
+	memcpy(P->y, y, sizeof y);
+	memcpy(P->z, F256_R, sizeof F256_R);
+	return r;
+}
+
+/*
+ * Final conversion for a point:
+ *  - The point is converted back to affine coordinates.
+ *  - Final reduction is performed.
+ *  - The point is encoded into the provided buffer (65 bytes written).
+ *
+ * If the point is the point-at-infinity, all operations are performed,
+ * but the buffer contents are indeterminate, and 0 is returned. Otherwise,
+ * the encoded point is written in the buffer, and 1 is returned.
+ */
+static uint32_t
+point_encode(unsigned char *buf, const p256_jacobian *P)
+{
+	uint64_t t1[4], t2[4], z;
+
+	/* Set t1 = 1/z^2 and t2 = 1/z^3. */
+	f256_invert(t2, P->z);
+	f256_montysquare(t1, t2);
+	f256_montymul(t2, t2, t1);
+
+	/* Compute affine coordinates x (in t1) and y (in t2). */
+	f256_montymul(t1, P->x, t1);
+	f256_montymul(t2, P->y, t2);
+
+	/* Convert back from Montgomery representation, and finalize
+	   reductions. */
+	f256_frommonty(t1, t1);
+	f256_frommonty(t2, t2);
+	f256_final_reduce(t1);
+	f256_final_reduce(t2);
+
+	/* Encode: 0x04 header, then x and y, most significant limb first. */
+	buf[0] = 0x04;
+	br_enc64be(buf +  1, t1[3]);
+	br_enc64be(buf +  9, t1[2]);
+	br_enc64be(buf + 17, t1[1]);
+	br_enc64be(buf + 25, t1[0]);
+	br_enc64be(buf + 33, t2[3]);
+	br_enc64be(buf + 41, t2[2]);
+	br_enc64be(buf + 49, t2[1]);
+	br_enc64be(buf + 57, t2[0]);
+
+	/* Return success if and only if P->z != 0. */
+	z = P->z[0] | P->z[1] | P->z[2] | P->z[3];
+	return NEQ((uint32_t)(z | z >> 32), 0);
+}
+
+/*
+ * Point doubling in Jacobian coordinates: point P is doubled.
+ * Note: if the source point is the point-at-infinity, then the result is
+ * still the point-at-infinity, which is correct. Moreover, if the three
+ * coordinates were zero, then they still are zero in the returned value.
+ *
+ * (Note: this is true even without the final reduction: if the three
+ * coordinates are encoded as four words of value zero each, then the
+ * result will also have all-zero coordinate encodings, not the alternate
+ * encoding as the integer p.)
+ */
+static void
+p256_double(p256_jacobian *P)
+{
+	/*
+	 * Doubling formulas are:
+	 *
+	 *   s = 4*x*y^2
+	 *   m = 3*(x + z^2)*(x - z^2)
+	 *   x' = m^2 - 2*s
+	 *   y' = m*(s - x') - 8*y^4
+	 *   z' = 2*y*z
+	 *
+	 * These formulas work for all points, including points of order 2
+	 * and points at infinity:
+	 *   - If y = 0 then z' = 0. But there is no such point in P-256
+	 *     anyway.
+	 *   - If z = 0 then z' = 0.
+	 */
+	uint64_t t1[4], t2[4], t3[4], t4[4];
+
+	/*
+	 * Compute z^2 in t1.
+	 */
+	f256_montysquare(t1, P->z);
+
+	/*
+	 * Compute x+z^2 in t2 and x-z^2 in t1.
+	 */
+	f256_add(t2, P->x, t1);
+	f256_sub(t1, P->x, t1);
+
+	/*
+	 * Compute m = 3*(x+z^2)*(x-z^2) in t1.
+	 */
+	f256_montymul(t3, t1, t2);
+	f256_add(t1, t3, t3);
+	f256_add(t1, t3, t1);
+
+	/*
+	 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
+	 */
+	f256_montysquare(t3, P->y);
+	f256_add(t3, t3, t3);
+	f256_montymul(t2, P->x, t3);
+	f256_add(t2, t2, t2);
+
+	/*
+	 * Compute x' = m^2 - 2*s.
+	 */
+	f256_montysquare(P->x, t1);
+	f256_sub(P->x, P->x, t2);
+	f256_sub(P->x, P->x, t2);
+
+	/*
+	 * Compute z' = 2*y*z (must be done before P->y is overwritten).
+	 */
+	f256_montymul(t4, P->y, P->z);
+	f256_add(P->z, t4, t4);
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
+	 * 2*y^2 in t3.
+	 */
+	f256_sub(t2, t2, P->x);
+	f256_montymul(P->y, t1, t2);
+	f256_montysquare(t4, t3);
+	f256_add(t4, t4, t4);
+	f256_sub(P->y, P->y, t4);
+}
+
+/*
+ * Point addition (Jacobian coordinates): P1 is replaced with P1+P2.
+ * This function computes the wrong result in the following cases:
+ *
+ *   - If P1 == 0 but P2 != 0
+ *   - If P1 != 0 but P2 == 0
+ *   - If P1 == P2
+ *
+ * In all three cases, P1 is set to the point at infinity.
+ *
+ * Returned value is 0 if one of the following occurs:
+ *
+ *   - P1 and P2 have the same Y coordinate.
+ *   - P1 == 0 and P2 == 0.
+ *   - The Y coordinate of one of the points is 0 and the other point is
+ *     the point at infinity.
+ *
+ * The third case cannot actually happen with valid points, since a point
+ * with Y == 0 is a point of order 2, and there is no point of order 2 on
+ * curve P-256.
+ *
+ * Therefore, assuming that P1 != 0 and P2 != 0 on input, then the caller
+ * can apply the following:
+ *
+ *   - If the result is not the point at infinity, then it is correct.
+ *   - Otherwise, if the returned value is 1, then this is a case of
+ *     P1+P2 == 0, so the result is indeed the point at infinity.
+ *   - Otherwise, P1 == P2, so a "double" operation should have been
+ *     performed.
+ *
+ * Note that you can get a returned value of 0 with a correct result,
+ * e.g. if P1 and P2 have the same Y coordinate, but distinct X coordinates.
+ */
+static uint32_t
+p256_add(p256_jacobian *P1, const p256_jacobian *P2)
+{
+	/*
+	 * Addition formulas are:
+	 *
+	 *   u1 = x1 * z2^2
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1 * z2^3
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1 * z2
+	 */
+	uint64_t t1[4], t2[4], t3[4], t4[4], t5[4], t6[4], t7[4], tt;
+	uint32_t ret;
+
+	/*
+	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
+	 */
+	f256_montysquare(t3, P2->z);
+	f256_montymul(t1, P1->x, t3);
+	f256_montymul(t4, P2->z, t3);
+	f256_montymul(t3, P1->y, t4);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	f256_montysquare(t4, P1->z);
+	f256_montymul(t2, P2->x, t4);
+	f256_montymul(t5, P1->z, t4);
+	f256_montymul(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 * We need to test whether r is zero, so we apply a final
+	 * reduction to it first.
+	 */
+	f256_sub(t2, t2, t1);
+	f256_sub(t4, t4, t3);
+	f256_final_reduce(t4);
+	tt = t4[0] | t4[1] | t4[2] | t4[3];
+	ret = (uint32_t)(tt | (tt >> 32));
+	ret = (ret | -ret) >> 31;	/* 1 if r != 0, 0 if r == 0 */
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5);
+	 */
+	f256_montysquare(t7, t2);
+	f256_montymul(t6, t1, t7);
+	f256_montymul(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	f256_montysquare(P1->x, t4);
+	f256_sub(P1->x, P1->x, t5);
+	f256_sub(P1->x, P1->x, t6);
+	f256_sub(P1->x, P1->x, t6);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	f256_sub(t6, t6, P1->x);
+	f256_montymul(P1->y, t4, t6);
+	f256_montymul(t1, t5, t3);
+	f256_sub(P1->y, P1->y, t1);
+
+	/*
+	 * Compute z3 = h*z1*z2.
+	 */
+	f256_montymul(t1, P1->z, P2->z);
+	f256_montymul(P1->z, t1, t2);
+
+	return ret;
+}
+
+/*
+ * Point addition (mixed coordinates): P1 is replaced with P1+P2.
+ * This is a specialised function for the case when P2 is a non-zero point
+ * in affine coordinates.
+ *
+ * This function computes the wrong result in the following cases:
+ *
+ *   - If P1 == 0
+ *   - If P1 == P2
+ *
+ * In both cases, P1 is set to the point at infinity.
+ *
+ * Returned value is 0 if one of the following occurs:
+ *
+ *   - P1 and P2 have the same Y (affine) coordinate.
+ *   - The Y coordinate of P2 is 0 and P1 is the point at infinity.
+ *
+ * The second case cannot actually happen with valid points, since a point
+ * with Y == 0 is a point of order 2, and there is no point of order 2 on
+ * curve P-256.
+ *
+ * Therefore, assuming that P1 != 0 on input, then the caller
+ * can apply the following:
+ *
+ *   - If the result is not the point at infinity, then it is correct.
+ *   - Otherwise, if the returned value is 1, then this is a case of
+ *     P1+P2 == 0, so the result is indeed the point at infinity.
+ *   - Otherwise, P1 == P2, so a "double" operation should have been
+ *     performed.
+ *
+ * Again, a value of 0 may be returned in some cases where the addition
+ * result is correct.
+ */
+static uint32_t
+p256_add_mixed(p256_jacobian *P1, const p256_affine *P2)
+{
+	/*
+	 * Addition formulas are:
+	 *
+	 *   u1 = x1
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1
+	 */
+	uint64_t t1[4], t2[4], t3[4], t4[4], t5[4], t6[4], t7[4], tt;
+	uint32_t ret;
+
+	/*
+	 * Compute u1 = x1 (in t1) and s1 = y1 (in t3).
+	 */
+	memcpy(t1, P1->x, sizeof t1);
+	memcpy(t3, P1->y, sizeof t3);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	f256_montysquare(t4, P1->z);
+	f256_montymul(t2, P2->x, t4);
+	f256_montymul(t5, P1->z, t4);
+	f256_montymul(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 * We need to test whether r is zero, so we apply a final
+	 * reduction to it first.
+	 */
+	f256_sub(t2, t2, t1);
+	f256_sub(t4, t4, t3);
+	f256_final_reduce(t4);
+	tt = t4[0] | t4[1] | t4[2] | t4[3];
+	ret = (uint32_t)(tt | (tt >> 32));
+	ret = (ret | -ret) >> 31;	/* 1 if r != 0, 0 if r == 0 */
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5);
+	 */
+	f256_montysquare(t7, t2);
+	f256_montymul(t6, t1, t7);
+	f256_montymul(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	f256_montysquare(P1->x, t4);
+	f256_sub(P1->x, P1->x, t5);
+	f256_sub(P1->x, P1->x, t6);
+	f256_sub(P1->x, P1->x, t6);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	f256_sub(t6, t6, P1->x);
+	f256_montymul(P1->y, t4, t6);
+	f256_montymul(t1, t5, t3);
+	f256_sub(P1->y, P1->y, t1);
+
+	/*
+	 * Compute z3 = h*z1 (P2 is affine, so there is no z2 factor).
+	 */
+	f256_montymul(P1->z, P1->z, t2);
+
+	return ret;
+}
+
+#if 0
+/* unused */
+/*
+ * Point addition (mixed coordinates, complete): P1 is replaced with P1+P2.
+ * This is a specialised function for the case when P2 is a non-zero point
+ * in affine coordinates. The result is correct in all cases.
+ *
+ * NOTE(review): declared uint32_t but returns no value; fix before enabling.
+ */
+static uint32_t
+p256_add_complete_mixed(p256_jacobian *P1, const p256_affine *P2)
+{
+	/*
+	 * Addition formulas, in the general case, are:
+	 *
+	 *   u1 = x1
+	 *   u2 = x2 * z1^2
+	 *   s1 = y1
+	 *   s2 = y2 * z1^3
+	 *   h = u2 - u1
+	 *   r = s2 - s1
+	 *   x3 = r^2 - h^3 - 2 * u1 * h^2
+	 *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+	 *   z3 = h * z1
+	 *
+	 * These formulas mishandle the two following cases:
+	 *
+	 *  - If P1 is the point-at-infinity (z1 = 0), then z3 is
+	 *    incorrectly set to 0.
+	 *
+	 *  - If P1 = P2, then u1 = u2 and s1 = s2, and x3, y3 and z3
+	 *    are all set to 0.
+	 *
+	 * However, if P1 + P2 = 0, then u1 = u2 but s1 != s2, and then
+	 * we correctly get z3 = 0 (the point-at-infinity).
+	 *
+	 * To fix the case P1 = 0, we perform at the end a copy of P2
+	 * over P1, conditional to z1 = 0.
+	 *
+	 * For P1 = P2: in that case, both h and r are set to 0, and
+	 * we get x3, y3 and z3 equal to 0. We can test for that
+	 * occurrence to make a mask which will be all-one if P1 = P2,
+	 * or all-zero otherwise; then we can compute the double of P2
+	 * and add it, combined with the mask, to (x3,y3,z3).
+	 *
+	 * Using the doubling formulas in p256_double() on (x2,y2),
+	 * simplifying since P2 is affine (i.e. z2 = 1, implicitly),
+	 * we get:
+	 *   s = 4*x2*y2^2
+	 *   m = 3*(x2 + 1)*(x2 - 1)
+	 *   x' = m^2 - 2*s
+	 *   y' = m*(s - x') - 8*y2^4
+	 *   z' = 2*y2
+	 * which requires only 6 multiplications. Added to the 11
+	 * multiplications of the normal mixed addition in Jacobian
+	 * coordinates, we get a cost of 17 multiplications in total.
+	 */
+	uint64_t t1[4], t2[4], t3[4], t4[4], t5[4], t6[4], t7[4], tt, zz;
+	int i;
+
+	/*
+	 * Set zz to -1 if P1 is the point at infinity, 0 otherwise.
+	 */
+	zz = P1->z[0] | P1->z[1] | P1->z[2] | P1->z[3];
+	zz = ((zz | -zz) >> 63) - (uint64_t)1;
+
+	/*
+	 * Compute u1 = x1 (in t1) and s1 = y1 (in t3).
+	 */
+	memcpy(t1, P1->x, sizeof t1);
+	memcpy(t3, P1->y, sizeof t3);
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	f256_montysquare(t4, P1->z);
+	f256_montymul(t2, P2->x, t4);
+	f256_montymul(t5, P1->z, t4);
+	f256_montymul(t4, P2->y, t5);
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4); both get
+	 * a final reduction just below.
+	 */
+	f256_sub(t2, t2, t1);
+	f256_sub(t4, t4, t3);
+
+	/*
+	 * If both h = 0 and r = 0, then P1 = P2, and we want to set
+	 * the mask tt to -1; otherwise, the mask will be 0.
+	 */
+	f256_final_reduce(t2);
+	f256_final_reduce(t4);
+	tt = t2[0] | t2[1] | t2[2] | t2[3] | t4[0] | t4[1] | t4[2] | t4[3];
+	tt = ((tt | -tt) >> 63) - (uint64_t)1;
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5);
+	 */
+	f256_montysquare(t7, t2);
+	f256_montymul(t6, t1, t7);
+	f256_montymul(t5, t7, t2);
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 */
+	f256_montysquare(P1->x, t4);
+	f256_sub(P1->x, P1->x, t5);
+	f256_sub(P1->x, P1->x, t6);
+	f256_sub(P1->x, P1->x, t6);
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	f256_sub(t6, t6, P1->x);
+	f256_montymul(P1->y, t4, t6);
+	f256_montymul(t1, t5, t3);
+	f256_sub(P1->y, P1->y, t1);
+
+	/*
+	 * Compute z3 = h*z1.
+	 */
+	f256_montymul(P1->z, P1->z, t2);
+
+	/*
+	 * The "double" result, in case P1 = P2.
+	 */
+
+	/*
+	 * Compute z' = 2*y2 (in t1).
+	 */
+	f256_add(t1, P2->y, P2->y);
+
+	/*
+	 * Compute 2*(y2^2) (in t2) and s = 4*x2*(y2^2) (in t3).
+	 */
+	f256_montysquare(t2, P2->y);
+	f256_add(t2, t2, t2);
+	f256_add(t3, t2, t2);
+	f256_montymul(t3, P2->x, t3);
+
+	/*
+	 * Compute m = 3*(x2^2 - 1) (in t4).
+	 */
+	f256_montysquare(t4, P2->x);
+	f256_sub(t4, t4, F256_R);
+	f256_add(t5, t4, t4);
+	f256_add(t4, t4, t5);
+
+	/*
+	 * Compute x' = m^2 - 2*s (in t5).
+	 */
+	f256_montysquare(t5, t4);
+	f256_sub(t5, t5, t3);
+	f256_sub(t5, t5, t3);
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y2^4 (in t6).
+	 */
+	f256_sub(t6, t3, t5);
+	f256_montymul(t6, t6, t4);
+	f256_montysquare(t7, t2);
+	f256_sub(t6, t6, t7);
+	f256_sub(t6, t6, t7);
+
+	/*
+	 * We now have the alternate (doubling) coordinates in (t5,t6,t1).
+	 * We combine them with (x3,y3,z3).
+	 */
+	for (i = 0; i < 4; i ++) {
+		P1->x[i] |= tt & t5[i];
+		P1->y[i] |= tt & t6[i];
+		P1->z[i] |= tt & t1[i];
+	}
+
+	/*
+	 * If P1 = 0, then we get z3 = 0 (which is invalid); if z1 is 0,
+	 * then we want to replace the result with a copy of P2. The
+	 * test on z1 was done at the start, in the zz mask.
+	 */
+	for (i = 0; i < 4; i ++) {
+		P1->x[i] ^= zz & (P1->x[i] ^ P2->x[i]);
+		P1->y[i] ^= zz & (P1->y[i] ^ P2->y[i]);
+		P1->z[i] ^= zz & (P1->z[i] ^ F256_R[i]);
+	}
+}
+#endif
+
+/*
+ * Inner function for computing a point multiplication. A window is
+ * provided, with W[n] = (n+1)*P for n = 0..14, in affine coordinates.
+ * The multiplier is read from k[0], high nibble of each byte first.
+ * Assumptions:
+ *  - All provided points are valid points on the curve.
+ *  - Multiplier is non-zero, and smaller than the curve order.
+ *  - Everything is in Montgomery representation.
+ */
+static void
+point_mul_inner(p256_jacobian *R, const p256_affine *W,
+	const unsigned char *k, size_t klen)
+{
+	p256_jacobian Q;
+	uint32_t qz;
+
+	memset(&Q, 0, sizeof Q);
+	qz = 1;
+	while (klen -- > 0) {
+		int i;
+		unsigned bk;
+
+		bk = *k ++;
+		for (i = 0; i < 2; i ++) {
+			uint32_t bits;
+			uint32_t bnz;
+			p256_affine T;
+			p256_jacobian U;
+			uint32_t n;
+			int j;
+			uint64_t m;
+
+			p256_double(&Q);
+			p256_double(&Q);
+			p256_double(&Q);
+			p256_double(&Q);
+			bits = (bk >> 4) & 0x0F;
+			bnz = NEQ(bits, 0);
+
+			/*
+			 * Lookup point in window: all 15 entries are
+			 * read and merged under a mask, so the access
+			 * pattern does not depend on the bits. If the
+			 * bits are 0, T stays all-zeros and is unused.
+			 */
+			memset(&T, 0, sizeof T);
+			for (n = 0; n < 15; n ++) {
+				m = -(uint64_t)EQ(bits, n + 1);
+				T.x[0] |= m & W[n].x[0];
+				T.x[1] |= m & W[n].x[1];
+				T.x[2] |= m & W[n].x[2];
+				T.x[3] |= m & W[n].x[3];
+				T.y[0] |= m & W[n].y[0];
+				T.y[1] |= m & W[n].y[1];
+				T.y[2] |= m & W[n].y[2];
+				T.y[3] |= m & W[n].y[3];
+			}
+
+			U = Q;
+			p256_add_mixed(&U, &T);
+
+			/*
+			 * If qz is still 1, then Q was all-zeros (and this
+			 * is conserved through p256_double()): Q becomes T.
+			 */
+			m = -(uint64_t)(bnz & qz);
+			for (j = 0; j < 4; j ++) {
+				Q.x[j] |= m & T.x[j];
+				Q.y[j] |= m & T.y[j];
+				Q.z[j] |= m & F256_R[j];
+			}
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
+			qz &= ~bnz;
+			bk <<= 4;
+		}
+	}
+	*R = Q;
+}
+
+/*
+ * Convert a window from Jacobian to affine coordinates. A single
+ * field inversion is used. This function works for windows up to
+ * 32 elements. NOTE(review): the only caller visible here passes
+ * num = 15; confirm the partner indexing before using other sizes.
+ * The destination array (aff[]) and the source array (jac[]) may
+ * overlap, provided that the start of aff[] is not after the start of
+ * jac[]. Even if the arrays do _not_ overlap, the source array is
+ * modified (the z coordinates of jac[] are overwritten).
+ */
+static void
+window_to_affine(p256_affine *aff, p256_jacobian *jac, int num)
+{
+	/*
+	 * Convert the window points to affine coordinates. We use the
+	 * following trick to mutualize the inversion computation: if
+	 * we have z1, z2, z3, and z4, and want to inverse all of them,
+	 * we compute u = 1/(z1*z2*z3*z4), and then we have:
+	 *   1/z1 = u*z2*z3*z4
+	 *   1/z2 = u*z1*z3*z4
+	 *   1/z3 = u*z1*z2*z4
+	 *   1/z4 = u*z1*z2*z3
+	 *
+	 * The partial products are computed recursively:
+	 *
+	 *  - on input (z_1,z_2), return (z_2,z_1) and z_1*z_2
+	 *  - on input (z_1,z_2,... z_n):
+	 *       recurse on (z_1,z_2,... z_(n/2)) -> r1 and m1
+	 *       recurse on (z_(n/2+1),z_(n/2+2)... z_n) -> r2 and m2
+	 *       multiply elements of r1 by m2 -> s1
+	 *       multiply elements of r2 by m1 -> s2
+	 *       return s1||s2 and m1*m2
+	 *
+	 * In the example below, we suppose that we have 14 elements.
+	 * Let z1, z2,... zE be the 14 values to invert (index noted in
+	 * hexadecimal, starting at 1).
+	 *
+	 *  - Depth 1:
+	 *      swap(z1, z2); z12 = z1*z2
+	 *      swap(z3, z4); z34 = z3*z4
+	 *      swap(z5, z6); z56 = z5*z6
+	 *      swap(z7, z8); z78 = z7*z8
+	 *      swap(z9, zA); z9A = z9*zA
+	 *      swap(zB, zC); zBC = zB*zC
+	 *      swap(zD, zE); zDE = zD*zE
+	 *
+	 *  - Depth 2:
+	 *      z1 <- z1*z34, z2 <- z2*z34, z3 <- z3*z12, z4 <- z4*z12
+	 *      z1234 = z12*z34
+	 *      z5 <- z5*z78, z6 <- z6*z78, z7 <- z7*z56, z8 <- z8*z56
+	 *      z5678 = z56*z78
+	 *      z9 <- z9*zBC, zA <- zA*zBC, zB <- zB*z9A, zC <- zC*z9A
+	 *      z9ABC = z9A*zBC
+	 *
+	 *  - Depth 3:
+	 *      z1 <- z1*z5678, z2 <- z2*z5678, z3 <- z3*z5678, z4 <- z4*z5678
+	 *      z5 <- z5*z1234, z6 <- z6*z1234, z7 <- z7*z1234, z8 <- z8*z1234
+	 *      z12345678 = z1234*z5678
+	 *      z9 <- z9*zDE, zA <- zA*zDE, zB <- zB*zDE, zC <- zC*zDE
+	 *      zD <- zD*z9ABC, zE <- zE*z9ABC
+	 *      z9ABCDE = z9ABC*zDE
+	 *
+	 *  - Depth 4:
+	 *      multiply z1..z8 by z9ABCDE
+	 *      multiply z9..zE by z12345678
+	 *      final z = z12345678*z9ABCDE
+	 */
+
+	uint64_t z[16][4];
+	int i, k, s;
+#define zt   (z[15])
+#define zu   (z[14])
+#define zv   (z[13])
+
+	/*
+	 * First recursion step (pairwise swapping and multiplication).
+	 * If there is an odd number of elements, then we "invent" an
+	 * extra one with coordinate Z = 1 (in Montgomery representation).
+	 */
+	for (i = 0; (i + 1) < num; i += 2) {
+		memcpy(zt, jac[i].z, sizeof zt);
+		memcpy(jac[i].z, jac[i + 1].z, sizeof zt);
+		memcpy(jac[i + 1].z, zt, sizeof zt);
+		f256_montymul(z[i >> 1], jac[i].z, jac[i + 1].z);
+	}
+	if ((num & 1) != 0) {
+		memcpy(z[num >> 1], jac[num - 1].z, sizeof zt);
+		memcpy(jac[num - 1].z, F256_R, sizeof F256_R);
+	}
+
+	/*
+	 * Perform further recursion steps. At the entry of each step,
+	 * the process has been done for groups of 's' points. The
+	 * integer k is the log2 of s.
+	 */
+	for (k = 1, s = 2; s < num; k ++, s <<= 1) {
+		int n;
+
+		for (i = 0; i < num; i ++) {
+			f256_montymul(jac[i].z, jac[i].z, z[(i >> k) ^ 1]);
+		}
+		n = (num + s - 1) >> k;
+		for (i = 0; i < (n >> 1); i ++) {
+			f256_montymul(z[i], z[i << 1], z[(i << 1) + 1]);
+		}
+		if ((n & 1) != 0) {
+			memmove(z[n >> 1], z[n], sizeof zt);
+		}
+	}
+
+	/*
+	 * Invert the final result, and convert all points.
+	 */
+	f256_invert(zt, z[0]);
+	for (i = 0; i < num; i ++) {
+		f256_montymul(zv, jac[i].z, zt);
+		f256_montysquare(zu, zv);
+		f256_montymul(zv, zv, zu);
+		f256_montymul(aff[i].x, jac[i].x, zu);
+		f256_montymul(aff[i].y, jac[i].y, zv);
+	}
+}
+
+/*
+ * Compute k*P and store it back into *P. The multiplier bytes k[0..klen-1]
+ * are handed unchanged to point_mul_inner().
+ * Requirements:
+ *  - P is a valid curve point, distinct from the point-at-infinity.
+ *  - The integer is non-zero, and lower than the curve order.
+ * When a requirement is not met, the output is indeterminate
+ * (but the process is still constant-time).
+ */
+static void
+p256_mul(p256_jacobian *P, const unsigned char *k, size_t klen)
+{
+	union {
+		p256_affine aff[15];
+		p256_jacobian jac[15];
+	} win;
+	int n;
+
+	/*
+	 * Fill the window in Jacobian coordinates: slot n-1 gets n*P.
+	 * Even n: double of (n/2)*P. Odd n: (n/2)*P + (n/2 + 1)*P,
+	 * with n/2 rounded down.
+	 */
+	win.jac[0] = *P;
+	for (n = 2; n <= 15; n ++) {
+		p256_jacobian *slot = &win.jac[n - 1];
+
+		*slot = win.jac[(n >> 1) - 1];
+		if ((n & 1) != 0) {
+			p256_add(slot, &win.jac[n >> 1]);
+		} else {
+			p256_double(slot);
+		}
+	}
+
+	/*
+	 * Convert the window to affine coordinates (both views of the
+	 * union share their storage), then run the multiplication.
+	 */
+	window_to_affine(win.aff, win.jac, 15);
+	point_mul_inner(P, win.aff, k, klen);
+}
+
+/*
+ * Precomputed window for the conventional generator: P256_Gwin[n] holds
+ * (n+1)*G for n = 0..14 (affine coordinates, in Montgomery representation).
+ */
+static const p256_affine P256_Gwin[] = {
+	{
+		{ 0x79E730D418A9143C, 0x75BA95FC5FEDB601,
+		  0x79FB732B77622510, 0x18905F76A53755C6 },
+		{ 0xDDF25357CE95560A, 0x8B4AB8E4BA19E45C,
+		  0xD2E88688DD21F325, 0x8571FF1825885D85 }
+	},
+	{
+		{ 0x850046D410DDD64D, 0xAA6AE3C1A433827D,
+		  0x732205038D1490D9, 0xF6BB32E43DCF3A3B },
+		{ 0x2F3648D361BEE1A5, 0x152CD7CBEB236FF8,
+		  0x19A8FB0E92042DBE, 0x78C577510A5B8A3B }
+	},
+	{
+		{ 0xFFAC3F904EEBC127, 0xB027F84A087D81FB,
+		  0x66AD77DD87CBBC98, 0x26936A3FB6FF747E },
+		{ 0xB04C5C1FC983A7EB, 0x583E47AD0861FE1A,
+		  0x788208311A2EE98E, 0xD5F06A29E587CC07 }
+	},
+	{
+		{ 0x74B0B50D46918DCC, 0x4650A6EDC623C173,
+		  0x0CDAACACE8100AF2, 0x577362F541B0176B },
+		{ 0x2D96F24CE4CBABA6, 0x17628471FAD6F447,
+		  0x6B6C36DEE5DDD22E, 0x84B14C394C5AB863 }
+	},
+	{
+		{ 0xBE1B8AAEC45C61F5, 0x90EC649A94B9537D,
+		  0x941CB5AAD076C20C, 0xC9079605890523C8 },
+		{ 0xEB309B4AE7BA4F10, 0x73C568EFE5EB882B,
+		  0x3540A9877E7A1F68, 0x73A076BB2DD1E916 }
+	},
+	{
+		{ 0x403947373E77664A, 0x55AE744F346CEE3E,
+		  0xD50A961A5B17A3AD, 0x13074B5954213673 },
+		{ 0x93D36220D377E44B, 0x299C2B53ADFF14B5,
+		  0xF424D44CEF639F11, 0xA4C9916D4A07F75F }
+	},
+	{
+		{ 0x0746354EA0173B4F, 0x2BD20213D23C00F7,
+		  0xF43EAAB50C23BB08, 0x13BA5119C3123E03 },
+		{ 0x2847D0303F5B9D4D, 0x6742F2F25DA67BDD,
+		  0xEF933BDC77C94195, 0xEAEDD9156E240867 }
+	},
+	{
+		{ 0x27F14CD19499A78F, 0x462AB5C56F9B3455,
+		  0x8F90F02AF02CFC6B, 0xB763891EB265230D },
+		{ 0xF59DA3A9532D4977, 0x21E3327DCF9EBA15,
+		  0x123C7B84BE60BBF0, 0x56EC12F27706DF76 }
+	},
+	{
+		{ 0x75C96E8F264E20E8, 0xABE6BFED59A7A841,
+		  0x2CC09C0444C8EB00, 0xE05B3080F0C4E16B },
+		{ 0x1EB7777AA45F3314, 0x56AF7BEDCE5D45E3,
+		  0x2B6E019A88B12F1A, 0x086659CDFD835F9B }
+	},
+	{
+		{ 0x2C18DBD19DC21EC8, 0x98F9868A0FCF8139,
+		  0x737D2CD648250B49, 0xCC61C94724B3428F },
+		{ 0x0C2B407880DD9E76, 0xC43A8991383FBE08,
+		  0x5F7D2D65779BE5D2, 0x78719A54EB3B4AB5 }
+	},
+	{
+		{ 0xEA7D260A6245E404, 0x9DE407956E7FDFE0,
+		  0x1FF3A4158DAC1AB5, 0x3E7090F1649C9073 },
+		{ 0x1A7685612B944E88, 0x250F939EE57F61C8,
+		  0x0C0DAA891EAD643D, 0x68930023E125B88E }
+	},
+	{
+		{ 0x04B71AA7D2697768, 0xABDEDEF5CA345A33,
+		  0x2409D29DEE37385E, 0x4EE1DF77CB83E156 },
+		{ 0x0CAC12D91CBB5B43, 0x170ED2F6CA895637,
+		  0x28228CFA8ADE6D66, 0x7FF57C9553238ACA }
+	},
+	{
+		{ 0xCCC425634B2ED709, 0x0E356769856FD30D,
+		  0xBCBCD43F559E9811, 0x738477AC5395B759 },
+		{ 0x35752B90C00EE17F, 0x68748390742ED2E3,
+		  0x7CD06422BD1F5BC1, 0xFBC08769C9E7B797 }
+	},
+	{
+		{ 0xA242A35BB0CF664A, 0x126E48F77F9707E3,
+		  0x1717BF54C6832660, 0xFAAE7332FD12C72E },
+		{ 0x27B52DB7995D586B, 0xBE29569E832237C2,
+		  0xE8E4193E2A65E7DB, 0x152706DC2EAA1BBB }
+	},
+	{
+		{ 0x72BCD8B7BC60055B, 0x03CC23EE56E27E4B,
+		  0xEE337424E4819370, 0xE2AA0E430AD3DA09 },
+		{ 0x40B8524F6383C45D, 0xD766355442A41B25,
+		  0x64EFA6DE778A4797, 0x2042170A7079ADF4 }
+	}
+};
+
+/*
+ * Multiply the conventional generator of the curve by the provided
+ * integer. The result is written in *P.
+ *
+ * Assumptions:
+ *  - Integer is not 0, and is lower than the curve order.
+ * If this condition is not met, then the result is indeterminate
+ * (but the process is still constant-time).
+ */
+static void
+p256_mulgen(p256_jacobian *P, const unsigned char *k, size_t klen)
+{
+	point_mul_inner(P, P256_Gwin, k, klen);	/* window of 1*G..15*G */
+}
+
+/*
+ * Validate a multiplier given as the klen bytes k[0..klen-1].
+ * The returned value is 1 if all of the following hold:
+ *  - klen <= 32
+ *  - k != 0
+ *  - k is lower than the curve order (only compared when klen == 32)
+ * Otherwise, the returned value is 0.
+ * Constant-time behaviour: only klen may be observable.
+ */
+static uint32_t
+check_scalar(const unsigned char *k, size_t klen)
+{
+	uint32_t acc;
+	int32_t cmp;
+	size_t i;
+
+	if (klen > 32) {
+		return 0;
+	}
+	/* acc gathers all bytes: it is non-zero if and only if k is. */
+	for (acc = 0, i = 0; i < klen; i ++) {
+		acc |= k[i];
+	}
+
+	/* Shorter than 32 bytes: counted as lower than the order (cmp < 0). */
+	cmp = -1;
+	if (klen == 32) {
+		for (cmp = 0, i = 0; i < 32; i ++) {
+			cmp |= -(int32_t)EQ0(cmp) & CMP(k[i], P256_N[i]);
+		}
+	}
+	return NEQ(acc, 0) & LT0(cmp);
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *k, size_t klen, int curve)
+{
+	p256_jacobian pt;
+	uint32_t ok_scalar, ok_in, ok_out;
+
+	(void)curve;	/* not consulted by this implementation */
+	if (Glen != 65) {
+		return 0;
+	}
+	ok_scalar = check_scalar(k, klen);	/* every step below always runs */
+	ok_in = point_decode(&pt, G);
+	p256_mul(&pt, k, klen);
+	ok_out = point_encode(G, &pt);	/* the result overwrites G */
+	return ok_scalar & ok_in & ok_out;
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *k, size_t klen, int curve)
+{
+	p256_jacobian P;
+
+	(void)curve;
+	p256_mulgen(&P, k, klen);	/* k is not run through check_scalar() here */
+	point_encode(R, &P);	/* status of point_encode() is not used */
+	return 65;	/* presumably the encoded length written to R -- confirm */
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	/*
+	 * We might want to use Shamir's trick here: make a composite
+	 * window of u*P+v*Q points, to merge the two doubling-ladders
+	 * into one. This, however, has some complications:
+	 *
+	 *  - During the computation, we may hit the point-at-infinity.
+	 *    Thus, we would need p256_add_complete_mixed() (complete
+	 *    formulas for point addition), with a higher cost (17 muls
+	 *    instead of 11).
+	 *
+	 *  - A 4-bit window would be too large, since it would involve
+	 *    16*16-1 = 255 points. For the same window size as in the
+	 *    p256_mul() case, we would need to reduce the window size
+	 *    to 2 bits, and thus perform twice as many non-doubling
+	 *    point additions.
+	 *
+	 *  - The window may itself contain the point-at-infinity, and
+	 *    thus cannot, in all generality, be made of affine points.
+	 *    Instead, we would need to make it a window of points in
+	 *    Jacobian coordinates. Even p256_add_complete_mixed() would
+	 *    be inappropriate.
+	 *
+	 * For these reasons, the code below performs two separate
+	 * point multiplications, then computes the final point addition
+	 * (which is both a "normal" addition, and a doubling, to handle
+	 * all cases).
+	 */
+
+	p256_jacobian P, Q;
+	uint32_t r, t, s;
+	uint64_t z;
+
+	(void)curve;
+	if (len != 65) {
+		return 0;
+	}
+	r = point_decode(&P, A);
+	p256_mul(&P, x, xlen);	/* x and y: no check_scalar() in this function */
+	if (B == NULL) {	/* no second point: the generator is used */
+		p256_mulgen(&Q, y, ylen);
+	} else {
+		r &= point_decode(&Q, B);
+		p256_mul(&Q, y, ylen);
+	}
+
+	/*
+	 * The final addition may fail in case both points are equal.
+	 */
+	t = p256_add(&P, &Q);
+	f256_final_reduce(P.z);
+	z = P.z[0] | P.z[1] | P.z[2] | P.z[3];
+	s = EQ((uint32_t)(z | (z >> 32)), 0);	/* tests all 256 bits of z3 */
+	p256_double(&Q);
+
+	/*
+	 * If s is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
+	 * have the following:
+	 *
+	 *   s = 0, t = 0   return P (normal addition)
+	 *   s = 0, t = 1   return P (normal addition)
+	 *   s = 1, t = 0   return Q (a 'double' case)
+	 *   s = 1, t = 1   report an error (P+Q = 0)
+	 */
+	CCOPY(s & ~t, &P, &Q, sizeof Q);
+	point_encode(A, &P);
+	r &= ~(s & t);
+	return r;
+}
+
+/* see bearssl_ec.h; the entries are positional, field names are not visible here */
+const br_ec_impl br_ec_p256_m64 = {
+	(uint32_t)0x00800000,	/* only bit 23 set; presumably a curve mask -- confirm */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
+
+/* see bearssl_ec.h (variant used when the implementation above is compiled) */
+const br_ec_impl *
+br_ec_p256_m64_get(void)
+{
+	return &br_ec_p256_m64;
+}
+
+#else
+
+/* see bearssl_ec.h (fallback variant: no implementation, null pointer returned) */
+const br_ec_impl *
+br_ec_p256_m64_get(void)
+{
+	return 0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/ec_prime_i15.c b/third_party/bearssl/src/ec_prime_i15.c
new file mode 100644
index 0000000..f86dbe6
--- /dev/null
+++ b/third_party/bearssl/src/ec_prime_i15.c
@@ -0,0 +1,824 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Parameters for supported curves (element 0 of each array is a header word):
+ *   - field modulus p
+ *   - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p)
+ *   - b*R mod p (b is the second curve equation parameter)
+ */
+
+static const uint16_t P256_P[] = {
+	0x0111,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x003F, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x4000, 0x7FFF,
+	0x7FFF, 0x0001
+};
+
+static const uint16_t P256_R2[] = {
+	0x0111,
+	0x0000, 0x6000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7FFC, 0x7FFF,
+	0x7FBF, 0x7FFF, 0x7FBF, 0x7FFF, 0x7FFF, 0x7FFF, 0x77FF, 0x7FFF,
+	0x4FFF, 0x0000
+};
+
+static const uint16_t P256_B[] = {
+	0x0111,
+	0x770C, 0x5EEF, 0x29C4, 0x3EC4, 0x6273, 0x0486, 0x4543, 0x3993,
+	0x3C01, 0x6B56, 0x212E, 0x57EE, 0x4882, 0x204B, 0x7483, 0x3C16,
+	0x0187, 0x0000
+};
+
+static const uint16_t P384_P[] = {
+	0x0199,
+	0x7FFF, 0x7FFF, 0x0003, 0x0000, 0x0000, 0x0000, 0x7FC0, 0x7FFF,
+	0x7EFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x01FF
+};
+
+static const uint16_t P384_R2[] = {
+	0x0199,
+	0x1000, 0x0000, 0x0000, 0x7FFF, 0x7FFF, 0x0001, 0x0000, 0x0010,
+	0x0000, 0x0000, 0x0000, 0x7F00, 0x7FFF, 0x01FF, 0x0000, 0x1000,
+	0x0000, 0x2000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000
+};
+
+static const uint16_t P384_B[] = {
+	0x0199,
+	0x7333, 0x2096, 0x70D1, 0x2310, 0x3020, 0x6197, 0x1464, 0x35BB,
+	0x70CA, 0x0117, 0x1920, 0x4136, 0x5FC8, 0x5713, 0x4938, 0x7DD2,
+	0x4DD2, 0x4A71, 0x0220, 0x683E, 0x2C87, 0x4DB1, 0x7BFF, 0x6C09,
+	0x0452, 0x0084
+};
+
+static const uint16_t P521_P[] = {
+	0x022B,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x07FF
+};
+
+static const uint16_t P521_R2[] = {
+	0x022B,
+	0x0100, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000
+};
+
+static const uint16_t P521_B[] = {
+	0x022B,
+	0x7002, 0x6A07, 0x751A, 0x228F, 0x71EF, 0x5869, 0x20F4, 0x1EFC,
+	0x7357, 0x37E0, 0x4EEC, 0x605E, 0x1652, 0x26F6, 0x31FA, 0x4A8F,
+	0x6193, 0x3C2A, 0x3C42, 0x48C7, 0x3489, 0x6771, 0x4C57, 0x5CCD,
+	0x2725, 0x545B, 0x503B, 0x5B42, 0x21A0, 0x2534, 0x687E, 0x70E4,
+	0x1618, 0x27D7, 0x0465
+};
+
+typedef struct {
+	const uint16_t *p;	/* field modulus */
+	const uint16_t *b;	/* b*R mod p */
+	const uint16_t *R2;	/* R^2 mod p */
+	uint16_t p0i;	/* handed with p to br_i15_montymul() and br_i15_modpow() */
+	size_t point_len;	/* 65, 97 or 133 in the id_to_curve() table */
+} curve_params;
+
+static inline const curve_params *
+id_to_curve(int curve)
+{
+	/* Indexed by (curve - BR_EC_secp256r1); no range check is made. */
+	static const curve_params curves[] = {
+		{ P256_P, P256_B, P256_R2, 0x0001,  65 },
+		{ P384_P, P384_B, P384_R2, 0x0001,  97 },
+		{ P521_P, P521_B, P521_R2, 0x0001, 133 }
+	};
+	return curves + (curve - BR_EC_secp256r1);
+}
+
+#define I15_LEN   ((BR_MAX_EC_SIZE + 29) / 15)
+
+/*
+ * Type for a point in Jacobian coordinates (c[0] = x, c[1] = y, c[2] = z):
+ * -- three values of I15_LEN 16-bit words each, in Montgomery representation
+ * -- affine coordinates are X = x / z^2 and Y = y / z^3
+ * -- for the point at infinity, z = 0
+ */
+typedef struct {
+	uint16_t c[3][I15_LEN];
+} jacobian;
+
+/*
+ * We use a custom interpreter that uses a dozen registers, and
+ * only six operations:
+ *    MSET(d, a)       copy a into d
+ *    MADD(d, a)       d = d+a (modular)
+ *    MSUB(d, a)       d = d-a (modular)
+ *    MMUL(d, a, b)    d = a*b (Montgomery multiplication)
+ *    MINV(d, a, b)    invert d modulo p; a and b are used as scratch registers
+ *    MTZ(d)           clear return value if d = 0
+ * Destination of MMUL (d) must be distinct from operands (a and b).
+ * There is no such constraint for MSUB and MADD.
+ * Registers include the operand coordinates, and temporaries.
+ * Word layout: opcode in bits 12-15, d in 8-11, a in 4-7, b in 0-3; 0 = end.
+ */
+#define MSET(d, a)      (0x0000 + ((d) << 8) + ((a) << 4))
+#define MADD(d, a)      (0x1000 + ((d) << 8) + ((a) << 4))
+#define MSUB(d, a)      (0x2000 + ((d) << 8) + ((a) << 4))
+#define MMUL(d, a, b)   (0x3000 + ((d) << 8) + ((a) << 4) + (b))
+#define MINV(d, a, b)   (0x4000 + ((d) << 8) + ((a) << 4) + (b))
+#define MTZ(d)          (0x5000 + ((d) << 8))
+#define ENDCODE         0
+
+/*
+ * Registers for the input operands.
+ */
+#define P1x    0
+#define P1y    1
+#define P1z    2
+#define P2x    3
+#define P2y    4
+#define P2z    5
+
+/*
+ * Alternate names for the first input operand.
+ */
+#define Px     0
+#define Py     1
+#define Pz     2
+
+/*
+ * Temporaries.
+ */
+#define t1     6
+#define t2     7
+#define t3     8
+#define t4     9
+#define t5    10
+#define t6    11
+#define t7    12
+
+/*
+ * Extra scratch registers available when there is no second operand (e.g.
+ * for "double" and "affine"). They share the numbers of P2x, P2y and P2z.
+ */
+#define t8     3
+#define t9     4
+#define t10    5
+
+/*
+ * Doubling formulas are:
+ *
+ *   s = 4*x*y^2
+ *   m = 3*(x + z^2)*(x - z^2)
+ *   x' = m^2 - 2*s
+ *   y' = m*(s - x') - 8*y^4
+ *   z' = 2*y*z
+ *
+ * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
+ * should. This case should not happen anyway, because our curves have
+ * prime order, and thus do not contain any point of order 2.
+ *
+ * If P is infinity (z = 0), then again the formulas yield infinity,
+ * which is correct. Thus, this code works for all points.
+ *
+ * Cost: 8 multiplications (the 8 MMUL entries below)
+ */
+static const uint16_t code_double[] = {
+	/*
+	 * Compute z^2 (in t1).
+	 */
+	MMUL(t1, Pz, Pz),
+
+	/*
+	 * Compute x-z^2 (in t2) and then x+z^2 (in t1).
+	 */
+	MSET(t2, Px),
+	MSUB(t2, t1),
+	MADD(t1, Px),
+
+	/*
+	 * Compute m = 3*(x+z^2)*(x-z^2) (in t1); the product goes through t3.
+	 */
+	MMUL(t3, t1, t2),
+	MSET(t1, t3),
+	MADD(t1, t3),
+	MADD(t1, t3),
+
+	/*
+	 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
+	 */
+	MMUL(t3, Py, Py),
+	MADD(t3, t3),
+	MMUL(t2, Px, t3),
+	MADD(t2, t2),
+
+	/*
+	 * Compute x' = m^2 - 2*s.
+	 */
+	MMUL(Px, t1, t1),
+	MSUB(Px, t2),
+	MSUB(Px, t2),
+
+	/*
+	 * Compute z' = 2*y*z (through t4: MMUL cannot target an operand).
+	 */
+	MMUL(t4, Py, Pz),
+	MSET(Pz, t4),
+	MADD(Pz, t4),
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
+	 * 2*y^2 in t3.
+	 */
+	MSUB(t2, Px),
+	MMUL(Py, t1, t2),
+	MMUL(t4, t3, t3),
+	MSUB(Py, t4),
+	MSUB(Py, t4),
+
+	ENDCODE
+};
+
+/*
+ * Addition formulas are:
+ *
+ *   u1 = x1 * z2^2
+ *   u2 = x2 * z1^2
+ *   s1 = y1 * z2^3
+ *   s2 = y2 * z1^3
+ *   h = u2 - u1
+ *   r = s2 - s1
+ *   x3 = r^2 - h^3 - 2 * u1 * h^2
+ *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+ *   z3 = h * z1 * z2
+ *
+ * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
+ * z3 == 0, so the result is correct.
+ * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
+ * not correct.
+ * h == 0 only if u1 == u2; this happens in two cases:
+ * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
+ * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 nor P2 is infinity)
+ *
+ * Thus, the following situations are not handled correctly:
+ * -- P1 = 0 and P2 != 0
+ * -- P1 != 0 and P2 = 0
+ * -- P1 = P2
+ * All other cases are properly computed. However, even in "incorrect"
+ * situations, the three coordinates still are properly formed field
+ * elements.
+ *
+ * The returned flag is cleared if r == 0. This happens in the following
+ * cases:
+ * -- Both points are on the same horizontal line (same Y coordinate).
+ * -- Both points are infinity.
+ * -- One point is infinity and the other is on line Y = 0.
+ * The third case cannot happen with our curves (there is no valid point
+ * on line Y = 0 since that would be a point of order 2). If the two
+ * source points are non-infinity, then remains only the case where the
+ * two points are on the same horizontal line.
+ *
+ * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
+ * P2 != 0:
+ * -- If the returned value is not the point at infinity, then it was properly
+ * computed.
+ * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
+ * is indeed the point at infinity.
+ * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
+ * use the 'double' code.
+ *
+ * Cost: 16 multiplications
+ */
+static const uint16_t code_add[] = {
+	/*
+	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
+	 */
+	MMUL(t3, P2z, P2z),
+	MMUL(t1, P1x, t3),
+	MMUL(t4, P2z, t3),
+	MMUL(t3, P1y, t4),
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	MMUL(t4, P1z, P1z),
+	MMUL(t2, P2x, t4),
+	MMUL(t5, P1z, t4),
+	MMUL(t4, P2y, t5),
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 */
+	MSUB(t2, t1),
+	MSUB(t4, t3),
+
+	/*
+	 * Report cases where r = 0 through the returned flag.
+	 */
+	MTZ(t4),
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5).
+	 */
+	MMUL(t7, t2, t2),
+	MMUL(t6, t1, t7),
+	MMUL(t5, t7, t2),
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 * t1 and t7 can be used as scratch registers.
+	 */
+	MMUL(P1x, t4, t4),
+	MSUB(P1x, t5),
+	MSUB(P1x, t6),
+	MSUB(P1x, t6),
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	MSUB(t6, P1x),
+	MMUL(P1y, t4, t6),
+	MMUL(t1, t5, t3),
+	MSUB(P1y, t1),
+
+	/*
+	 * Compute z3 = h*z1*z2.
+	 */
+	MMUL(t1, P1z, P2z),
+	MMUL(P1z, t1, t2),
+
+	ENDCODE
+};
+
+/*
+ * Check that the point is on the curve. This code snippet assumes the
+ * following conventions:
+ * -- Coordinates x and y have been freshly decoded in P1 (but not
+ * converted to Montgomery coordinates yet).
+ * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
+ */
+static const uint16_t code_check[] = {
+
+	/* Convert x and y to Montgomery representation (multiply by R^2). */
+	MMUL(t1, P1x, P2x),
+	MMUL(t2, P1y, P2x),
+	MSET(P1x, t1),
+	MSET(P1y, t2),
+
+	/* Compute x^3 in t1. */
+	MMUL(t2, P1x, P1x),
+	MMUL(t1, P1x, t2),
+
+	/* Subtract 3*x from t1. */
+	MSUB(t1, P1x),
+	MSUB(t1, P1x),
+	MSUB(t1, P1x),
+
+	/* Add b. */
+	MADD(t1, P2y),
+
+	/* Compute y^2 in t2. */
+	MMUL(t2, P1y, P1y),
+
+	/* Compare y^2 with x^3 - 3*x + b; MTZ clears the flag when they match. */
+	MSUB(t1, t2),
+	MTZ(t1),
+
+	/* Set z to 1 (in Montgomery representation), as the product of R^2 and 1. */
+	MMUL(P1z, P2x, P2z),
+
+	ENDCODE
+};
+
+/*
+ * Conversion back to affine coordinates. This code snippet assumes that
+ * the z coordinate of P2 is set to 1 (not in Montgomery representation).
+ */
+static const uint16_t code_affine[] = {
+
+	/* Save z*R in t1. */
+	MSET(t1, P1z),
+
+	/* Compute z^3 in t2 (the last MMUL, by P2z = 1, leaves Montgomery form). */
+	MMUL(t2, P1z, P1z),
+	MMUL(t3, P1z, t2),
+	MMUL(t2, t3, P2z),
+
+	/* Invert to (1/z^3) in t2; t3 and t4 are scratch registers. */
+	MINV(t2, t3, t4),
+
+	/* Compute y, from (1/z^3) and the Montgomery-form y copied in t3. */
+	MSET(t3, P1y),
+	MMUL(P1y, t2, t3),
+
+	/* Compute (1/z^2) in t3, from (1/z^3) and the saved z*R. */
+	MMUL(t3, t2, t1),
+
+	/* Compute x, from (1/z^2) and the Montgomery-form x copied in t2. */
+	MSET(t2, P1x),
+	MMUL(P1x, t2, t3),
+
+	ENDCODE
+};
+
+/*
+ * Run one of the code_* programs. Registers 0..2 are loaded from P1 and
+ * registers 3..5 from P2; when the program ends, registers 0..2 are
+ * written back into P1. Returned value is 1, unless some MTZ instruction
+ * met a zero register, in which case the returned value is 0.
+ */
+static uint32_t
+run_code(jacobian *P1, const jacobian *P2,
+	const curve_params *cc, const uint16_t *code)
+{
+	uint32_t r;
+	uint16_t t[13][I15_LEN];
+	size_t u;
+
+	r = 1;
+
+	/* Copy the two operands in the dedicated registers. */
+	memcpy(t[P1x], P1->c, 3 * I15_LEN * sizeof(uint16_t));
+	memcpy(t[P2x], P2->c, 3 * I15_LEN * sizeof(uint16_t));
+
+	/* Run formulas; a zero instruction word (ENDCODE) stops the loop. */
+	for (u = 0;; u ++) {
+		unsigned op, d, a, b;
+
+		op = code[u];
+		if (op == 0) {
+			break;
+		}
+		d = (op >> 8) & 0x0F;
+		a = (op >> 4) & 0x0F;
+		b = op & 0x0F;
+		op >>= 12;
+		switch (op) {
+			uint32_t ctl;
+			size_t plen;
+			unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
+
+		case 0:	/* MSET */
+			memcpy(t[d], t[a], I15_LEN * sizeof(uint16_t));
+			break;
+		case 1:	/* MADD */
+			ctl = br_i15_add(t[d], t[a], 1);
+			ctl |= NOT(br_i15_sub(t[d], cc->p, 0));
+			br_i15_sub(t[d], cc->p, ctl);
+			break;
+		case 2:	/* MSUB */
+			br_i15_add(t[d], cc->p, br_i15_sub(t[d], t[a], 1));
+			break;
+		case 3:	/* MMUL */
+			br_i15_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
+			break;
+		case 4:	/* MINV: exponent is the encoding of p, last byte minus 2 */
+			plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
+			br_i15_encode(tp, plen, cc->p);
+			tp[plen - 1] -= 2;
+			br_i15_modpow(t[d], tp, plen,
+				cc->p, cc->p0i, t[a], t[b]);
+			break;
+		default:	/* MTZ (0x5000); any other opcode value lands here too */
+			r &= ~br_i15_iszero(t[d]);
+			break;
+		}
+	}
+
+	/* Copy back result. */
+	memcpy(P1->c, t[P1x], 3 * I15_LEN * sizeof(uint16_t));
+	return r;
+}
+
+static void
+set_one(uint16_t *x, const uint16_t *p)
+{
+	/* Word count (header word included), derived from the header of p. */
+	size_t nwords = (p[0] + 31) >> 4;
+
+	memset(x, 0, nwords * sizeof x[0]);
+	x[0] = p[0];
+	x[1] = 0x0001;
+}
+
+static void
+point_zero(jacobian *P, const curve_params *cc)
+{
+	memset(P->c, 0, sizeof P->c);	/* z = 0: the point at infinity */
+	P->c[2][0] = P->c[1][0] = P->c[0][0] = cc->p[0];	/* header words */
+}
+
+static inline void
+point_double(jacobian *P, const curve_params *cc)
+{
+	run_code(P, P, cc, code_double);	/* P <- 2*P; returned flag not used */
+}
+
+static inline uint32_t
+point_add(jacobian *P1, const jacobian *P2, const curve_params *cc)
+{
+	return run_code(P1, P2, cc, code_add);	/* P1 <- P1+P2; 0 when r == 0 */
+}
+
+static void
+point_mul(jacobian *P, const unsigned char *x, size_t xlen,
+	const curve_params *cc)
+{
+	/*
+	 * We do a simple double-and-add ladder with a 2-bit window
+	 * to make only one add every two doublings. We thus first
+	 * precompute 2P and 3P in some local buffers. The multiplier
+	 * x[] is read from x[0], two bits at a time, top bits first.
+	 * We always perform two doublings and one addition; the
+	 * addition is with P, 2P or 3P (selected into T) and its
+	 * result goes to the temporary U.
+	 *
+	 * The addition code cannot handle cases where one of the
+	 * operands is infinity, which is the case at the start of the
+	 * ladder. We therefore maintain the flag qz (1 while Q is
+	 * still infinity): then Q is set to T instead of U.
+	 */
+	uint32_t qz;
+	jacobian P2, P3, Q, T, U;
+
+	memcpy(&P2, P, sizeof P2);
+	point_double(&P2, cc);
+	memcpy(&P3, P, sizeof P3);
+	point_add(&P3, &P2, cc);
+
+	point_zero(&Q, cc);
+	qz = 1;
+	while (xlen -- > 0) {
+		int k;
+
+		for (k = 6; k >= 0; k -= 2) {
+			uint32_t bits;
+			uint32_t bnz;
+
+			point_double(&Q, cc);
+			point_double(&Q, cc);
+			memcpy(&T, P, sizeof T);
+			memcpy(&U, &Q, sizeof U);
+			bits = (*x >> k) & (uint32_t)3;
+			bnz = NEQ(bits, 0);
+			CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
+			CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
+			point_add(&U, &T, cc);
+			CCOPY(bnz & qz, &Q, &T, sizeof Q);
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
+			qz &= ~bnz;
+		}
+		x ++;
+	}
+	memcpy(P, &Q, sizeof Q);
+}
+
+/*
+ * Decode point into Jacobian coordinates. This function does not support
+ * the point at infinity. If the point is invalid then this returns 0, but
+ * the coordinates are still set to properly formed field elements.
+ */
+static uint32_t
+point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc)
+{
+	/*
+	 * Points must use uncompressed format:
+	 * -- first byte is 0x04;
+	 * -- coordinates X and Y use unsigned big-endian, with the same
+	 *    length as the field modulus.
+	 *
+	 * We don't support hybrid format (uncompressed, but first byte
+	 * has value 0x06 or 0x07, depending on the least significant bit
+	 * of Y) because it is rather useless, and explicitly forbidden
+	 * by PKIX (RFC 5480, section 2.2).
+	 *
+	 * We don't support compressed format either, because it is not
+	 * much used in practice (there are or were patent-related
+	 * concerns about point compression, which explains the lack of
+	 * generalised support). Also, point compression support would
+	 * need a bit more code.
+	 */
+	const unsigned char *buf;
+	size_t plen, zlen;
+	uint32_t r;
+	jacobian Q;
+
+	buf = src;
+	point_zero(P, cc);
+	plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
+	if (len != 1 + (plen << 1)) {
+		return 0;
+	}
+	r = br_i15_decode_mod(P->c[0], buf + 1, plen, cc->p);
+	r &= br_i15_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p);
+
+	/*
+	 * Check first byte.
+	 */
+	r &= EQ(buf[0], 0x04);
+
+	/*
+	 * Convert coordinates and check that the point is valid. The
+	 * second operand carries the constants that code_check expects:
+	 * R^2, b*R and 1. code_check ends with MTZ on
+	 * x^3 - 3*x + b - y^2, so run_code() returns 0 when the curve
+	 * equation holds; hence the inversion below.
+	 */
+	zlen = ((cc->p[0] + 31) >> 4) * sizeof(uint16_t);
+	memcpy(Q.c[0], cc->R2, zlen);
+	memcpy(Q.c[1], cc->b, zlen);
+	set_one(Q.c[2], cc->p);
+	r &= ~run_code(P, &Q, cc, code_check);
+	return r;
+}
+
+/*
+ * Encode a point. This method assumes that the point is correct and is
+ * not the point at infinity. Encoded size is always 1+2*plen, where
+ * plen is the field modulus length, in bytes.
+ */
+static void
+point_encode(void *dst, const jacobian *P, const curve_params *cc)
+{
+	unsigned char *buf;
+	size_t plen;
+	jacobian Q, T;
+
+	buf = dst;
+	plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;	/* bytes per coordinate */
+	buf[0] = 0x04;	/* uncompressed-format header byte */
+	memcpy(&Q, P, sizeof *P);	/* work on a copy: P is const */
+	set_one(T.c[2], cc->p);	/* only T's z is set; T.c[0] and T.c[1] stay uninitialised */
+	run_code(&Q, &T, cc, code_affine);	/* presumably reads only z of T -- confirm */
+	br_i15_encode(buf + 1, plen, Q.c[0]);	/* X */
+	br_i15_encode(buf + 1 + plen, plen, Q.c[1]);	/* Y */
+}
+
+static const br_ec_curve_def *
+id_to_curve_def(int curve)
+{
+	/* Any curve ID other than the three below maps to NULL. */
+	if (curve == BR_EC_secp256r1) {
+		return &br_secp256r1;
+	} else if (curve == BR_EC_secp384r1) {
+		return &br_secp384r1;
+	} else if (curve == BR_EC_secp521r1) {
+		return &br_secp521r1;
+	}
+	return NULL;
+}
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	/* 'curve' must be supported: the lookup result is not NULL-checked. */
+	const br_ec_curve_def *def = id_to_curve_def(curve);
+
+	*len = def->generator_len;
+	return def->generator;
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	/* 'curve' must be supported: the lookup result is not NULL-checked. */
+	const br_ec_curve_def *def = id_to_curve_def(curve);
+
+	*len = def->order_len;
+	return def->order;
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	(void)api_generator(curve, len);	/* *len = encoded point length */
+	*len = *len / 2;	/* X length: (1 + 2*plen) / 2 = plen */
+	return 1;	/* X starts right after the header byte */
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	const curve_params *params = id_to_curve(curve);
+	uint32_t ok;
+	jacobian pt;
+
+	if (params->point_len != Glen) {
+		return 0;	/* wrong size: G is left untouched */
+	}
+	ok = point_decode(&pt, G, Glen, params);
+	point_mul(&pt, x, xlen, params);
+	/* The product is written back over G whatever the value of ok. */
+	point_encode(G, &pt, params);
+	return ok;
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	size_t gen_len;
+	const unsigned char *gen = api_generator(curve, &gen_len);
+
+	/* Seed R with the encoded generator, then multiply it in place. */
+	memcpy(R, gen, gen_len);
+	api_mul(R, gen_len, x, xlen, curve);
+	return gen_len;
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	uint32_t r, t, z;
+	const curve_params *cc;
+	jacobian P, Q;
+
+	/*
+	 * Compute x*A + y*B into A (B == NULL selects the generator).
+	 * TODO: see about merging the two ladders; we currently do two
+	 * independent point multiplications (wasteful but short code).
+	 */
+
+	cc = id_to_curve(curve);
+	if (len != cc->point_len) {
+		return 0;	/* wrong length: A is left untouched */
+	}
+	r = point_decode(&P, A, len, cc);
+	if (B == NULL) {
+		size_t Glen;	/* written by api_generator(), otherwise unused */
+
+		B = api_generator(curve, &Glen);
+	}
+	r &= point_decode(&Q, B, len, cc);
+	point_mul(&P, x, xlen, cc);	/* P = x*A */
+	point_mul(&Q, y, ylen, cc);	/* Q = y*B */
+
+	/*
+	 * We want to compute P+Q. Since the base points A and B are distinct
+	 * from infinity, and the multipliers are non-zero and lower than the
+	 * curve order, then we know that P and Q are non-infinity. This
+	 * leaves two special situations to test for:
+	 * -- If P = Q then we must use point_double().
+	 * -- If P+Q = 0 then we must report an error.
+	 */
+	t = point_add(&P, &Q, cc);	/* flag returned by the addition code */
+	point_double(&Q, cc);	/* always computed; kept only when P = Q */
+	z = br_i15_iszero(P.c[2]);	/* tests the z coordinate of the sum */
+
+	/*
+	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
+	 * have the following:
+	 *
+	 *   z = 0, t = 0   return P (normal addition)
+	 *   z = 0, t = 1   return P (normal addition)
+	 *   z = 1, t = 0   return Q (a 'double' case)
+	 *   z = 1, t = 1   report an error (P+Q = 0)
+	 */
+	CCOPY(z & ~t, &P, &Q, sizeof Q);
+	point_encode(A, &P, cc);	/* A is overwritten even when r ends up 0 */
+	r &= ~(z & t);
+
+	return r;
+}
+
+/* see bearssl_ec.h */
+const br_ec_impl br_ec_prime_i15 = {
+	(uint32_t)0x03800000,	/* bits 23..25 set; presumably the supported_curves mask */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
diff --git a/third_party/bearssl/src/ec_prime_i31.c b/third_party/bearssl/src/ec_prime_i31.c
new file mode 100644
index 0000000..b205f36
--- /dev/null
+++ b/third_party/bearssl/src/ec_prime_i31.c
@@ -0,0 +1,826 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Parameters for supported curves (field modulus, and 'b' equation
+ * parameter; both values use the 'i31' format, and 'b' is in Montgomery
+ * representation).
+ */
+
+static const uint32_t P256_P[] = {	/* field modulus of the first id_to_curve() entry */
+	0x00000108,	/* header word: 0x108 - (0x108 >> 5) = 256 */
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x00000007,
+	0x00000000, 0x00000000, 0x00000040, 0x7FFFFF80,
+	0x000000FF
+};
+
+static const uint32_t P256_R2[] = {	/* R^2 operand loaded by point_decode() for code_check */
+	0x00000108,	/* same header word as P256_P */
+	0x00014000, 0x00018000, 0x00000000, 0x7FF40000,
+	0x7FEFFFFF, 0x7FF7FFFF, 0x7FAFFFFF, 0x005FFFFF,
+	0x00000000
+};
+
+static const uint32_t P256_B[] = {	/* curve parameter 'b', Montgomery representation */
+	0x00000108,	/* same header word as P256_P */
+	0x6FEE1803, 0x6229C4BD, 0x21B139BE, 0x327150AA,
+	0x3567802E, 0x3F7212ED, 0x012E4355, 0x782DD38D,
+	0x0000000E
+};
+
+static const uint32_t P384_P[] = {	/* field modulus of the second id_to_curve() entry */
+	0x0000018C,	/* header word: 0x18C - (0x18C >> 5) = 384 */
+	0x7FFFFFFF, 0x00000001, 0x00000000, 0x7FFFFFF8,
+	0x7FFFFFEF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x00000FFF
+};
+
+static const uint32_t P384_R2[] = {	/* R^2 operand loaded by point_decode() for code_check */
+	0x0000018C,	/* same header word as P384_P */
+	0x00000000, 0x00000080, 0x7FFFFE00, 0x000001FF,
+	0x00000800, 0x00000000, 0x7FFFE000, 0x00001FFF,
+	0x00008000, 0x00008000, 0x00000000, 0x00000000,
+	0x00000000
+};
+
+static const uint32_t P384_B[] = {	/* curve parameter 'b', Montgomery representation */
+	0x0000018C,	/* same header word as P384_P */
+	0x6E666840, 0x070D0392, 0x5D810231, 0x7651D50C,
+	0x17E218D6, 0x1B192002, 0x44EFE441, 0x3A524E2B,
+	0x2719BA5F, 0x41F02209, 0x36C5643E, 0x5813EFFE,
+	0x000008A5
+};
+
+static const uint32_t P521_P[] = {	/* field modulus of the third id_to_curve() entry */
+	0x00000219,	/* header word: 0x219 - (0x219 >> 5) = 521 */
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
+	0x01FFFFFF
+};
+
+static const uint32_t P521_R2[] = {	/* R^2 operand loaded by point_decode() for code_check */
+	0x00000219,	/* same header word as P521_P */
+	0x00001000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000
+};
+
+static const uint32_t P521_B[] = {	/* curve parameter 'b', Montgomery representation */
+	0x00000219,	/* same header word as P521_P */
+	0x540FC00A, 0x228FEA35, 0x2C34F1EF, 0x67BF107A,
+	0x46FC1CD5, 0x1605E9DD, 0x6937B165, 0x272A3D8F,
+	0x42785586, 0x44C8C778, 0x15F3B8B4, 0x64B73366,
+	0x03BA8B69, 0x0D05B42A, 0x21F929A2, 0x2C31C393,
+	0x00654FAE
+};
+
+typedef struct {
+	const uint32_t *p;	/* field modulus */
+	const uint32_t *b;	/* curve parameter 'b', Montgomery representation */
+	const uint32_t *R2;	/* R^2, used to convert into Montgomery representation */
+	uint32_t p0i;	/* passed with p to br_i31_montymul() and br_i31_modpow() */
+	size_t point_len;	/* encoded point length required by api_mul()/api_muladd() */
+} curve_params;
+
+static inline const curve_params *
+id_to_curve(int curve)
+{
+	static const curve_params table[] = {
+		{ P256_P, P256_B, P256_R2, 0x00000001,  65 },
+		{ P384_P, P384_B, P384_R2, 0x00000001,  97 },
+		{ P521_P, P521_B, P521_R2, 0x00000001, 133 }
+	};
+	/* No range check: 'curve' must be one of the three supported IDs. */
+	return table + (curve - BR_EC_secp256r1);
+}
+
+#define I31_LEN   ((BR_MAX_EC_SIZE + 61) / 31)	/* ceil(BR_MAX_EC_SIZE / 31) words + 1 header word */
+
+/*
+ * Type for a point in Jacobian coordinates:
+ * -- three values, x, y and z, in Montgomery representation
+ * -- affine coordinates are X = x / z^2 and Y = y / z^3
+ * -- for the point at infinity, z = 0
+ */
+typedef struct {
+	uint32_t c[3][I31_LEN];	/* c[0] = x, c[1] = y, c[2] = z */
+} jacobian;
+
+/*
+ * We use a custom interpreter that uses a dozen registers, and
+ * only six operations:
+ *    MSET(d, a)       copy a into d
+ *    MADD(d, a)       d = d+a (modular)
+ *    MSUB(d, a)       d = d-a (modular)
+ *    MMUL(d, a, b)    d = a*b (Montgomery multiplication)
+ *    MINV(d, a, b)    invert d modulo p; a and b are used as scratch registers
+ *    MTZ(d)           clear return value if d = 0
+ * Destination of MMUL (d) must be distinct from operands (a and b).
+ * There is no such constraint for MSUB and MADD.
+ *
+ * Registers include the operand coordinates, and temporaries.
+ */
+#define MSET(d, a)      (0x0000 + ((d) << 8) + ((a) << 4))	/* opcode 0 */
+#define MADD(d, a)      (0x1000 + ((d) << 8) + ((a) << 4))	/* opcode 1 */
+#define MSUB(d, a)      (0x2000 + ((d) << 8) + ((a) << 4))	/* opcode 2 */
+#define MMUL(d, a, b)   (0x3000 + ((d) << 8) + ((a) << 4) + (b))	/* opcode 3 */
+#define MINV(d, a, b)   (0x4000 + ((d) << 8) + ((a) << 4) + (b))	/* opcode 4 */
+#define MTZ(d)          (0x5000 + ((d) << 8))	/* opcode 5 */
+#define ENDCODE         0	/* terminator; same value as MSET(0, 0) */
+
+/*
+ * Registers for the input operands.
+ */
+#define P1x    0
+#define P1y    1
+#define P1z    2
+#define P2x    3
+#define P2y    4
+#define P2z    5
+
+/*
+ * Alternate names for the first input operand.
+ */
+#define Px     0
+#define Py     1
+#define Pz     2
+
+/*
+ * Temporaries.
+ */
+#define t1     6
+#define t2     7
+#define t3     8
+#define t4     9
+#define t5    10
+#define t6    11
+#define t7    12	/* highest index: run_code() allocates 13 registers */
+
+/*
+ * Extra scratch registers that alias the second operand (P2x, P2y, P2z).
+ * None of the code sequences in this file uses them; "affine" reads P2z.
+ */
+#define t8     3
+#define t9     4
+#define t10    5
+
+/*
+ * Doubling formulas are:
+ *
+ *   s = 4*x*y^2
+ *   m = 3*(x + z^2)*(x - z^2)
+ *   x' = m^2 - 2*s
+ *   y' = m*(s - x') - 8*y^4
+ *   z' = 2*y*z
+ *
+ * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
+ * should. This case should not happen anyway, because our curves have
+ * prime order, and thus do not contain any point of order 2.
+ *
+ * If P is infinity (z = 0), then again the formulas yield infinity,
+ * which is correct. Thus, this code works for all points.
+ *
+ * Cost: 8 multiplications
+ */
+static const uint16_t code_double[] = {	/* doubles P1 in place; P2 registers untouched */
+	/*
+	 * Compute z^2 (in t1).
+	 */
+	MMUL(t1, Pz, Pz),	/* t1 = z^2 */
+
+	/*
+	 * Compute x-z^2 (in t2) and then x+z^2 (in t1).
+	 */
+	MSET(t2, Px),	/* t2 = x */
+	MSUB(t2, t1),	/* t2 = x - z^2 */
+	MADD(t1, Px),	/* t1 = x + z^2 */
+
+	/*
+	 * Compute m = 3*(x+z^2)*(x-z^2) (in t1).
+	 */
+	MMUL(t3, t1, t2),	/* t3 = (x+z^2)*(x-z^2) */
+	MSET(t1, t3),	/* t1 = t3 */
+	MADD(t1, t3),	/* t1 = 2*t3 */
+	MADD(t1, t3),	/* t1 = 3*t3 = m */
+
+	/*
+	 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
+	 */
+	MMUL(t3, Py, Py),	/* t3 = y^2 */
+	MADD(t3, t3),	/* t3 = 2*y^2 */
+	MMUL(t2, Px, t3),	/* t2 = 2*x*y^2 */
+	MADD(t2, t2),	/* t2 = 4*x*y^2 = s */
+
+	/*
+	 * Compute x' = m^2 - 2*s.
+	 */
+	MMUL(Px, t1, t1),	/* Px = m^2 */
+	MSUB(Px, t2),	/* Px = m^2 - s */
+	MSUB(Px, t2),	/* Px = m^2 - 2*s = x' */
+
+	/*
+	 * Compute z' = 2*y*z.
+	 */
+	MMUL(t4, Py, Pz),	/* t4 = y*z (MMUL destination must differ from operands) */
+	MSET(Pz, t4),	/* Pz = y*z */
+	MADD(Pz, t4),	/* Pz = 2*y*z = z' */
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
+	 * 2*y^2 in t3.
+	 */
+	MSUB(t2, Px),	/* t2 = s - x' */
+	MMUL(Py, t1, t2),	/* Py = m*(s - x') */
+	MMUL(t4, t3, t3),	/* t4 = (2*y^2)^2 = 4*y^4 */
+	MSUB(Py, t4),	/* Py = m*(s - x') - 4*y^4 */
+	MSUB(Py, t4),	/* Py = m*(s - x') - 8*y^4 = y' */
+
+	ENDCODE
+};
+
+/*
+ * Addition formulas are:
+ *
+ *   u1 = x1 * z2^2
+ *   u2 = x2 * z1^2
+ *   s1 = y1 * z2^3
+ *   s2 = y2 * z1^3
+ *   h = u2 - u1
+ *   r = s2 - s1
+ *   x3 = r^2 - h^3 - 2 * u1 * h^2
+ *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+ *   z3 = h * z1 * z2
+ *
+ * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
+ * z3 == 0, so the result is correct.
+ * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
+ * not correct.
+ * h == 0 only if u1 == u2; this happens in two cases:
+ * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
+ * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 nor P2 is infinity)
+ *
+ * Thus, the following situations are not handled correctly:
+ * -- P1 = 0 and P2 != 0
+ * -- P1 != 0 and P2 = 0
+ * -- P1 = P2
+ * All other cases are properly computed. However, even in "incorrect"
+ * situations, the three coordinates still are properly formed field
+ * elements.
+ *
+ * The returned flag is cleared if r == 0. This happens in the following
+ * cases:
+ * -- Both points are on the same horizontal line (same Y coordinate).
+ * -- Both points are infinity.
+ * -- One point is infinity and the other is on line Y = 0.
+ * The third case cannot happen with our curves (there is no valid point
+ * on line Y = 0 since that would be a point of order 2). If the two
+ * source points are non-infinity, then remains only the case where the
+ * two points are on the same horizontal line.
+ *
+ * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
+ * P2 != 0:
+ * -- If the returned value is not the point at infinity, then it was properly
+ * computed.
+ * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
+ * is indeed the point at infinity.
+ * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
+ * use the 'double' code.
+ *
+ * Cost: 16 multiplications
+ */
+static const uint16_t code_add[] = {	/* P1 = P1 + P2; one MTZ, on r */
+	/*
+	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
+	 */
+	MMUL(t3, P2z, P2z),	/* t3 = z2^2 */
+	MMUL(t1, P1x, t3),	/* t1 = x1*z2^2 = u1 */
+	MMUL(t4, P2z, t3),	/* t4 = z2^3 */
+	MMUL(t3, P1y, t4),	/* t3 = y1*z2^3 = s1 */
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	MMUL(t4, P1z, P1z),	/* t4 = z1^2 */
+	MMUL(t2, P2x, t4),	/* t2 = x2*z1^2 = u2 */
+	MMUL(t5, P1z, t4),	/* t5 = z1^3 */
+	MMUL(t4, P2y, t5),	/* t4 = y2*z1^3 = s2 */
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 */
+	MSUB(t2, t1),	/* t2 = h */
+	MSUB(t4, t3),	/* t4 = r */
+
+	/*
+	 * Report cases where r = 0 through the returned flag.
+	 */
+	MTZ(t4),	/* run_code() returns 0 when r == 0 */
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5).
+	 */
+	MMUL(t7, t2, t2),	/* t7 = h^2 */
+	MMUL(t6, t1, t7),	/* t6 = u1*h^2 */
+	MMUL(t5, t7, t2),	/* t5 = h^3 */
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 * t1 and t7 can be used as scratch registers.
+	 */
+	MMUL(P1x, t4, t4),	/* P1x = r^2 */
+	MSUB(P1x, t5),	/* P1x = r^2 - h^3 */
+	MSUB(P1x, t6),	/* P1x = r^2 - h^3 - u1*h^2 */
+	MSUB(P1x, t6),	/* P1x = r^2 - h^3 - 2*u1*h^2 = x3 */
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	MSUB(t6, P1x),	/* t6 = u1*h^2 - x3 */
+	MMUL(P1y, t4, t6),	/* P1y = r*(u1*h^2 - x3) */
+	MMUL(t1, t5, t3),	/* t1 = h^3*s1 */
+	MSUB(P1y, t1),	/* P1y = y3 */
+
+	/*
+	 * Compute z3 = h*z1*z2.
+	 */
+	MMUL(t1, P1z, P2z),	/* t1 = z1*z2 */
+	MMUL(P1z, t1, t2),	/* P1z = z1*z2*h = z3 */
+
+	ENDCODE
+};
+
+/*
+ * Check that the point is on the curve. This code snippet assumes the
+ * following conventions:
+ * -- Coordinates x and y have been freshly decoded in P1 (but not
+ * converted to Montgomery coordinates yet).
+ * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
+ */
+static const uint16_t code_check[] = {	/* run_code() returns 0 when the equation holds */
+
+	/* Convert x and y to Montgomery representation. */
+	MMUL(t1, P1x, P2x),	/* t1 = x * R^2 */
+	MMUL(t2, P1y, P2x),	/* t2 = y * R^2 */
+	MSET(P1x, t1),	/* copied back: MMUL destination must differ from operands */
+	MSET(P1y, t2),
+
+	/* Compute x^3 in t1. */
+	MMUL(t2, P1x, P1x),	/* t2 = x^2 */
+	MMUL(t1, P1x, t2),	/* t1 = x^3 */
+
+	/* Subtract 3*x from t1. */
+	MSUB(t1, P1x),
+	MSUB(t1, P1x),
+	MSUB(t1, P1x),	/* t1 = x^3 - 3*x */
+
+	/* Add b. */
+	MADD(t1, P2y),	/* t1 = x^3 - 3*x + b */
+
+	/* Compute y^2 in t2. */
+	MMUL(t2, P1y, P1y),
+
+	/* Compare y^2 with x^3 - 3*x + b; they must match. */
+	MSUB(t1, t2),	/* t1 = x^3 - 3*x + b - y^2 */
+	MTZ(t1),	/* clears the flag when t1 == 0, i.e. for a point on the curve */
+
+	/* Set z to 1 (in Montgomery representation). */
+	MMUL(P1z, P2x, P2z),	/* P1z = R^2 * 1 */
+
+	ENDCODE
+};
+
+/*
+ * Conversion back to affine coordinates. This code snippet assumes that
+ * the z coordinate of P2 is set to 1 (not in Montgomery representation).
+ */
+static const uint16_t code_affine[] = {	/* P1x, P1y become affine; reads P2z only from P2 */
+
+	/* Save z*R in t1. */
+	MSET(t1, P1z),
+
+	/* Compute z^3 in t2. */
+	MMUL(t2, P1z, P1z),	/* t2 = z^2 */
+	MMUL(t3, P1z, t2),	/* t3 = z^3 */
+	MMUL(t2, t3, P2z),	/* t2 = t3 * 1, with the plain (non-Montgomery) 1 in P2z */
+
+	/* Invert to (1/z^3) in t2. */
+	MINV(t2, t3, t4),	/* t3 and t4 are scratch and get overwritten */
+
+	/* Compute y. */
+	MSET(t3, P1y),	/* copy first: MMUL destination must differ from operands */
+	MMUL(P1y, t2, t3),	/* P1y = y / z^3 */
+
+	/* Compute (1/z^2) in t3. */
+	MMUL(t3, t2, t1),	/* t3 = (1/z^3) * z */
+
+	/* Compute x. */
+	MSET(t2, P1x),	/* copy first, as for y */
+	MMUL(P1x, t2, t3),	/* P1x = x / z^2 */
+
+	ENDCODE
+};
+
+static uint32_t
+run_code(jacobian *P1, const jacobian *P2,
+	const curve_params *cc, const uint16_t *code)
+{
+	uint32_t r;
+	uint32_t t[13][I31_LEN];	/* register file: indices 0..12 */
+	size_t u;
+
+	r = 1;	/* returned flag; only MTZ can clear it */
+
+	/*
+	 * Interpreter entry: load P1 into registers 0..2, P2 into 3..5.
+	 */
+	memcpy(t[P1x], P1->c, 3 * I31_LEN * sizeof(uint32_t));
+	memcpy(t[P2x], P2->c, 3 * I31_LEN * sizeof(uint32_t));
+
+	/*
+	 * Run formulas.
+	 */
+	for (u = 0;; u ++) {
+		unsigned op, d, a, b;
+
+		op = code[u];
+		if (op == 0) {	/* ENDCODE */
+			break;
+		}
+		d = (op >> 8) & 0x0F;	/* destination register */
+		a = (op >> 4) & 0x0F;	/* first operand register */
+		b = op & 0x0F;	/* second operand register (MMUL, MINV) */
+		op >>= 12;	/* opcode number */
+		switch (op) {
+			uint32_t ctl;	/* declared before the first case label */
+			size_t plen;
+			unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
+
+		case 0:	/* MSET */
+			memcpy(t[d], t[a], I31_LEN * sizeof(uint32_t));
+			break;
+		case 1:	/* MADD */
+			ctl = br_i31_add(t[d], t[a], 1);	/* presumably the carry */
+			ctl |= NOT(br_i31_sub(t[d], cc->p, 0));	/* presumably: sum >= p */
+			br_i31_sub(t[d], cc->p, ctl);	/* subtract p under control of ctl */
+			break;
+		case 2:	/* MSUB: p is added back under control of the sub result */
+			br_i31_add(t[d], cc->p, br_i31_sub(t[d], t[a], 1));
+			break;
+		case 3:	/* MMUL */
+			br_i31_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
+			break;
+		case 4:	/* MINV: raise t[d] to the power p - 2 */
+			plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3;
+			br_i31_encode(tp, plen, cc->p);
+			tp[plen - 1] -= 2;	/* low byte of p is 0xFF for all three curves */
+			br_i31_modpow(t[d], tp, plen,
+				cc->p, cc->p0i, t[a], t[b]);
+			break;
+		default:	/* MTZ (opcode 5) and any other opcode value */
+			r &= ~br_i31_iszero(t[d]);
+			break;
+		}
+	}
+
+	/*
+	 * Write registers 0..2 back to P1; P2 is never modified.
+	 */
+	memcpy(P1->c, t[P1x], 3 * I31_LEN * sizeof(uint32_t));
+	return r;
+}
+
+static void
+set_one(uint32_t *x, const uint32_t *p)
+{
+	/* Word count (header included) for an integer sized like p. */
+	size_t nwords = (p[0] + 63) >> 5;
+
+	memset(x, 0, nwords * sizeof *x);
+	x[1] = 1;
+	x[0] = p[0];
+}
+
+static void
+point_zero(jacobian *P, const curve_params *cc)
+{
+	memset(P, 0, sizeof *P);	/* x = y = z = 0; z = 0 denotes infinity */
+	P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0];	/* header word of each coordinate */
+}
+
+static inline void
+point_double(jacobian *P, const curve_params *cc)
+{
+	run_code(P, P, cc, code_double);	/* 2nd operand is a placeholder: code_double reads only Px, Py, Pz */
+}
+
+static inline uint32_t
+point_add(jacobian *P1, const jacobian *P2, const curve_params *cc)
+{
+	return run_code(P1, P2, cc, code_add);	/* P1 += P2; 0 if r = s2 - s1 was zero, else 1 */
+}
+
+static void
+point_mul(jacobian *P, const unsigned char *x, size_t xlen,
+	const curve_params *cc)
+{
+	/*
+	 * Replace P with x*P; x is read as xlen bytes, most significant
+	 * byte first. This is a simple double-and-add ladder with a
+	 * 2-bit window (one add every two doublings), so 2P and 3P are
+	 * first precomputed in local buffers.
+	 *
+	 * We always perform two doublings and one addition; the
+	 * addition is with P, 2P or 3P and is done in a temporary
+	 * array, then conditionally copied into the accumulator.
+	 *
+	 * The addition code cannot handle cases where one of the
+	 * operands is infinity, which is the case at the start of the
+	 * ladder. The qz flag keeps track of that situation.
+	 */
+	uint32_t qz;
+	jacobian P2, P3, Q, T, U;
+
+	memcpy(&P2, P, sizeof P2);
+	point_double(&P2, cc);	/* P2 = 2P */
+	memcpy(&P3, P, sizeof P3);
+	point_add(&P3, &P2, cc);	/* P3 = P + 2P = 3P */
+
+	point_zero(&Q, cc);
+	qz = 1;	/* 1 until the first non-zero window has been seen */
+	while (xlen -- > 0) {
+		int k;
+
+		for (k = 6; k >= 0; k -= 2) {	/* four windows per byte, top bits first */
+			uint32_t bits;
+			uint32_t bnz;
+
+			point_double(&Q, cc);
+			point_double(&Q, cc);	/* Q = 4Q (infinity stays infinity) */
+			memcpy(&T, P, sizeof T);	/* default addend: P (bits == 1) */
+			memcpy(&U, &Q, sizeof U);
+			bits = (*x >> k) & (uint32_t)3;
+			bnz = NEQ(bits, 0);
+			CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
+			CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
+			point_add(&U, &T, cc);	/* U = Q + T, computed for every window */
+			CCOPY(bnz & qz, &Q, &T, sizeof Q);	/* first non-zero window: Q = T */
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);	/* later non-zero window: Q = U */
+			qz &= ~bnz;
+		}
+		x ++;
+	}
+	memcpy(P, &Q, sizeof Q);
+}
+
+/*
+ * Decode point into Jacobian coordinates. This function does not support
+ * the point at infinity. If the point is invalid then this returns 0, but
+ * the coordinates are still set to properly formed field elements.
+ */
+static uint32_t
+point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc)
+{
+	/*
+	 * Points must use uncompressed format:
+	 * -- first byte is 0x04;
+	 * -- coordinates X and Y use unsigned big-endian, with the same
+	 *    length as the field modulus.
+	 *
+	 * We don't support hybrid format (uncompressed, but first byte
+	 * has value 0x06 or 0x07, depending on the least significant bit
+	 * of Y) because it is rather useless, and explicitly forbidden
+	 * by PKIX (RFC 5480, section 2.2).
+	 *
+	 * We don't support compressed format either, because it is not
+	 * much used in practice (there are or were patent-related
+	 * concerns about point compression, which explains the lack of
+	 * generalised support). Also, point compression support would
+	 * need a bit more code.
+	 */
+	const unsigned char *buf;
+	size_t plen, zlen;
+	uint32_t r;
+	jacobian Q;
+
+	buf = src;
+	point_zero(P, cc);	/* P is set before the early return below */
+	plen = (cc->p[0] - (cc->p[0] >> 5) + 7) >> 3;	/* bytes per coordinate */
+	if (len != 1 + (plen << 1)) {
+		return 0;
+	}
+	r = br_i31_decode_mod(P->c[0], buf + 1, plen, cc->p);	/* X */
+	r &= br_i31_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p);	/* Y */
+
+	/*
+	 * Check first byte.
+	 */
+	r &= EQ(buf[0], 0x04);
+	/* obsolete
+	r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
+		& ~(uint32_t)(buf[0] ^ buf[plen << 1]));
+	*/
+
+	/*
+	 * Convert coordinates and check that the point is valid.
+	 */
+	zlen = ((cc->p[0] + 63) >> 5) * sizeof(uint32_t);	/* bytes in one integer, header included */
+	memcpy(Q.c[0], cc->R2, zlen);	/* P2x = R^2, as code_check expects */
+	memcpy(Q.c[1], cc->b, zlen);	/* P2y = b*R */
+	set_one(Q.c[2], cc->p);	/* P2z = 1 */
+	r &= ~run_code(P, &Q, cc, code_check);	/* run_code() returns 0 for a point on the curve */
+	return r;
+}
+
+/*
+ * Encode a point. This method assumes that the point is correct and is
+ * not the point at infinity. Encoded size is always 1+2*plen, where
+ * plen is the field modulus length, in bytes.
+ */
+static void
+point_encode(void *dst, const jacobian *P, const curve_params *cc)
+{
+	unsigned char *buf;
+	uint32_t xbl;
+	size_t plen;
+	jacobian Q, T;
+
+	buf = dst;
+	xbl = cc->p[0];
+	xbl -= (xbl >> 5);	/* e.g. 0x108 -> 256 for the first curve */
+	plen = (xbl + 7) >> 3;	/* bytes per coordinate */
+	buf[0] = 0x04;	/* uncompressed-format header byte */
+	memcpy(&Q, P, sizeof *P);	/* work on a copy: P is const */
+	set_one(T.c[2], cc->p);	/* only T's z is set; code_affine reads nothing else from T */
+	run_code(&Q, &T, cc, code_affine);
+	br_i31_encode(buf + 1, plen, Q.c[0]);	/* X */
+	br_i31_encode(buf + 1 + plen, plen, Q.c[1]);	/* Y */
+}
+
+static const br_ec_curve_def *
+id_to_curve_def(int curve)
+{
+	/* Any curve ID other than the three below maps to NULL. */
+	if (curve == BR_EC_secp256r1) {
+		return &br_secp256r1;
+	} else if (curve == BR_EC_secp384r1) {
+		return &br_secp384r1;
+	} else if (curve == BR_EC_secp521r1) {
+		return &br_secp521r1;
+	}
+	return NULL;
+}
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	/* 'curve' must be supported: the lookup result is not NULL-checked. */
+	const br_ec_curve_def *def = id_to_curve_def(curve);
+
+	*len = def->generator_len;
+	return def->generator;
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	/* 'curve' must be supported: the lookup result is not NULL-checked. */
+	const br_ec_curve_def *def = id_to_curve_def(curve);
+
+	*len = def->order_len;
+	return def->order;
+}
+
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	(void)api_generator(curve, len);	/* *len = encoded point length */
+	*len = *len / 2;	/* X length: (1 + 2*plen) / 2 = plen */
+	return 1;	/* X starts right after the header byte */
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	const curve_params *params = id_to_curve(curve);
+	uint32_t ok;
+	jacobian pt;
+
+	if (params->point_len != Glen) {
+		return 0;	/* wrong size: G is left untouched */
+	}
+	ok = point_decode(&pt, G, Glen, params);
+	point_mul(&pt, x, xlen, params);
+	/* The product is written back over G whatever the value of ok. */
+	point_encode(G, &pt, params);
+	return ok;
+}
+
+static size_t
+api_mulgen(unsigned char *R,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	size_t gen_len;
+	const unsigned char *gen = api_generator(curve, &gen_len);
+
+	/* Seed R with the encoded generator, then multiply it in place. */
+	memcpy(R, gen, gen_len);
+	api_mul(R, gen_len, x, xlen, curve);
+	return gen_len;
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	uint32_t r, t, z;
+	const curve_params *cc;
+	jacobian P, Q;
+
+	/*
+	 * Compute x*A + y*B into A (B == NULL selects the generator).
+	 * TODO: see about merging the two ladders; we currently do two
+	 * independent point multiplications (wasteful but short code).
+	 */
+
+	cc = id_to_curve(curve);
+	if (len != cc->point_len) {
+		return 0;	/* wrong length: A is left untouched */
+	}
+	r = point_decode(&P, A, len, cc);
+	if (B == NULL) {
+		size_t Glen;	/* written by api_generator(), otherwise unused */
+
+		B = api_generator(curve, &Glen);
+	}
+	r &= point_decode(&Q, B, len, cc);
+	point_mul(&P, x, xlen, cc);	/* P = x*A */
+	point_mul(&Q, y, ylen, cc);	/* Q = y*B */
+
+	/*
+	 * We want to compute P+Q. Since the base points A and B are distinct
+	 * from infinity, and the multipliers are non-zero and lower than the
+	 * curve order, then we know that P and Q are non-infinity. This
+	 * leaves two special situations to test for:
+	 * -- If P = Q then we must use point_double().
+	 * -- If P+Q = 0 then we must report an error.
+	 */
+	t = point_add(&P, &Q, cc);	/* t = 0 if r = s2 - s1 was zero in code_add */
+	point_double(&Q, cc);	/* always computed; kept only when P = Q */
+	z = br_i31_iszero(P.c[2]);	/* tests the z coordinate of the sum */
+
+	/*
+	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
+	 * have the following:
+	 *
+	 *   z = 0, t = 0   return P (normal addition)
+	 *   z = 0, t = 1   return P (normal addition)
+	 *   z = 1, t = 0   return Q (a 'double' case)
+	 *   z = 1, t = 1   report an error (P+Q = 0)
+	 */
+	CCOPY(z & ~t, &P, &Q, sizeof Q);
+	point_encode(A, &P, cc);	/* A is overwritten even when r ends up 0 */
+	r &= ~(z & t);
+
+	return r;
+}
+
+/* see bearssl_ec.h */
+const br_ec_impl br_ec_prime_i31 = {
+	(uint32_t)0x03800000,	/* bits 23..25 set; presumably the supported_curves mask */
+	&api_generator,
+	&api_order,
+	&api_xoff,
+	&api_mul,
+	&api_mulgen,
+	&api_muladd
+};
diff --git a/third_party/bearssl/src/ec_pubkey.c b/third_party/bearssl/src/ec_pubkey.c
new file mode 100644
index 0000000..383ff28
--- /dev/null
+++ b/third_party/bearssl/src/ec_pubkey.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const unsigned char POINT_LEN[] = {	/* indexed by curve ID; returned as the size when kbuf is NULL */
+	  0,   /* 0: not a valid curve ID */
+	 43,   /* sect163k1 */
+	 43,   /* sect163r1 */
+	 43,   /* sect163r2 */
+	 51,   /* sect193r1 */
+	 51,   /* sect193r2 */
+	 61,   /* sect233k1 */
+	 61,   /* sect233r1 */
+	 61,   /* sect239k1 */
+	 73,   /* sect283k1 */
+	 73,   /* sect283r1 */
+	105,   /* sect409k1 */
+	105,   /* sect409r1 */
+	145,   /* sect571k1 */
+	145,   /* sect571r1 */
+	 41,   /* secp160k1 */
+	 41,   /* secp160r1 */
+	 41,   /* secp160r2 */
+	 49,   /* secp192k1 */
+	 49,   /* secp192r1 */
+	 57,   /* secp224k1 */
+	 57,   /* secp224r1 */
+	 65,   /* secp256k1 */
+	 65,   /* secp256r1 */
+	 97,   /* secp384r1 */
+	133,   /* secp521r1 */
+	 65,   /* brainpoolP256r1 */
+	 97,   /* brainpoolP384r1 */
+	129,   /* brainpoolP512r1 */
+	 32,   /* curve25519 */
+	 56,   /* curve448 */
+};
+
+/* see bearssl_ec.h */
+size_t
+br_ec_compute_pub(const br_ec_impl *impl, br_ec_public_key *pk,
+	void *kbuf, const br_ec_private_key *sk)
+{
+	size_t qlen;
+	int id = sk->curve;
+	/* Range-check the ID before it is used as a shift count or index. */
+	if (id < 0 || id >= 32 || id >= (int)(sizeof POINT_LEN)) {
+		return 0;
+	}
+	if (((impl->supported_curves >> id) & 1) == 0) {
+		return 0;
+	}
+	if (kbuf == NULL) {
+		return POINT_LEN[id];	/* size query only */
+	}
+	qlen = impl->mulgen(kbuf, sk->x, sk->xlen, id);
+	if (pk != NULL) {
+		pk->q = kbuf;
+		pk->qlen = qlen;
+		pk->curve = id;
+	}
+	return qlen;
+}
diff --git a/third_party/bearssl/src/ec_secp256r1.c b/third_party/bearssl/src/ec_secp256r1.c
new file mode 100644
index 0000000..a9d6c45
--- /dev/null
+++ b/third_party/bearssl/src/ec_secp256r1.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const unsigned char P256_N[] = {	/* 32 bytes; presumably the curve order, big-endian */
+	0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xBC, 0xE6, 0xFA, 0xAD, 0xA7, 0x17, 0x9E, 0x84,
+	0xF3, 0xB9, 0xCA, 0xC2, 0xFC, 0x63, 0x25, 0x51
+};
+
+static const unsigned char P256_G[] = {	/* 65 bytes: 0x04 header, then two 32-byte coordinates */
+	0x04, 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42,
+	0x47, 0xF8, 0xBC, 0xE6, 0xE5, 0x63, 0xA4, 0x40,
+	0xF2, 0x77, 0x03, 0x7D, 0x81, 0x2D, 0xEB, 0x33,
+	0xA0, 0xF4, 0xA1, 0x39, 0x45, 0xD8, 0x98, 0xC2,
+	0x96, 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F,
+	0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, 0x0F, 0x9E,
+	0x16, 0x2B, 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E,
+	0xCE, 0xCB, 0xB6, 0x40, 0x68, 0x37, 0xBF, 0x51,
+	0xF5
+};
+
+/* see inner.h */
+const br_ec_curve_def br_secp256r1 = {
+	BR_EC_secp256r1,	/* curve ID */
+	P256_N, sizeof P256_N,	/* presumably order and order_len -- confirm in inner.h */
+	P256_G, sizeof P256_G	/* presumably generator and generator_len -- confirm in inner.h */
+};
diff --git a/third_party/bearssl/src/ec_secp384r1.c b/third_party/bearssl/src/ec_secp384r1.c
new file mode 100644
index 0000000..693d93e
--- /dev/null
+++ b/third_party/bearssl/src/ec_secp384r1.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const unsigned char P384_N[] = {	/* subgroup order n, big-endian */
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
+	0xC7, 0x63, 0x4D, 0x81, 0xF4, 0x37, 0x2D, 0xDF, 
+	0x58, 0x1A, 0x0D, 0xB2, 0x48, 0xB0, 0xA7, 0x7A, 
+	0xEC, 0xEC, 0x19, 0x6A, 0xCC, 0xC5, 0x29, 0x73
+};
+
+static const unsigned char P384_G[] = {	/* generator: 0x04, then X, then Y */
+	0x04, 0xAA, 0x87, 0xCA, 0x22, 0xBE, 0x8B, 0x05,
+	0x37, 0x8E, 0xB1, 0xC7, 0x1E, 0xF3, 0x20, 0xAD,
+	0x74, 0x6E, 0x1D, 0x3B, 0x62, 0x8B, 0xA7, 0x9B,
+	0x98, 0x59, 0xF7, 0x41, 0xE0, 0x82, 0x54, 0x2A,
+	0x38, 0x55, 0x02, 0xF2, 0x5D, 0xBF, 0x55, 0x29,
+	0x6C, 0x3A, 0x54, 0x5E, 0x38, 0x72, 0x76, 0x0A,
+	0xB7, 0x36, 0x17, 0xDE, 0x4A, 0x96, 0x26, 0x2C,
+	0x6F, 0x5D, 0x9E, 0x98, 0xBF, 0x92, 0x92, 0xDC,
+	0x29, 0xF8, 0xF4, 0x1D, 0xBD, 0x28, 0x9A, 0x14,
+	0x7C, 0xE9, 0xDA, 0x31, 0x13, 0xB5, 0xF0, 0xB8,
+	0xC0, 0x0A, 0x60, 0xB1, 0xCE, 0x1D, 0x7E, 0x81,
+	0x9D, 0x7A, 0x43, 0x1D, 0x7C, 0x90, 0xEA, 0x0E,
+	0x5F
+};
+
+/* see inner.h: curve ID, then order and generator, each with its byte length */
+const br_ec_curve_def br_secp384r1 = {
+	BR_EC_secp384r1,
+	P384_N, sizeof P384_N,
+	P384_G, sizeof P384_G
+};
diff --git a/third_party/bearssl/src/ec_secp521r1.c b/third_party/bearssl/src/ec_secp521r1.c
new file mode 100644
index 0000000..161acd0
--- /dev/null
+++ b/third_party/bearssl/src/ec_secp521r1.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static const unsigned char P521_N[] = {	/* subgroup order n, big-endian */
+	0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xFF, 0xFA, 0x51, 0x86, 0x87, 0x83, 0xBF, 0x2F,
+	0x96, 0x6B, 0x7F, 0xCC, 0x01, 0x48, 0xF7, 0x09,
+	0xA5, 0xD0, 0x3B, 0xB5, 0xC9, 0xB8, 0x89, 0x9C,
+	0x47, 0xAE, 0xBB, 0x6F, 0xB7, 0x1E, 0x91, 0x38,
+	0x64, 0x09
+};
+
+static const unsigned char P521_G[] = {	/* generator: 0x04, then X, then Y */
+	0x04, 0x00, 0xC6, 0x85, 0x8E, 0x06, 0xB7, 0x04,
+	0x04, 0xE9, 0xCD, 0x9E, 0x3E, 0xCB, 0x66, 0x23,
+	0x95, 0xB4, 0x42, 0x9C, 0x64, 0x81, 0x39, 0x05,
+	0x3F, 0xB5, 0x21, 0xF8, 0x28, 0xAF, 0x60, 0x6B,
+	0x4D, 0x3D, 0xBA, 0xA1, 0x4B, 0x5E, 0x77, 0xEF,
+	0xE7, 0x59, 0x28, 0xFE, 0x1D, 0xC1, 0x27, 0xA2,
+	0xFF, 0xA8, 0xDE, 0x33, 0x48, 0xB3, 0xC1, 0x85,
+	0x6A, 0x42, 0x9B, 0xF9, 0x7E, 0x7E, 0x31, 0xC2,
+	0xE5, 0xBD, 0x66, 0x01, 0x18, 0x39, 0x29, 0x6A,
+	0x78, 0x9A, 0x3B, 0xC0, 0x04, 0x5C, 0x8A, 0x5F,
+	0xB4, 0x2C, 0x7D, 0x1B, 0xD9, 0x98, 0xF5, 0x44,
+	0x49, 0x57, 0x9B, 0x44, 0x68, 0x17, 0xAF, 0xBD,
+	0x17, 0x27, 0x3E, 0x66, 0x2C, 0x97, 0xEE, 0x72,
+	0x99, 0x5E, 0xF4, 0x26, 0x40, 0xC5, 0x50, 0xB9,
+	0x01, 0x3F, 0xAD, 0x07, 0x61, 0x35, 0x3C, 0x70,
+	0x86, 0xA2, 0x72, 0xC2, 0x40, 0x88, 0xBE, 0x94,
+	0x76, 0x9F, 0xD1, 0x66, 0x50
+};
+
+/* see inner.h: curve ID, then order and generator, each with its byte length */
+const br_ec_curve_def br_secp521r1 = {
+	BR_EC_secp521r1,
+	P521_N, sizeof P521_N,
+	P521_G, sizeof P521_G
+};
diff --git a/third_party/bearssl/src/ecdsa_atr.c b/third_party/bearssl/src/ecdsa_atr.c
new file mode 100644
index 0000000..3a11226
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_atr.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ec.h: in-place ASN.1 -> "raw" ECDSA signature conversion */
+size_t
+br_ecdsa_asn1_to_raw(void *sig, size_t sig_len)
+{
+	/*
+	 * Converts, in place, an ASN.1 ECDSA signature (SEQUENCE of two
+	 * INTEGER values r and s) into "raw" format: r and s unsigned,
+	 * big-endian, left-padded to a common length, concatenated.
+	 * Returned value is the raw length (at most 254), or 0 on error.
+	 *
+	 * Decoding is lenient on DER minimality (non-minimal length and
+	 * integer encodings are accepted) but remains unambiguous. Each
+	 * INTEGER must use less than 128 bytes (signed encoding).
+	 */
+
+	unsigned char *buf, *r, *s;
+	size_t zlen, rlen, slen, off;
+	unsigned char tmp[254];
+
+	buf = sig;
+	if (sig_len < 8) {
+		return 0;
+	}
+
+	/*
+	 * First byte is SEQUENCE tag.
+	 */
+	if (buf[0] != 0x30) {
+		return 0;
+	}
+
+	/*
+	 * The SEQUENCE length is encoded over one byte (short form, 0 to
+	 * 127) or two bytes (0x81 then the length), so contents are at
+	 * most 255 bytes; enough for subgroup orders up to 999 bits.
+	 * 0x80 (BER indefinite length) and 0x82+ are rejected.
+	 */
+	zlen = buf[1];
+	if (zlen >= 0x80) {
+		if (zlen != 0x81) {
+			return 0;
+		}
+		zlen = buf[2];
+		if (zlen != sig_len - 3) {
+			return 0;
+		}
+		off = 3;
+	} else {
+		if (zlen != sig_len - 2) {
+			return 0;
+		}
+		off = 2;
+	}
+
+	/*
+	 * First INTEGER (r).
+	 */
+	if (buf[off ++] != 0x02) {
+		return 0;
+	}
+	rlen = buf[off ++];
+	if (rlen >= 0x80) {
+		return 0;
+	}
+	r = buf + off;
+	off += rlen;
+
+	/*
+	 * Second INTEGER (s).
+	 */
+	if (off + 2 > sig_len) {
+		return 0;
+	}
+	if (buf[off ++] != 0x02) {
+		return 0;
+	}
+	slen = buf[off ++];
+	if (slen >= 0x80 || slen != sig_len - off) {
+		return 0;
+	}
+	s = buf + off;
+
+	/*
+	 * Removing leading zeros from r and s.
+	 */
+	while (rlen > 0 && *r == 0) {
+		rlen --;
+		r ++;
+	}
+	while (slen > 0 && *s == 0) {
+		slen --;
+		s ++;
+	}
+
+	/*
+	 * Compute common length for the two integers, then copy integers
+	 * into the temporary buffer, and finally copy it back over the
+	 * signature buffer.
+	 */
+	zlen = rlen > slen ? rlen : slen;
+	sig_len = zlen << 1;
+	memset(tmp, 0, sig_len);
+	memcpy(tmp + zlen - rlen, r, rlen);
+	memcpy(tmp + sig_len - slen, s, slen);
+	memcpy(sig, tmp, sig_len);
+	return sig_len;
+}
diff --git a/third_party/bearssl/src/ecdsa_default_sign_asn1.c b/third_party/bearssl/src/ecdsa_default_sign_asn1.c
new file mode 100644
index 0000000..afbf8ac
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_default_sign_asn1.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ec.h: default ASN.1 signer is i15 when BR_LOMUL is set, else i31 */
+br_ecdsa_sign
+br_ecdsa_sign_asn1_get_default(void)
+{
+#if !BR_LOMUL
+	return br_ecdsa_i31_sign_asn1;
+#else
+	return br_ecdsa_i15_sign_asn1;
+#endif
+}
diff --git a/third_party/bearssl/src/ecdsa_default_sign_raw.c b/third_party/bearssl/src/ecdsa_default_sign_raw.c
new file mode 100644
index 0000000..287c970
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_default_sign_raw.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ec.h: default raw signer is i15 when BR_LOMUL is set, else i31 */
+br_ecdsa_sign
+br_ecdsa_sign_raw_get_default(void)
+{
+#if !BR_LOMUL
+	return br_ecdsa_i31_sign_raw;
+#else
+	return br_ecdsa_i15_sign_raw;
+#endif
+}
diff --git a/third_party/bearssl/src/ecdsa_default_vrfy_asn1.c b/third_party/bearssl/src/ecdsa_default_vrfy_asn1.c
new file mode 100644
index 0000000..fe0996e
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_default_vrfy_asn1.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ec.h: default ASN.1 verifier is i15 when BR_LOMUL is set, else i31 */
+br_ecdsa_vrfy
+br_ecdsa_vrfy_asn1_get_default(void)
+{
+#if !BR_LOMUL
+	return br_ecdsa_i31_vrfy_asn1;
+#else
+	return br_ecdsa_i15_vrfy_asn1;
+#endif
+}
diff --git a/third_party/bearssl/src/ecdsa_default_vrfy_raw.c b/third_party/bearssl/src/ecdsa_default_vrfy_raw.c
new file mode 100644
index 0000000..e564a10
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_default_vrfy_raw.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ec.h: default raw verifier is i15 when BR_LOMUL is set, else i31 */
+br_ecdsa_vrfy
+br_ecdsa_vrfy_raw_get_default(void)
+{
+#if !BR_LOMUL
+	return br_ecdsa_i31_vrfy_raw;
+#else
+	return br_ecdsa_i15_vrfy_raw;
+#endif
+}
diff --git a/third_party/bearssl/src/ecdsa_i15_bits.c b/third_party/bearssl/src/ecdsa_i15_bits.c
new file mode 100644
index 0000000..402d14a
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i15_bits.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: load a hash value into x[], truncated to the modulus bit length */
+void
+br_ecdsa_i15_bits2int(uint16_t *x,
+	const void *src, size_t len, uint32_t ebitlen)
+{
+	/* Actual bit count: the encoded length counts 16 per 15-bit word. */
+	uint32_t mod_bits = ebitlen - (ebitlen >> 4);
+	uint32_t src_bits = (uint32_t)len << 3;
+	int shift = 0;
+
+	if (src_bits > mod_bits) {
+		/* Too long: keep the leading bytes, shift out excess low bits. */
+		len = (mod_bits + 7) >> 3;
+		shift = (int)((src_bits - mod_bits) & 7);
+	}
+
+	br_i15_zero(x, ebitlen);
+	br_i15_decode(x, src, len);
+	br_i15_rshift(x, shift);
+	x[0] = ebitlen;
+}
diff --git a/third_party/bearssl/src/ecdsa_i15_sign_asn1.c b/third_party/bearssl/src/ecdsa_i15_sign_asn1.c
new file mode 100644
index 0000000..ab4a283
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i15_sign_asn1.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define ORDER_LEN   ((BR_MAX_EC_SIZE + 7) >> 3)
+
+/* see bearssl_ec.h: sign in raw format, then re-encode the result as ASN.1 */
+size_t
+br_ecdsa_i15_sign_asn1(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig)
+{
+	/* Raw r||s plus 12 spare bytes (presumably ASN.1 framing headroom). */
+	unsigned char work[(ORDER_LEN << 1) + 12];
+	size_t len = br_ecdsa_i15_sign_raw(impl, hf, hash_value, sk, work);
+
+	if (len != 0) {
+		/* The conversion happens in place inside the scratch buffer. */
+		len = br_ecdsa_raw_to_asn1(work, len);
+		memcpy(sig, work, len);
+	}
+	return len;
+}
diff --git a/third_party/bearssl/src/ecdsa_i15_sign_raw.c b/third_party/bearssl/src/ecdsa_i15_sign_raw.c
new file mode 100644
index 0000000..39b2e1d
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i15_sign_raw.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define I15_LEN     ((BR_MAX_EC_SIZE + 29) / 15)	/* 1 header word + ceil(bits/15) */
+#define POINT_LEN   (1 + (((BR_MAX_EC_SIZE + 7) >> 3) << 1))	/* 1 + 2 coordinates */
+#define ORDER_LEN   ((BR_MAX_EC_SIZE + 7) >> 3)	/* bytes for BR_MAX_EC_SIZE bits */
+
+/* see bearssl_ec.h: writes r||s (2*order_len bytes) into sig; returns that length, or 0 */
+size_t
+br_ecdsa_i15_sign_raw(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig)
+{
+	/*
+	 * IMPORTANT: this code is fit only for curves with a prime
+	 * order. This is needed so that modular reduction of the X
+	 * coordinate of a point can be done with a simple subtraction.
+	 * We also rely on the last byte of the curve order to be distinct
+	 * from 0 and 1.
+	 */
+	const br_ec_curve_def *cd;
+	uint16_t n[I15_LEN], r[I15_LEN], s[I15_LEN], x[I15_LEN];
+	uint16_t m[I15_LEN], k[I15_LEN], t1[I15_LEN], t2[I15_LEN];
+	unsigned char tt[ORDER_LEN << 1];
+	unsigned char eU[POINT_LEN];
+	size_t hash_len, nlen, ulen;
+	uint16_t n0i;
+	uint32_t ctl;
+	br_hmac_drbg_context drbg;
+
+	/*
+	 * If the curve is not supported by impl, then exit with an error (0).
+	 */
+	if (((impl->supported_curves >> sk->curve) & 1) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Get the curve parameters (generator and order).
+	 */
+	switch (sk->curve) {
+	case BR_EC_secp256r1:
+		cd = &br_secp256r1;
+		break;
+	case BR_EC_secp384r1:
+		cd = &br_secp384r1;
+		break;
+	case BR_EC_secp521r1:
+		cd = &br_secp521r1;
+		break;
+	default:
+		return 0;
+	}
+
+	/*
+	 * Get modulus (the curve order) and n0i, used by the Montgomery calls.
+	 */
+	nlen = cd->order_len;
+	br_i15_decode(n, cd->order, nlen);
+	n0i = br_i15_ninv15(n[1]);
+
+	/*
+	 * Get private key as an i15 integer. This also checks that the
+	 * private key is well-defined (not zero, and less than the
+	 * curve order).
+	 */
+	if (!br_i15_decode_mod(x, sk->x, sk->xlen, n)) {
+		return 0;
+	}
+	if (br_i15_iszero(x)) {
+		return 0;
+	}
+
+	/*
+	 * Get hash output length (in bytes) from the hash class descriptor.
+	 */
+	hash_len = (hf->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK;
+
+	/*
+	 * Truncate and reduce the hash value modulo the curve order.
+	 */
+	br_ecdsa_i15_bits2int(m, hash_value, hash_len, n[0]);
+	br_i15_sub(m, n, br_i15_sub(m, n, 0) ^ 1);	/* subtract n only if m >= n */
+
+	/*
+	 * RFC 6979 generation of the "k" value.
+	 *
+	 * The process uses HMAC_DRBG (with the hash function used to
+	 * process the message that is to be signed). The seed is the
+	 * concatenation of the encodings of the private key and
+	 * the hash value (after truncation and modular reduction).
+	 */
+	br_i15_encode(tt, nlen, x);
+	br_i15_encode(tt + nlen, nlen, m);
+	br_hmac_drbg_init(&drbg, hf, tt, nlen << 1);
+	for (;;) {
+		br_hmac_drbg_generate(&drbg, tt, nlen);
+		br_ecdsa_i15_bits2int(k, tt, nlen, n[0]);
+		if (br_i15_iszero(k)) {
+			continue;	/* k == 0: draw another candidate */
+		}
+		if (br_i15_sub(k, n, 0)) {
+			break;	/* borrow reported, i.e. k < n: accept */
+		}
+	}
+
+	/*
+	 * Compute k*G and extract the X coordinate, then reduce it
+	 * modulo the curve order. Since we support only curves with
+	 * prime order, that reduction is only a matter of computing
+	 * a subtraction.
+	 */
+	br_i15_encode(tt, nlen, k);
+	ulen = impl->mulgen(eU, tt, nlen, sk->curve);
+	br_i15_zero(r, n[0]);
+	br_i15_decode(r, &eU[1], ulen >> 1);	/* skip first byte; X is the first half */
+	r[0] = n[0];
+	br_i15_sub(r, n, br_i15_sub(r, n, 0) ^ 1);	/* subtract n only if r >= n */
+
+	/*
+	 * Compute 1/k in double-Montgomery representation. We do so by
+	 * first converting _from_ Montgomery representation (twice),
+	 * then using a modular exponentiation.
+	 */
+	br_i15_from_monty(k, n, n0i);
+	br_i15_from_monty(k, n, n0i);
+	memcpy(tt, cd->order, nlen);
+	tt[nlen - 1] -= 2;	/* exponent n-2; no borrow since last byte is not 0 or 1 */
+	br_i15_modpow(k, tt, nlen, n, n0i, t1, t2);
+
+	/*
+	 * Compute s = (m+xr)/k (mod n).
+	 * The k[] array contains R^2/k (double-Montgomery representation);
+	 * we thus can use direct Montgomery multiplications and conversions
+	 * from Montgomery, avoiding any call to br_i15_to_monty() (which
+	 * is slower).
+	 */
+	br_i15_from_monty(m, n, n0i);	/* m/R, same scaling as x*r/R below */
+	br_i15_montymul(t1, x, r, n, n0i);
+	ctl = br_i15_add(t1, m, 1);	/* t1 += m; ctl receives the returned carry */
+	ctl |= br_i15_sub(t1, n, 0) ^ 1;	/* ... or t1 >= n (presumably: no borrow) */
+	br_i15_sub(t1, n, ctl);	/* reduce modulo n when either case holds */
+	br_i15_montymul(s, t1, k, n, n0i);
+
+	/*
+	 * Encode r and s in the signature (nlen bytes each, r first).
+	 */
+	br_i15_encode(sig, nlen, r);
+	br_i15_encode((unsigned char *)sig + nlen, nlen, s);
+	return nlen << 1;
+}
diff --git a/third_party/bearssl/src/ecdsa_i15_vrfy_asn1.c b/third_party/bearssl/src/ecdsa_i15_vrfy_asn1.c
new file mode 100644
index 0000000..f4bef99
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i15_vrfy_asn1.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define FIELD_LEN   ((BR_MAX_EC_SIZE + 7) >> 3)
+
+/* see bearssl_ec.h: convert the ASN.1 signature to raw format, then verify it */
+uint32_t
+br_ecdsa_i15_vrfy_asn1(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk,
+	const void *sig, size_t sig_len)
+{
+	/*
+	 * The scratch buffer is twice the largest accepted input: the
+	 * conversion to "raw" format may grow a malformed ASN.1 signature.
+	 */
+	unsigned char work[(FIELD_LEN << 2) + 24];
+
+	if (sig_len > sizeof work / 2) {
+		return 0;
+	}
+	memcpy(work, sig, sig_len);
+	return br_ecdsa_i15_vrfy_raw(impl, hash, hash_len, pk, work,
+		br_ecdsa_asn1_to_raw(work, sig_len));
+}
diff --git a/third_party/bearssl/src/ecdsa_i15_vrfy_raw.c b/third_party/bearssl/src/ecdsa_i15_vrfy_raw.c
new file mode 100644
index 0000000..14dd5e4
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i15_vrfy_raw.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define I15_LEN     ((BR_MAX_EC_SIZE + 29) / 15)	/* 1 header word + ceil(bits/15) */
+#define POINT_LEN   (1 + (((BR_MAX_EC_SIZE + 7) >> 3) << 1))	/* 1 + 2 coordinates */
+
+/* see bearssl_ec.h: verifies a raw (r||s) signature; returns 0 unless it is valid */
+uint32_t
+br_ecdsa_i15_vrfy_raw(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk,
+	const void *sig, size_t sig_len)
+{
+	/*
+	 * IMPORTANT: this code is fit only for curves with a prime
+	 * order. This is needed so that modular reduction of the X
+	 * coordinate of a point can be done with a simple subtraction.
+	 */
+	const br_ec_curve_def *cd;
+	uint16_t n[I15_LEN], r[I15_LEN], s[I15_LEN], t1[I15_LEN], t2[I15_LEN];
+	unsigned char tx[(BR_MAX_EC_SIZE + 7) >> 3];
+	unsigned char ty[(BR_MAX_EC_SIZE + 7) >> 3];
+	unsigned char eU[POINT_LEN];
+	size_t nlen, rlen, ulen;
+	uint16_t n0i;
+	uint32_t res;
+
+	/*
+	 * If the curve is not supported by impl, then report an error (0).
+	 */
+	if (((impl->supported_curves >> pk->curve) & 1) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Get the curve parameters (generator and order).
+	 */
+	switch (pk->curve) {
+	case BR_EC_secp256r1:
+		cd = &br_secp256r1;
+		break;
+	case BR_EC_secp384r1:
+		cd = &br_secp384r1;
+		break;
+	case BR_EC_secp521r1:
+		cd = &br_secp521r1;
+		break;
+	default:
+		return 0;
+	}
+
+	/*
+	 * Signature length must be even (r and s each use half of it).
+	 */
+	if (sig_len & 1) {
+		return 0;
+	}
+	rlen = sig_len >> 1;
+
+	/*
+	 * Public key point must have the proper size for this curve.
+	 */
+	if (pk->qlen != cd->generator_len) {
+		return 0;
+	}
+
+	/*
+	 * Get modulus; then decode the r and s values. They must be
+	 * lower than the modulus, and s must not be null.
+	 */
+	nlen = cd->order_len;
+	br_i15_decode(n, cd->order, nlen);
+	n0i = br_i15_ninv15(n[1]);
+	if (!br_i15_decode_mod(r, sig, rlen, n)) {
+		return 0;
+	}
+	if (!br_i15_decode_mod(s, (const unsigned char *)sig + rlen, rlen, n)) {
+		return 0;
+	}
+	if (br_i15_iszero(s)) {
+		return 0;
+	}
+
+	/*
+	 * Invert s. We do that with a modular exponentiation; we use
+	 * the fact that for all the curves we support, the least
+	 * significant byte is not 0 or 1, so we can subtract 2 without
+	 * any carry to process.
+	 * We also want 1/s in Montgomery representation, which can be
+	 * done by converting _from_ Montgomery representation before
+	 * the inversion (because (1/s)*R = 1/(s/R)).
+	 */
+	br_i15_from_monty(s, n, n0i);
+	memcpy(tx, cd->order, nlen);	/* tx is scratch here: exponent bytes */
+	tx[nlen - 1] -= 2;	/* exponent n-2 */
+	br_i15_modpow(s, tx, nlen, n, n0i, t1, t2);
+
+	/*
+	 * Truncate the hash to the modulus length (in bits) and reduce
+	 * it modulo the curve order. The modular reduction can be done
+	 * with a subtraction since the truncation already reduced the
+	 * value to the modulus bit length.
+	 */
+	br_ecdsa_i15_bits2int(t1, hash, hash_len, n[0]);
+	br_i15_sub(t1, n, br_i15_sub(t1, n, 0) ^ 1);	/* subtract n only if t1 >= n */
+
+	/*
+	 * Multiply the (truncated, reduced) hash value with 1/s, result in
+	 * t2, encoded in ty.
+	 */
+	br_i15_montymul(t2, t1, s, n, n0i);
+	br_i15_encode(ty, nlen, t2);
+
+	/*
+	 * Multiply r with 1/s, result in t1, encoded in tx.
+	 */
+	br_i15_montymul(t1, r, s, n, n0i);
+	br_i15_encode(tx, nlen, t1);
+
+	/*
+	 * Compute the point x*Q + y*G (NULL second point: presumably G).
+	 */
+	ulen = cd->generator_len;
+	memcpy(eU, pk->q, ulen);
+	res = impl->muladd(eU, NULL, ulen,
+		tx, nlen, ty, nlen, cd->curve);
+
+	/*
+	 * Get the X coordinate, reduce modulo the curve order, and
+	 * compare with the 'r' value.
+	 *
+	 * The modular reduction can be done with subtractions because
+	 * we work with curves of prime order, so the curve order is
+	 * close to the field order (Hasse's theorem).
+	 */
+	br_i15_zero(t1, n[0]);
+	br_i15_decode(t1, &eU[1], ulen >> 1);	/* skip first byte; X is the first half */
+	t1[0] = n[0];
+	br_i15_sub(t1, n, br_i15_sub(t1, n, 0) ^ 1);	/* subtract n only if t1 >= n */
+	res &= ~br_i15_sub(t1, r, 1);	/* t1 -= r; a reported borrow clears res */
+	res &= br_i15_iszero(t1);	/* difference must be zero, i.e. X mod n == r */
+	return res;
+}
diff --git a/third_party/bearssl/src/ecdsa_i31_bits.c b/third_party/bearssl/src/ecdsa_i31_bits.c
new file mode 100644
index 0000000..9a8d673
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i31_bits.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; decodes src[] into x[], truncating it if it is too long */
+void
+br_ecdsa_i31_bits2int(uint32_t *x,
+	const void *src, size_t len, uint32_t ebitlen)
+{
+	uint32_t bitlen, hbitlen;
+	int sc;
+
+	bitlen = ebitlen - (ebitlen >> 5);  /* presumably the plain bit count */
+	hbitlen = (uint32_t)len << 3;  /* length of src[], in bits */
+	if (hbitlen > bitlen) {
+		len = (bitlen + 7) >> 3;  /* keep the leading bytes only */
+		sc = (int)((hbitlen - bitlen) & 7);  /* surplus low bits */
+	} else {
+		sc = 0;
+	}
+	br_i31_zero(x, ebitlen);
+	br_i31_decode(x, src, len);
+	br_i31_rshift(x, sc);
+	x[0] = ebitlen;  /* x[0] holds ebitlen on output */
+}
diff --git a/third_party/bearssl/src/ecdsa_i31_sign_asn1.c b/third_party/bearssl/src/ecdsa_i31_sign_asn1.c
new file mode 100644
index 0000000..cf0d351
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i31_sign_asn1.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define ORDER_LEN   ((BR_MAX_EC_SIZE + 7) >> 3)
+
+/* see bearssl_ec.h; signs in raw format, then converts to ASN.1 */
+size_t
+br_ecdsa_i31_sign_asn1(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig)
+{
+	unsigned char rsig[(ORDER_LEN << 1) + 12];  /* raw + ASN.1 overhead */
+	size_t sig_len;
+
+	sig_len = br_ecdsa_i31_sign_raw(impl, hf, hash_value, sk, rsig);
+	if (sig_len == 0) {  /* raw signing reported an error */
+		return 0;
+	}
+	sig_len = br_ecdsa_raw_to_asn1(rsig, sig_len);  /* done in place */
+	memcpy(sig, rsig, sig_len);
+	return sig_len;
+}
diff --git a/third_party/bearssl/src/ecdsa_i31_sign_raw.c b/third_party/bearssl/src/ecdsa_i31_sign_raw.c
new file mode 100644
index 0000000..1df98fe
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i31_sign_raw.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define I31_LEN     ((BR_MAX_EC_SIZE + 61) / 31)
+#define POINT_LEN   (1 + (((BR_MAX_EC_SIZE + 7) >> 3) << 1))
+#define ORDER_LEN   ((BR_MAX_EC_SIZE + 7) >> 3)
+
+/* see bearssl_ec.h; returns the r||s signature length, or 0 on error */
+size_t
+br_ecdsa_i31_sign_raw(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig)
+{
+	/*
+	 * IMPORTANT: this code is fit only for curves with a prime
+	 * order. This is needed so that modular reduction of the X
+	 * coordinate of a point can be done with a simple subtraction.
+	 * We also rely on the last byte of the curve order to be distinct
+	 * from 0 and 1.
+	 */
+	const br_ec_curve_def *cd;
+	uint32_t n[I31_LEN], r[I31_LEN], s[I31_LEN], x[I31_LEN];
+	uint32_t m[I31_LEN], k[I31_LEN], t1[I31_LEN], t2[I31_LEN];
+	unsigned char tt[ORDER_LEN << 1];
+	unsigned char eU[POINT_LEN];
+	size_t hash_len, nlen, ulen;
+	uint32_t n0i, ctl;
+	br_hmac_drbg_context drbg;
+
+	/*
+	 * If the curve is not supported, then exit with an error.
+	 */
+	if (((impl->supported_curves >> sk->curve) & 1) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Get the curve parameters (generator and order).
+	 */
+	switch (sk->curve) {
+	case BR_EC_secp256r1:
+		cd = &br_secp256r1;
+		break;
+	case BR_EC_secp384r1:
+		cd = &br_secp384r1;
+		break;
+	case BR_EC_secp521r1:
+		cd = &br_secp521r1;
+		break;
+	default:
+		return 0;
+	}
+
+	/*
+	 * Get modulus.
+	 */
+	nlen = cd->order_len;
+	br_i31_decode(n, cd->order, nlen);
+	n0i = br_i31_ninv31(n[1]);
+
+	/*
+	 * Get private key as an i31 integer. This also checks that the
+	 * private key is well-defined (not zero, and less than the
+	 * curve order).
+	 */
+	if (!br_i31_decode_mod(x, sk->x, sk->xlen, n)) {
+		return 0;
+	}
+	if (br_i31_iszero(x)) {
+		return 0;
+	}
+
+	/*
+	 * Get hash length.
+	 */
+	hash_len = (hf->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK;
+
+	/*
+	 * Truncate and reduce the hash value modulo the curve order.
+	 */
+	br_ecdsa_i31_bits2int(m, hash_value, hash_len, n[0]);
+	br_i31_sub(m, n, br_i31_sub(m, n, 0) ^ 1);
+
+	/*
+	 * RFC 6979 generation of the "k" value.
+	 *
+	 * The process uses HMAC_DRBG (with the hash function used to
+	 * process the message that is to be signed). The seed is the
+	 * concatenation of the encodings of the private key and
+	 * the hash value (after truncation and modular reduction).
+	 */
+	br_i31_encode(tt, nlen, x);
+	br_i31_encode(tt + nlen, nlen, m);
+	br_hmac_drbg_init(&drbg, hf, tt, nlen << 1);
+	for (;;) {
+		br_hmac_drbg_generate(&drbg, tt, nlen);
+		br_ecdsa_i31_bits2int(k, tt, nlen, n[0]);
+		if (br_i31_iszero(k)) {  /* k = 0 is rejected, try again */
+			continue;
+		}
+		if (br_i31_sub(k, n, 0)) {  /* presumably: borrow, i.e. k < n */
+			break;
+		}
+	}
+
+	/*
+	 * Compute k*G and extract the X coordinate, then reduce it
+	 * modulo the curve order. Since we support only curves with
+	 * prime order, that reduction is only a matter of computing
+	 * a subtraction.
+	 */
+	br_i31_encode(tt, nlen, k);
+	ulen = impl->mulgen(eU, tt, nlen, sk->curve);
+	br_i31_zero(r, n[0]);
+	br_i31_decode(r, &eU[1], ulen >> 1);
+	r[0] = n[0];
+	br_i31_sub(r, n, br_i31_sub(r, n, 0) ^ 1);
+
+	/*
+	 * Compute 1/k in double-Montgomery representation. We do so by
+	 * first converting _from_ Montgomery representation (twice),
+	 * then using a modular exponentiation.
+	 */
+	br_i31_from_monty(k, n, n0i);
+	br_i31_from_monty(k, n, n0i);
+	memcpy(tt, cd->order, nlen);
+	tt[nlen - 1] -= 2;  /* tt[] now encodes the exponent n - 2 */
+	br_i31_modpow(k, tt, nlen, n, n0i, t1, t2);
+
+	/*
+	 * Compute s = (m+xr)/k (mod n).
+	 * The k[] array contains R^2/k (double-Montgomery representation);
+	 * we thus can use direct Montgomery multiplications and conversions
+	 * from Montgomery, avoiding any call to br_i31_to_monty() (which
+	 * is slower).
+	 */
+	br_i31_from_monty(m, n, n0i);
+	br_i31_montymul(t1, x, r, n, n0i);
+	ctl = br_i31_add(t1, m, 1);
+	ctl |= br_i31_sub(t1, n, 0) ^ 1;
+	br_i31_sub(t1, n, ctl);
+	br_i31_montymul(s, t1, k, n, n0i);
+
+	/*
+	 * Encode r and s in the signature.
+	 */
+	br_i31_encode(sig, nlen, r);
+	br_i31_encode((unsigned char *)sig + nlen, nlen, s);
+	return nlen << 1;  /* r and s, nlen bytes each */
+}
diff --git a/third_party/bearssl/src/ecdsa_i31_vrfy_asn1.c b/third_party/bearssl/src/ecdsa_i31_vrfy_asn1.c
new file mode 100644
index 0000000..4161aaa
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i31_vrfy_asn1.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define FIELD_LEN   ((BR_MAX_EC_SIZE + 7) >> 3)
+
+/* see bearssl_ec.h; converts the signature to raw format, then verifies */
+uint32_t
+br_ecdsa_i31_vrfy_asn1(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk,
+	const void *sig, size_t sig_len)
+{
+	/*
+	 * We use a double-sized buffer because a malformed ASN.1 signature
+	 * may trigger a size expansion when converting to "raw" format.
+	 */
+	unsigned char rsig[(FIELD_LEN << 2) + 24];
+
+	if (sig_len > ((sizeof rsig) >> 1)) {  /* input limited to half of rsig */
+		return 0;
+	}
+	memcpy(rsig, sig, sig_len);
+	sig_len = br_ecdsa_asn1_to_raw(rsig, sig_len);  /* done in place */
+	return br_ecdsa_i31_vrfy_raw(impl, hash, hash_len, pk, rsig, sig_len);
+}
diff --git a/third_party/bearssl/src/ecdsa_i31_vrfy_raw.c b/third_party/bearssl/src/ecdsa_i31_vrfy_raw.c
new file mode 100644
index 0000000..259477f
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_i31_vrfy_raw.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define I31_LEN     ((BR_MAX_EC_SIZE + 61) / 31)
+#define POINT_LEN   (1 + (((BR_MAX_EC_SIZE + 7) >> 3) << 1))
+
+/* see bearssl_ec.h; verifies a raw (r||s) signature, 0 means rejected */
+uint32_t
+br_ecdsa_i31_vrfy_raw(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk,
+	const void *sig, size_t sig_len)
+{
+	/*
+	 * IMPORTANT: this code is fit only for curves with a prime
+	 * order. This is needed so that modular reduction of the X
+	 * coordinate of a point can be done with a simple subtraction.
+	 */
+	const br_ec_curve_def *cd;
+	uint32_t n[I31_LEN], r[I31_LEN], s[I31_LEN], t1[I31_LEN], t2[I31_LEN];
+	unsigned char tx[(BR_MAX_EC_SIZE + 7) >> 3];
+	unsigned char ty[(BR_MAX_EC_SIZE + 7) >> 3];
+	unsigned char eU[POINT_LEN];
+	size_t nlen, rlen, ulen;
+	uint32_t n0i, res;
+
+	/*
+	 * If the curve is not supported, then report an error.
+	 */
+	if (((impl->supported_curves >> pk->curve) & 1) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Get the curve parameters (generator and order).
+	 */
+	switch (pk->curve) {
+	case BR_EC_secp256r1:
+		cd = &br_secp256r1;
+		break;
+	case BR_EC_secp384r1:
+		cd = &br_secp384r1;
+		break;
+	case BR_EC_secp521r1:
+		cd = &br_secp521r1;
+		break;
+	default:
+		return 0;
+	}
+
+	/*
+	 * Signature length must be even.
+	 */
+	if (sig_len & 1) {
+		return 0;
+	}
+	rlen = sig_len >> 1;  /* r and s each use half of the signature */
+
+	/*
+	 * Public key point must have the proper size for this curve.
+	 */
+	if (pk->qlen != cd->generator_len) {
+		return 0;
+	}
+
+	/*
+	 * Get modulus; then decode the r and s values. They must be
+	 * lower than the modulus, and s must not be null.
+	 */
+	nlen = cd->order_len;
+	br_i31_decode(n, cd->order, nlen);
+	n0i = br_i31_ninv31(n[1]);
+	if (!br_i31_decode_mod(r, sig, rlen, n)) {
+		return 0;
+	}
+	if (!br_i31_decode_mod(s, (const unsigned char *)sig + rlen, rlen, n)) {
+		return 0;
+	}
+	if (br_i31_iszero(s)) {
+		return 0;
+	}
+
+	/*
+	 * Invert s. We do that with a modular exponentiation; we use
+	 * the fact that for all the curves we support, the least
+	 * significant byte is not 0 or 1, so we can subtract 2 without
+	 * any carry to process.
+	 * We also want 1/s in Montgomery representation, which can be
+	 * done by converting _from_ Montgomery representation before
+	 * the inversion (because (1/s)*R = 1/(s/R)).
+	 */
+	br_i31_from_monty(s, n, n0i);
+	memcpy(tx, cd->order, nlen);
+	tx[nlen - 1] -= 2;  /* tx[] now encodes the exponent n - 2 */
+	br_i31_modpow(s, tx, nlen, n, n0i, t1, t2);
+
+	/*
+	 * Truncate the hash to the modulus length (in bits) and reduce
+	 * it modulo the curve order. The modular reduction can be done
+	 * with a subtraction since the truncation already reduced the
+	 * value to the modulus bit length.
+	 */
+	br_ecdsa_i31_bits2int(t1, hash, hash_len, n[0]);
+	br_i31_sub(t1, n, br_i31_sub(t1, n, 0) ^ 1);
+
+	/*
+	 * Multiply the (truncated, reduced) hash value with 1/s, result in
+	 * t2, encoded in ty.
+	 */
+	br_i31_montymul(t2, t1, s, n, n0i);
+	br_i31_encode(ty, nlen, t2);
+
+	/*
+	 * Multiply r with 1/s, result in t1, encoded in tx.
+	 */
+	br_i31_montymul(t1, r, s, n, n0i);
+	br_i31_encode(tx, nlen, t1);
+
+	/*
+	 * Compute the point x*Q + y*G.
+	 */
+	ulen = cd->generator_len;
+	memcpy(eU, pk->q, ulen);  /* eU starts as Q, receives the result */
+	res = impl->muladd(eU, NULL, ulen,
+		tx, nlen, ty, nlen, cd->curve);
+
+	/*
+	 * Get the X coordinate, reduce modulo the curve order, and
+	 * compare with the 'r' value.
+	 *
+	 * The modular reduction can be done with subtractions because
+	 * we work with curves of prime order, so the curve order is
+	 * close to the field order (Hasse's theorem).
+	 */
+	br_i31_zero(t1, n[0]);
+	br_i31_decode(t1, &eU[1], ulen >> 1);
+	t1[0] = n[0];
+	br_i31_sub(t1, n, br_i31_sub(t1, n, 0) ^ 1);
+	res &= ~br_i31_sub(t1, r, 1);
+	res &= br_i31_iszero(t1);  /* accept only if X mod n equals r */
+	return res;
+}
diff --git a/third_party/bearssl/src/ecdsa_rta.c b/third_party/bearssl/src/ecdsa_rta.c
new file mode 100644
index 0000000..005c62c
--- /dev/null
+++ b/third_party/bearssl/src/ecdsa_rta.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Compute ASN.1 encoded length for the provided integer. The ASN.1
+ * encoding is signed, so its leading bit must have value 0; it must
+ * also be of minimal length (so leading bytes of value 0 must be
+ * removed, except if that would contradict the rule about the sign
+ * bit).
+ */
+static size_t
+asn1_int_length(const unsigned char *x, size_t xlen)
+{
+	while (xlen > 0 && *x == 0) {  /* skip leading zero bytes */
+		x ++;
+		xlen --;
+	}
+	if (xlen == 0 || *x >= 0x80) {  /* zero value, or top bit set */
+		xlen ++;  /* count one extra (leading) byte */
+	}
+	return xlen;
+}
+
+/* see bearssl_ec.h; rewrites sig[] in place, returns new length (0 = error) */
+size_t
+br_ecdsa_raw_to_asn1(void *sig, size_t sig_len)
+{
+	/*
+	 * Internal buffer is large enough to accommodate a signature
+	 * such that r and s fit on 125 bytes each (signed encoding),
+	 * meaning a curve order of up to 999 bits. This is the limit
+	 * that ensures "simple" length encodings.
+	 */
+	unsigned char *buf;
+	size_t hlen, rlen, slen, zlen, off;
+	unsigned char tmp[257];
+
+	buf = sig;
+	if ((sig_len & 1) != 0) {  /* r and s must have the same length */
+		return 0;
+	}
+
+	/*
+	 * Compute lengths for the two integers.
+	 */
+	hlen = sig_len >> 1;
+	rlen = asn1_int_length(buf, hlen);
+	slen = asn1_int_length(buf + hlen, hlen);
+	if (rlen > 125 || slen > 125) {
+		return 0;
+	}
+
+	/*
+	 * SEQUENCE header.
+	 */
+	tmp[0] = 0x30;
+	zlen = rlen + slen + 4;  /* +2 header bytes per INTEGER */
+	if (zlen >= 0x80) {  /* length needs the 0x81 prefix byte */
+		tmp[1] = 0x81;
+		tmp[2] = zlen;
+		off = 3;
+	} else {
+		tmp[1] = zlen;
+		off = 2;
+	}
+
+	/*
+	 * First INTEGER (r).
+	 */
+	tmp[off ++] = 0x02;
+	tmp[off ++] = rlen;
+	if (rlen > hlen) {  /* an extra leading 0x00 byte is needed */
+		tmp[off] = 0x00;
+		memcpy(tmp + off + 1, buf, hlen);
+	} else {  /* drop the hlen - rlen leading bytes */
+		memcpy(tmp + off, buf + hlen - rlen, rlen);
+	}
+	off += rlen;
+
+	/*
+	 * Second INTEGER (s).
+	 */
+	tmp[off ++] = 0x02;
+	tmp[off ++] = slen;
+	if (slen > hlen) {  /* an extra leading 0x00 byte is needed */
+		tmp[off] = 0x00;
+		memcpy(tmp + off + 1, buf + hlen, hlen);
+	} else {  /* drop the hlen - slen leading bytes */
+		memcpy(tmp + off, buf + sig_len - slen, slen);
+	}
+	off += slen;
+
+	/*
+	 * Return ASN.1 signature.
+	 */
+	memcpy(sig, tmp, off);
+	return off;
+}
diff --git a/third_party/bearssl/src/enc16be.c b/third_party/bearssl/src/enc16be.c
new file mode 100644
index 0000000..6e06652
--- /dev/null
+++ b/third_party/bearssl/src/enc16be.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; applies br_enc16be() to num values from v[], 2 bytes apart */
+void
+br_range_enc16be(void *dst, const uint16_t *v, size_t num)
+{
+	unsigned char *buf;
+
+	buf = dst;
+	while (num -- > 0) {
+		br_enc16be(buf, *v ++);
+		buf += 2;  /* next output slot */
+	}
+}
diff --git a/third_party/bearssl/src/enc16le.c b/third_party/bearssl/src/enc16le.c
new file mode 100644
index 0000000..3e5049a
--- /dev/null
+++ b/third_party/bearssl/src/enc16le.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; applies br_enc16le() to num values from v[], 2 bytes apart */
+void
+br_range_enc16le(void *dst, const uint16_t *v, size_t num)
+{
+	unsigned char *buf;
+
+	buf = dst;
+	while (num -- > 0) {
+		br_enc16le(buf, *v ++);
+		buf += 2;  /* next output slot */
+	}
+}
diff --git a/third_party/bearssl/src/enc32be.c b/third_party/bearssl/src/enc32be.c
new file mode 100644
index 0000000..97298b5
--- /dev/null
+++ b/third_party/bearssl/src/enc32be.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; applies br_enc32be() to num values from v[], 4 bytes apart */
+void
+br_range_enc32be(void *dst, const uint32_t *v, size_t num)
+{
+	unsigned char *buf;
+
+	buf = dst;
+	while (num -- > 0) {
+		br_enc32be(buf, *v ++);
+		buf += 4;  /* next output slot */
+	}
+}
diff --git a/third_party/bearssl/src/enc32le.c b/third_party/bearssl/src/enc32le.c
new file mode 100644
index 0000000..9e9c856
--- /dev/null
+++ b/third_party/bearssl/src/enc32le.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; applies br_enc32le() to num values from v[], 4 bytes apart */
+void
+br_range_enc32le(void *dst, const uint32_t *v, size_t num)
+{
+	unsigned char *buf;
+
+	buf = dst;
+	while (num -- > 0) {
+		br_enc32le(buf, *v ++);
+		buf += 4;  /* next output slot */
+	}
+}
diff --git a/third_party/bearssl/src/enc64be.c b/third_party/bearssl/src/enc64be.c
new file mode 100644
index 0000000..d548944
--- /dev/null
+++ b/third_party/bearssl/src/enc64be.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; applies br_enc64be() to num values from v[], 8 bytes apart */
+void
+br_range_enc64be(void *dst, const uint64_t *v, size_t num)
+{
+	unsigned char *buf;
+
+	buf = dst;
+	while (num -- > 0) {
+		br_enc64be(buf, *v ++);
+		buf += 8;  /* next output slot */
+	}
+}
diff --git a/third_party/bearssl/src/enc64le.c b/third_party/bearssl/src/enc64le.c
new file mode 100644
index 0000000..1f1d68e
--- /dev/null
+++ b/third_party/bearssl/src/enc64le.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; applies br_enc64le() to num values from v[], 8 bytes apart */
+void
+br_range_enc64le(void *dst, const uint64_t *v, size_t num)
+{
+	unsigned char *buf;
+
+	buf = dst;
+	while (num -- > 0) {
+		br_enc64le(buf, *v ++);
+		buf += 8;  /* next output slot */
+	}
+}
diff --git a/third_party/bearssl/src/encode_ec_pk8der.c b/third_party/bearssl/src/encode_ec_pk8der.c
new file mode 100644
index 0000000..53717ce
--- /dev/null
+++ b/third_party/bearssl/src/encode_ec_pk8der.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_x509.h; returns the encoded length (dest may be NULL), 0 = error */
+size_t
+br_encode_ec_pkcs8_der(void *dest,
+	const br_ec_private_key *sk, const br_ec_public_key *pk)
+{
+	/*
+	 * ASN.1 format:
+	 *
+	 *   OneAsymmetricKey ::= SEQUENCE {
+	 *     version                   Version,
+	 *     privateKeyAlgorithm       PrivateKeyAlgorithmIdentifier,
+	 *     privateKey                PrivateKey,
+	 *     attributes            [0] Attributes OPTIONAL,
+	 *     ...,
+	 *     [[2: publicKey        [1] PublicKey OPTIONAL ]],
+	 *     ...
+	 *   }
+	 *
+	 * We don't include attributes or public key (the public key
+	 * is included in the private key value instead). The
+	 * 'version' field is an INTEGER that we will set to 0
+	 * (meaning 'v1', compatible with previous versions of PKCS#8).
+	 * The 'privateKeyAlgorithm' structure is an AlgorithmIdentifier
+	 * whose OID should be id-ecPublicKey, with, as parameters, the
+	 * curve OID. The 'privateKey' is an OCTET STRING, whose value
+	 * is the "raw DER" encoding of the key pair.
+	 */
+
+	/*
+	 * OID id-ecPublicKey (1.2.840.10045.2.1), DER-encoded (with
+	 * the tag).
+	 */
+	static const unsigned char OID_ECPUBKEY[] = {
+		0x06, 0x07, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x02, 0x01
+	};
+
+	size_t len_version, len_privateKeyAlgorithm, len_privateKeyValue;
+	size_t len_privateKey, len_seq;
+	const unsigned char *oid;
+
+	oid = br_get_curve_OID(sk->curve);
+	if (oid == NULL) {  /* no OID known for this curve */
+		return 0;
+	}
+	len_version = 3;  /* INTEGER: tag, length, one value byte */
+	len_privateKeyAlgorithm = 2 + sizeof OID_ECPUBKEY + 2 + oid[0];
+	len_privateKeyValue = br_encode_ec_raw_der_inner(NULL, sk, pk, 0);
+	len_privateKey = 1 + len_of_len(len_privateKeyValue)
+		+ len_privateKeyValue;
+	len_seq = len_version + len_privateKeyAlgorithm + len_privateKey;
+
+	if (dest == NULL) {  /* length computation only, nothing written */
+		return 1 + len_of_len(len_seq) + len_seq;
+	} else {
+		unsigned char *buf;
+		size_t lenlen;
+
+		buf = dest;
+		*buf ++ = 0x30;  /* SEQUENCE tag */
+		lenlen = br_asn1_encode_length(buf, len_seq);
+		buf += lenlen;
+
+		/* version */
+		*buf ++ = 0x02;
+		*buf ++ = 0x01;
+		*buf ++ = 0x00;
+
+		/* privateKeyAlgorithm */
+		*buf ++ = 0x30;  /* SEQUENCE tag */
+		*buf ++ = (sizeof OID_ECPUBKEY) + 2 + oid[0];
+		memcpy(buf, OID_ECPUBKEY, sizeof OID_ECPUBKEY);
+		buf += sizeof OID_ECPUBKEY;
+		*buf ++ = 0x06;  /* tag for the curve OID */
+		memcpy(buf, oid, 1 + oid[0]);  /* length byte, then value */
+		buf += 1 + oid[0];
+
+		/* privateKey */
+		*buf ++ = 0x04;  /* OCTET STRING tag */
+		buf += br_asn1_encode_length(buf, len_privateKeyValue);
+		br_encode_ec_raw_der_inner(buf, sk, pk, 0);
+
+		return 1 + lenlen + len_seq;
+	}
+}
diff --git a/third_party/bearssl/src/encode_ec_rawder.c b/third_party/bearssl/src/encode_ec_rawder.c
new file mode 100644
index 0000000..5985909
--- /dev/null
+++ b/third_party/bearssl/src/encode_ec_rawder.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h */
+const unsigned char *
+br_get_curve_OID(int curve)
+{
+	/*
+	 * Each table: one length byte, then that many OID value bytes.
+	 */
+	static const unsigned char p256_oid[] = {
+		0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07
+	};
+	static const unsigned char p384_oid[] = {
+		0x05, 0x2b, 0x81, 0x04, 0x00, 0x22
+	};
+	static const unsigned char p521_oid[] = {
+		0x05, 0x2b, 0x81, 0x04, 0x00, 0x23
+	};
+
+	return curve == BR_EC_secp256r1 ? p256_oid
+		: curve == BR_EC_secp384r1 ? p384_oid
+		: curve == BR_EC_secp521r1 ? p521_oid
+		: NULL;
+}
+
+/* see inner.h */
+size_t
+br_encode_ec_raw_der_inner(void *dest,
+	const br_ec_private_key *sk, const br_ec_public_key *pk,
+	int include_curve_oid)
+{
+	/*
+	 * Encodes the key pair as an ECPrivateKey structure. If 'dest' is
+	 * NULL, nothing is written and only the encoded length is returned;
+	 * otherwise the structure is written into 'dest' and its length is
+	 * returned. 'pk' may be NULL (publicKey is then omitted); the curve
+	 * OID is included only if 'include_curve_oid' is non-zero, in which
+	 * case 0 is returned if the curve is not supported.
+	 *
+	 *   ECPrivateKey ::= SEQUENCE {
+	 *     version        INTEGER { ecPrivkeyVer1(1) } (ecPrivkeyVer1),
+	 *     privateKey     OCTET STRING,
+	 *     parameters [0] ECParameters {{ NamedCurve }} OPTIONAL,
+	 *     publicKey  [1] BIT STRING OPTIONAL
+	 *   }
+	 *
+	 * The tags '[0]' and '[1]' are explicit. The 'ECParameters' is a
+	 * CHOICE; here, it is always an OBJECT IDENTIFIER for the curve.
+	 * The 'privateKey' value is the raw unsigned big-endian encoding
+	 * of the private key (integer modulo the curve subgroup order):
+	 * no INTEGER tag, the leading bit may be 1, and leading bytes of
+	 * value 0x00 are _not_ removed. The 'publicKey' contents are the
+	 * raw encoded point, normally uncompressed (byte 0x04, then the
+	 * big-endian X and Y coordinates, padded to the field length).
+	 */
+
+	size_t len_version, len_privateKey, len_parameters, len_publicKey;
+	size_t len_publicKey_bits, len_seq;
+	const unsigned char *oid;
+
+	if (include_curve_oid) {
+		oid = br_get_curve_OID(sk->curve);
+		if (oid == NULL) {
+			return 0;  /* curve has no known OID */
+		}
+	} else {
+		oid = NULL;
+	}
+	len_version = 3;  /* INTEGER: tag, length, one value byte */
+	len_privateKey = 1 + len_of_len(sk->xlen) + sk->xlen;
+	if (include_curve_oid) {
+		len_parameters = 4 + oid[0];  /* [0] and OID headers + value */
+	} else {
+		len_parameters = 0;
+	}
+	if (pk == NULL) {
+		len_publicKey = 0;
+		len_publicKey_bits = 0;
+	} else {  /* BIT STRING value is 1 + qlen bytes (unused-bits byte) */
+		len_publicKey_bits = 2 + len_of_len(pk->qlen + 1) + pk->qlen;
+		len_publicKey = 1 + len_of_len(len_publicKey_bits)
+			+ len_publicKey_bits;
+	}
+	len_seq = len_version + len_privateKey + len_parameters + len_publicKey;
+	if (dest == NULL) {  /* length query: nothing is written */
+		return 1 + len_of_len(len_seq) + len_seq;
+	} else {
+		unsigned char *buf;
+		size_t lenlen;
+
+		buf = dest;
+		*buf ++ = 0x30;  /* SEQUENCE tag */
+		lenlen = br_asn1_encode_length(buf, len_seq);
+		buf += lenlen;
+
+		/* version */
+		*buf ++ = 0x02;
+		*buf ++ = 0x01;
+		*buf ++ = 0x01;
+
+		/* privateKey */
+		*buf ++ = 0x04;  /* OCTET STRING tag */
+		buf += br_asn1_encode_length(buf, sk->xlen);
+		memcpy(buf, sk->x, sk->xlen);
+		buf += sk->xlen;
+
+		/* parameters */
+		if (include_curve_oid) {
+			*buf ++ = 0xA0;  /* explicit tag [0] */
+			*buf ++ = oid[0] + 2;
+			*buf ++ = 0x06;  /* OBJECT IDENTIFIER tag */
+			memcpy(buf, oid, oid[0] + 1);  /* length byte + value */
+			buf += oid[0] + 1;
+		}
+
+		/* publicKey */
+		if (pk != NULL) {
+			*buf ++ = 0xA1;  /* explicit tag [1] */
+			buf += br_asn1_encode_length(buf, len_publicKey_bits);
+			*buf ++ = 0x03;  /* BIT STRING tag */
+			buf += br_asn1_encode_length(buf, pk->qlen + 1);
+			*buf ++ = 0x00;  /* number of unused bits */
+			memcpy(buf, pk->q, pk->qlen);
+			/* last field written: buf need not be advanced */
+		}
+
+		return 1 + lenlen + len_seq;
+	}
+}
+
+/* see bearssl_x509.h; delegates with include_curve_oid set to 1 */
+size_t
+br_encode_ec_raw_der(void *dest,
+	const br_ec_private_key *sk, const br_ec_public_key *pk)
+{
+	return br_encode_ec_raw_der_inner(dest, sk, pk, 1);
+}
diff --git a/third_party/bearssl/src/encode_rsa_pk8der.c b/third_party/bearssl/src/encode_rsa_pk8der.c
new file mode 100644
index 0000000..c053503
--- /dev/null
+++ b/third_party/bearssl/src/encode_rsa_pk8der.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_x509.h */
+size_t
+br_encode_rsa_pkcs8_der(void *dest, const br_rsa_private_key *sk,
+	const br_rsa_public_key *pk, const void *d, size_t dlen)
+{
+	/*
+	 * ASN.1 format:
+	 *
+	 *   OneAsymmetricKey ::= SEQUENCE {
+	 *     version                   Version,
+	 *     privateKeyAlgorithm       PrivateKeyAlgorithmIdentifier,
+	 *     privateKey                PrivateKey,
+	 *     attributes            [0] Attributes OPTIONAL,
+	 *     ...,
+	 *     [[2: publicKey        [1] PublicKey OPTIONAL ]],
+	 *     ...
+	 *   }
+	 *
+	 * We don't include attributes or public key. The 'version' field
+	 * is an INTEGER that we will set to 0 (meaning 'v1', compatible
+	 * with previous versions of PKCS#8). The 'privateKeyAlgorithm'
+	 * structure is an AlgorithmIdentifier whose OID should be
+	 * rsaEncryption, with NULL parameters. The 'privateKey' is an
+	 * OCTET STRING, whose value is the "raw DER" encoding of the
+	 * key pair.
+	 *
+	 * Since the private key value comes last, this function really
+	 * adds a header, which is mostly fixed (only some lengths have
+	 * to be modified).
+	 */
+
+	/*
+	 * Concatenation of:
+	 *  - DER encoding of an INTEGER of value 0 (the 'version' field)
+	 *  - DER encoding of a PrivateKeyAlgorithmIdentifier that uses
+	 *    the rsaEncryption OID, and NULL parameters
+	 *  - An OCTET STRING tag
+	 */
+	static const unsigned char PK8_HEAD[] = {
+		0x02, 0x01, 0x00,  /* version: INTEGER 0 */
+		0x30, 0x0d, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86,
+		0xf7, 0x0d, 0x01, 0x01, 0x01, 0x05, 0x00,
+		0x04  /* OCTET STRING tag; its length is written separately */
+	};
+
+	size_t len_raw, len_seq;
+
+	len_raw = br_encode_rsa_raw_der(NULL, sk, pk, d, dlen);  /* size only */
+	len_seq = (sizeof PK8_HEAD) + len_of_len(len_raw) + len_raw;
+	if (dest == NULL) {  /* length query: nothing is written */
+		return 1 + len_of_len(len_seq) + len_seq;
+	} else {
+		unsigned char *buf;
+		size_t lenlen;
+
+		buf = dest;
+		*buf ++ = 0x30;  /* SEQUENCE tag */
+		lenlen = br_asn1_encode_length(buf, len_seq);
+		buf += lenlen;
+
+		/* version, privateKeyAlgorithm, privateKey tag */
+		memcpy(buf, PK8_HEAD, sizeof PK8_HEAD);
+		buf += sizeof PK8_HEAD;
+
+		/* privateKey: length, then the raw DER key as the value */
+		buf += br_asn1_encode_length(buf, len_raw);
+		br_encode_rsa_raw_der(buf, sk, pk, d, dlen);
+
+		return 1 + lenlen + len_seq;  /* tag + length bytes + contents */
+	}
+}
diff --git a/third_party/bearssl/src/encode_rsa_rawder.c b/third_party/bearssl/src/encode_rsa_rawder.c
new file mode 100644
index 0000000..1a8052b
--- /dev/null
+++ b/third_party/bearssl/src/encode_rsa_rawder.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_x509.h */
+size_t
+br_encode_rsa_raw_der(void *dest, const br_rsa_private_key *sk,
+	const br_rsa_public_key *pk, const void *d, size_t dlen)
+{
+	/*
+	 * ASN.1 format:
+	 *
+	 *   RSAPrivateKey ::= SEQUENCE {
+	 *       version           Version,
+	 *       modulus           INTEGER,  -- n
+	 *       publicExponent    INTEGER,  -- e
+	 *       privateExponent   INTEGER,  -- d
+	 *       prime1            INTEGER,  -- p
+	 *       prime2            INTEGER,  -- q
+	 *       exponent1         INTEGER,  -- d mod (p-1)
+	 *       exponent2         INTEGER,  -- d mod (q-1)
+	 *       coefficient       INTEGER,  -- (inverse of q) mod p
+	 *       otherPrimeInfos   OtherPrimeInfos OPTIONAL
+	 *   }
+	 *
+	 * The 'version' field is an INTEGER of value 0 (meaning: there
+	 * are exactly two prime factors), and 'otherPrimeInfos' will
+	 * be absent (because there are exactly two prime factors).
+	 */
+
+	br_asn1_uint num[9];  /* the nine INTEGER fields, in encoding order */
+	size_t u, slen;
+
+	/*
+	 * For all INTEGER values, get the pointer and length for the
+	 * data bytes.
+	 */
+	num[0] = br_asn1_uint_prepare(NULL, 0);  /* version (empty input) */
+	num[1] = br_asn1_uint_prepare(pk->n, pk->nlen);
+	num[2] = br_asn1_uint_prepare(pk->e, pk->elen);
+	num[3] = br_asn1_uint_prepare(d, dlen);
+	num[4] = br_asn1_uint_prepare(sk->p, sk->plen);
+	num[5] = br_asn1_uint_prepare(sk->q, sk->qlen);
+	num[6] = br_asn1_uint_prepare(sk->dp, sk->dplen);
+	num[7] = br_asn1_uint_prepare(sk->dq, sk->dqlen);
+	num[8] = br_asn1_uint_prepare(sk->iq, sk->iqlen);
+
+	/*
+	 * Get the length of the SEQUENCE contents.
+	 */
+	slen = 0;
+	for (u = 0; u < 9; u ++) {
+		uint32_t ilen;
+
+		ilen = num[u].asn1len;
+		slen += 1 + len_of_len(ilen) + ilen;  /* tag + length + value */
+	}
+
+	if (dest == NULL) {  /* length query: nothing is written */
+		return 1 + len_of_len(slen) + slen;
+	} else {
+		unsigned char *buf;
+		size_t lenlen;
+
+		buf = dest;
+		*buf ++ = 0x30;  /* SEQUENCE tag */
+		lenlen = br_asn1_encode_length(buf, slen);
+		buf += lenlen;
+		for (u = 0; u < 9; u ++) {
+			buf += br_asn1_encode_uint(buf, num[u]);
+		}
+		return 1 + lenlen + slen;  /* tag + length bytes + contents */
+	}
+}
diff --git a/third_party/bearssl/src/gcm.c b/third_party/bearssl/src/gcm.c
new file mode 100644
index 0000000..ede5f08
--- /dev/null
+++ b/third_party/bearssl/src/gcm.c
@@ -0,0 +1,318 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Implementation Notes
+ * ====================
+ *
+ * Since CTR and GHASH implementations can handle only full blocks, a
+ * 16-byte buffer (buf[]) is maintained in the context:
+ *
+ *  - When processing AAD, buf[] contains the 0-15 unprocessed bytes.
+ *
+ *  - When doing CTR encryption / decryption, buf[] contains the AES output
+ *    for the last partial block, to be used with the next few bytes of
+ *    data, as well as the already encrypted bytes. For instance, if the
+ *    processed data length so far is 21 bytes, then buf[0..4] contains
+ *    the five last encrypted bytes, and buf[5..15] contains the next 11
+ *    AES output bytes to be XORed with the next 11 bytes of input.
+ *
+ *    The recorded AES output bytes are used to complete the block when
+ *    the corresponding bytes are obtained. Note that buf[] always
+ *    contains the _encrypted_ bytes, whether we apply encryption or
+ *    decryption: these bytes are used as input to GHASH when the block
+ *    is complete.
+ *
+ * In both cases, the low bits of the data length counters (count_aad,
+ * count_ctr) are used to work out the current situation.
+ */
+
+/* see bearssl_aead.h */
+void
+br_gcm_init(br_gcm_context *ctx, const br_block_ctr_class **bctx, br_ghash gh)
+{
+	unsigned char iv[12];  /* all-zero CTR IV, used only to derive h[] */
+
+	ctx->vtable = &br_gcm_vtable;
+	ctx->bctx = bctx;  /* CTR block cipher context, kept by reference */
+	ctx->gh = gh;      /* GHASH implementation used by this context */
+
+	/*
+	 * The GHASH key h[] is the raw encryption of the all-zero
+	 * block. Since we only have a CTR implementation, we use it
+	 * with an all-zero IV and a zero counter, to CTR-encrypt an
+	 * all-zero block.
+	 */
+	memset(ctx->h, 0, sizeof ctx->h);
+	memset(iv, 0, sizeof iv);
+	(*bctx)->run(bctx, iv, 0, ctx->h, sizeof ctx->h);  /* in place */
+}
+
+/* see bearssl_aead.h */
+void
+br_gcm_reset(br_gcm_context *ctx, const void *iv, size_t len)
+{
+	/*
+	 * If the provided nonce is 12 bytes, then this is the initial
+	 * IV for CTR mode; it will be used with a counter that starts
+	 * at 2 (value 1 is for encrypting the GHASH output into the tag).
+	 *
+	 * If the provided nonce has any other length, then it is hashed
+	 * (with GHASH) into a 16-byte value that will be the IV for CTR
+	 * (both 12-byte IV and 32-bit counter).
+	 */
+	if (len == 12) {
+		memcpy(ctx->j0_1, iv, 12);
+		ctx->j0_2 = 1;
+	} else {
+		unsigned char ty[16], tmp[16];
+
+		memset(ty, 0, sizeof ty);
+		ctx->gh(ty, ctx->h, iv, len);
+		memset(tmp, 0, 8);
+		br_enc64be(tmp + 8, (uint64_t)len << 3);  /* nonce length, bits */
+		ctx->gh(ty, ctx->h, tmp, 16);
+		memcpy(ctx->j0_1, ty, 12);         /* first 12 bytes: CTR IV */
+		ctx->j0_2 = br_dec32be(ty + 12);   /* last 4 bytes: counter */
+	}
+	ctx->jc = ctx->j0_2 + 1;  /* counter for the first data block */
+	memset(ctx->y, 0, sizeof ctx->y);  /* GHASH accumulator restarts */
+	ctx->count_aad = 0;
+	ctx->count_ctr = 0;
+}
+
+/* see bearssl_aead.h */
+void
+br_gcm_aad_inject(br_gcm_context *ctx, const void *data, size_t len)
+{
+	size_t ptr, dlen;
+
+	ptr = (size_t)ctx->count_aad & (size_t)15;  /* bytes pending in buf[] */
+	if (ptr != 0) {
+		/*
+		 * If there is a partial block, then we first try to
+		 * complete it.
+		 */
+		size_t clen;
+
+		clen = 16 - ptr;  /* bytes needed to fill the pending block */
+		if (len < clen) {
+			memcpy(ctx->buf + ptr, data, len);
+			ctx->count_aad += (uint64_t)len;
+			return;  /* block still incomplete: nothing to hash yet */
+		}
+		memcpy(ctx->buf + ptr, data, clen);
+		ctx->gh(ctx->y, ctx->h, ctx->buf, 16);
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+		ctx->count_aad += (uint64_t)clen;
+	}
+
+	/*
+	 * Now AAD is aligned on a 16-byte block (with regards to GHASH).
+	 * We process all complete blocks, and save the last partial
+	 * block.
+	 */
+	dlen = len & ~(size_t)15;  /* len rounded down to a multiple of 16 */
+	ctx->gh(ctx->y, ctx->h, data, dlen);
+	memcpy(ctx->buf, (const unsigned char *)data + dlen, len - dlen);
+	ctx->count_aad += (uint64_t)len;
+}
+
+/* see bearssl_aead.h */
+void
+br_gcm_flip(br_gcm_context *ctx)
+{
+	/*
+	 * End of AAD: if a partial AAD block is pending in buf[], it is
+	 * run through GHASH now. The GHASH implementation automatically
+	 * applies padding with zeros.
+	 */
+	size_t ptr;
+
+	ptr = (size_t)ctx->count_aad & (size_t)15;  /* AAD bytes in buf[] */
+	if (ptr != 0) {
+		ctx->gh(ctx->y, ctx->h, ctx->buf, ptr);
+	}
+}
+
+/* see bearssl_aead.h */
+void
+br_gcm_run(br_gcm_context *ctx, int encrypt, void *data, size_t len)
+{
+	unsigned char *buf;
+	size_t ptr, dlen;
+
+	buf = data;
+	ptr = (size_t)ctx->count_ctr & (size_t)15;  /* offset in current block */
+	if (ptr != 0) {
+		/*
+		 * If we have a partial block, then we try to complete it.
+		 */
+		size_t u, clen;
+
+		clen = 16 - ptr;
+		if (len < clen) {
+			clen = len;
+		}
+		for (u = 0; u < clen; u ++) {
+			unsigned x, y;
+
+			x = buf[u];
+			y = x ^ ctx->buf[ptr + u];  /* XOR with saved AES output */
+			ctx->buf[ptr + u] = encrypt ? y : x;  /* keep ciphertext */
+			buf[u] = y;
+		}
+		ctx->count_ctr += (uint64_t)clen;
+		buf += clen;
+		len -= clen;
+		if (ptr + clen < 16) {
+			return;  /* block still incomplete */
+		}
+		ctx->gh(ctx->y, ctx->h, ctx->buf, 16);
+	}
+
+	/*
+	 * Process full blocks.
+	 */
+	dlen = len & ~(size_t)15;
+	if (!encrypt) {
+		ctx->gh(ctx->y, ctx->h, buf, dlen);  /* hash before decrypting */
+	}
+	ctx->jc = (*ctx->bctx)->run(ctx->bctx, ctx->j0_1, ctx->jc, buf, dlen);
+	if (encrypt) {
+		ctx->gh(ctx->y, ctx->h, buf, dlen);  /* hash after encrypting */
+	}
+	buf += dlen;
+	len -= dlen;
+	ctx->count_ctr += (uint64_t)dlen;
+
+	if (len > 0) {
+		/*
+		 * There is a partial block.
+		 */
+		size_t u;
+
+		memset(ctx->buf, 0, sizeof ctx->buf);
+		ctx->jc = (*ctx->bctx)->run(ctx->bctx, ctx->j0_1,
+			ctx->jc, ctx->buf, 16);  /* buf[] now holds AES output */
+		for (u = 0; u < len; u ++) {
+			unsigned x, y;
+
+			x = buf[u];
+			y = x ^ ctx->buf[u];
+			ctx->buf[u] = encrypt ? y : x;  /* keep ciphertext */
+			buf[u] = y;
+		}
+		ctx->count_ctr += (uint64_t)len;
+	}
+}
+
+/* see bearssl_aead.h */
+void
+br_gcm_get_tag(br_gcm_context *ctx, void *tag)
+{
+	size_t ptr;
+	unsigned char tmp[16];
+
+	ptr = (size_t)ctx->count_ctr & (size_t)15;  /* bytes in last block */
+	if (ptr > 0) {
+		/*
+		 * There is a partial block: encrypted/decrypted data has
+		 * been produced, but the encrypted bytes must still be
+		 * processed by GHASH.
+		 */
+		ctx->gh(ctx->y, ctx->h, ctx->buf, ptr);
+	}
+
+	/*
+	 * Final block for GHASH: the AAD and plaintext lengths (in bits).
+	 */
+	br_enc64be(tmp, ctx->count_aad << 3);
+	br_enc64be(tmp + 8, ctx->count_ctr << 3);
+	ctx->gh(ctx->y, ctx->h, tmp, 16);
+
+	/*
+	 * Tag is the GHASH output XORed with the encryption of the
+	 * nonce with the initial counter value.
+	 */
+	memcpy(tag, ctx->y, 16);  /* writes exactly 16 bytes into tag */
+	(*ctx->bctx)->run(ctx->bctx, ctx->j0_1, ctx->j0_2, tag, 16);
+}
+
+/* see bearssl_aead.h; copies the first len bytes of the full tag */
+void
+br_gcm_get_tag_trunc(br_gcm_context *ctx, void *tag, size_t len)
+{
+	unsigned char tmp[16];
+
+	br_gcm_get_tag(ctx, tmp);
+	memcpy(tag, tmp, len);  /* len must not exceed 16 (sizeof tmp) */
+}
+
+/* see bearssl_aead.h; len must not exceed 16 (size of the full tag) */
+uint32_t
+br_gcm_check_tag_trunc(br_gcm_context *ctx, const void *tag, size_t len)
+{
+	unsigned char tmp[16];
+	size_t u;
+	int x;
+
+	br_gcm_get_tag(ctx, tmp);
+	x = 0;
+	for (u = 0; u < len; u ++) {  /* no early exit on mismatch */
+		x |= tmp[u] ^ ((const unsigned char *)tag)[u];
+	}
+	return EQ0(x);  /* presumably 1 iff x == 0 (EQ0 is from inner.h) */
+}
+
+/* see bearssl_aead.h; checks the full 16-byte tag */
+uint32_t
+br_gcm_check_tag(br_gcm_context *ctx, const void *tag)
+{
+	return br_gcm_check_tag_trunc(ctx, tag, 16);
+}
+
+/* see bearssl_aead.h; casts adapt the context pointer type per entry */
+const br_aead_class br_gcm_vtable = {
+	16,  /* presumably the tag size in bytes -- confirm in br_aead_class */
+	(void (*)(const br_aead_class **, const void *, size_t))
+		&br_gcm_reset,
+	(void (*)(const br_aead_class **, const void *, size_t))
+		&br_gcm_aad_inject,
+	(void (*)(const br_aead_class **))
+		&br_gcm_flip,
+	(void (*)(const br_aead_class **, int, void *, size_t))
+		&br_gcm_run,
+	(void (*)(const br_aead_class **, void *))
+		&br_gcm_get_tag,
+	(uint32_t (*)(const br_aead_class **, const void *))
+		&br_gcm_check_tag,
+	(void (*)(const br_aead_class **, void *, size_t))
+		&br_gcm_get_tag_trunc,
+	(uint32_t (*)(const br_aead_class **, const void *, size_t))
+		&br_gcm_check_tag_trunc
+};
diff --git a/third_party/bearssl/src/ghash_ctmul.c b/third_party/bearssl/src/ghash_ctmul.c
new file mode 100644
index 0000000..3623202
--- /dev/null
+++ b/third_party/bearssl/src/ghash_ctmul.c
@@ -0,0 +1,345 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * We compute "carryless multiplications" through normal integer
+ * multiplications, masking out enough bits to create "holes" in which
+ * carries may expand without altering our bits; we really use 8 data
+ * bits per 32-bit word, spaced every fourth bit. Accumulated carries
+ * may not exceed 8 in total, which fits in 4 bits.
+ *
+ * It would be possible to use a 3-bit spacing, allowing two operands,
+ * one with 7 non-zero data bits, the other one with 10 or 11 non-zero
+ * data bits; this asymmetric splitting makes the overall code more
+ * complex with thresholds and exceptions, and does not appear to be
+ * worth the effort.
+ */
+
+/*
+ * We cannot really autodetect whether multiplications are "slow" or
+ * not. A typical example is the ARM Cortex M0+, which exists in two
+ * versions: one with a 1-cycle multiplication opcode, the other with
+ * a 32-cycle multiplication opcode. They both use exactly the same
+ * architecture and ABI, and cannot be distinguished from each other
+ * at compile-time.
+ *
+ * Since most modern CPUs (even embedded ones) still have fast
+ * multiplications, we use the "fast mul" code by default.
+ */
+
+#if BR_SLOW_MUL
+
+/*
+ * This implementation uses Karatsuba-like reduction to make fewer
+ * integer multiplications (9 instead of 16), at the expense of extra
+ * logical operations (XOR, shifts...). On modern x86 CPUs that offer
+ * fast, pipelined multiplications, this code is about twice as slow as
+ * the simpler code with 16 multiplications. This tendency may be
+ * reversed on low-end platforms with expensive multiplications.
+ */
+
+#define MUL32(h, l, x, y)   do { \
+		uint64_t mul32tmp = MUL(x, y); \
+		(h) = (uint32_t)(mul32tmp >> 32); \
+		(l) = (uint32_t)mul32tmp; \
+	} while (0)
+
+static inline void
+bmul(uint32_t *hi, uint32_t *lo, uint32_t x, uint32_t y)
+{
+	uint32_t x0, x1, x2, x3;
+	uint32_t y0, y1, y2, y3;
+	uint32_t a0, a1, a2, a3, a4, a5, a6, a7, a8;
+	uint32_t b0, b1, b2, b3, b4, b5, b6, b7, b8;
+
+	x0 = x & (uint32_t)0x11111111;  /* every fourth bit, offset 0 */
+	x1 = x & (uint32_t)0x22222222;  /* every fourth bit, offset 1 */
+	x2 = x & (uint32_t)0x44444444;  /* every fourth bit, offset 2 */
+	x3 = x & (uint32_t)0x88888888;  /* every fourth bit, offset 3 */
+	y0 = y & (uint32_t)0x11111111;
+	y1 = y & (uint32_t)0x22222222;
+	y2 = y & (uint32_t)0x44444444;
+	y3 = y & (uint32_t)0x88888888;
+
+	/*
+	 * (x0+W*x1)*(y0+W*y1) -> a0:b0
+	 * (x2+W*x3)*(y2+W*y3) -> a3:b3
+	 * ((x0+x2)+W*(x1+x3))*((y0+y2)+W*(y1+y3)) -> a6:b6
+	 */
+	a0 = x0;
+	b0 = y0;
+	a1 = x1 >> 1;  /* realigned onto the 0x11111111 bit positions */
+	b1 = y1 >> 1;
+	a2 = a0 ^ a1;
+	b2 = b0 ^ b1;
+	a3 = x2 >> 2;
+	b3 = y2 >> 2;
+	a4 = x3 >> 3;
+	b4 = y3 >> 3;
+	a5 = a3 ^ a4;
+	b5 = b3 ^ b4;
+	a6 = a0 ^ a3;
+	b6 = b0 ^ b3;
+	a7 = a1 ^ a4;
+	b7 = b1 ^ b4;
+	a8 = a6 ^ a7;
+	b8 = b6 ^ b7;
+
+	MUL32(b0, a0, b0, a0);  /* each pair becomes high:low of its product */
+	MUL32(b1, a1, b1, a1);
+	MUL32(b2, a2, b2, a2);
+	MUL32(b3, a3, b3, a3);
+	MUL32(b4, a4, b4, a4);
+	MUL32(b5, a5, b5, a5);
+	MUL32(b6, a6, b6, a6);
+	MUL32(b7, a7, b7, a7);
+	MUL32(b8, a8, b8, a8);
+
+	a0 &= (uint32_t)0x11111111;  /* keep data bits, drop carry spill */
+	a1 &= (uint32_t)0x11111111;
+	a2 &= (uint32_t)0x11111111;
+	a3 &= (uint32_t)0x11111111;
+	a4 &= (uint32_t)0x11111111;
+	a5 &= (uint32_t)0x11111111;
+	a6 &= (uint32_t)0x11111111;
+	a7 &= (uint32_t)0x11111111;
+	a8 &= (uint32_t)0x11111111;
+	b0 &= (uint32_t)0x11111111;
+	b1 &= (uint32_t)0x11111111;
+	b2 &= (uint32_t)0x11111111;
+	b3 &= (uint32_t)0x11111111;
+	b4 &= (uint32_t)0x11111111;
+	b5 &= (uint32_t)0x11111111;
+	b6 &= (uint32_t)0x11111111;
+	b7 &= (uint32_t)0x11111111;
+	b8 &= (uint32_t)0x11111111;
+
+	a2 ^= a0 ^ a1;
+	b2 ^= b0 ^ b1;
+	a0 ^= (a2 << 1) ^ (a1 << 2);
+	b0 ^= (b2 << 1) ^ (b1 << 2);
+	a5 ^= a3 ^ a4;
+	b5 ^= b3 ^ b4;
+	a3 ^= (a5 << 1) ^ (a4 << 2);
+	b3 ^= (b5 << 1) ^ (b4 << 2);
+	a8 ^= a6 ^ a7;
+	b8 ^= b6 ^ b7;
+	a6 ^= (a8 << 1) ^ (a7 << 2);
+	b6 ^= (b8 << 1) ^ (b7 << 2);
+	a6 ^= a0 ^ a3;
+	b6 ^= b0 ^ b3;
+	*lo = a0 ^ (a6 << 2) ^ (a3 << 4);
+	*hi = b0 ^ (b6 << 2) ^ (b3 << 4) ^ (a6 >> 30) ^ (a3 >> 28);
+}
+
+#else
+
+/*
+ * Simple multiplication in GF(2)[X], using 16 integer multiplications.
+ */
+
+static inline void
+bmul(uint32_t *hi, uint32_t *lo, uint32_t x, uint32_t y)
+{
+	uint32_t x0, x1, x2, x3;
+	uint32_t y0, y1, y2, y3;
+	uint64_t z0, z1, z2, z3;
+	uint64_t z;
+
+	x0 = x & (uint32_t)0x11111111;  /* every fourth bit, offset 0 */
+	x1 = x & (uint32_t)0x22222222;  /* every fourth bit, offset 1 */
+	x2 = x & (uint32_t)0x44444444;  /* every fourth bit, offset 2 */
+	x3 = x & (uint32_t)0x88888888;  /* every fourth bit, offset 3 */
+	y0 = y & (uint32_t)0x11111111;
+	y1 = y & (uint32_t)0x22222222;
+	y2 = y & (uint32_t)0x44444444;
+	y3 = y & (uint32_t)0x88888888;
+	z0 = MUL(x0, y0) ^ MUL(x1, y3) ^ MUL(x2, y2) ^ MUL(x3, y1);
+	z1 = MUL(x0, y1) ^ MUL(x1, y0) ^ MUL(x2, y3) ^ MUL(x3, y2);
+	z2 = MUL(x0, y2) ^ MUL(x1, y1) ^ MUL(x2, y0) ^ MUL(x3, y3);
+	z3 = MUL(x0, y3) ^ MUL(x1, y2) ^ MUL(x2, y1) ^ MUL(x3, y0);
+	z0 &= (uint64_t)0x1111111111111111;  /* keep data bits only */
+	z1 &= (uint64_t)0x2222222222222222;
+	z2 &= (uint64_t)0x4444444444444444;
+	z3 &= (uint64_t)0x8888888888888888;
+	z = z0 | z1 | z2 | z3;  /* masks are disjoint: OR recombines them */
+	*lo = (uint32_t)z;
+	*hi = (uint32_t)(z >> 32);
+}
+
+#endif
+
+/* see bearssl_hash.h; updates y[] in place with GHASH over data[] */
+void
+br_ghash_ctmul(void *y, const void *h, const void *data, size_t len)
+{
+	const unsigned char *buf, *hb;
+	unsigned char *yb;
+	uint32_t yw[4];
+	uint32_t hw[4];
+
+	/*
+	 * Throughout the loop we handle the y and h values as arrays
+	 * of 32-bit words.
+	 */
+	buf = data;
+	yb = y;
+	hb = h;
+	yw[3] = br_dec32be(yb);  /* word index 3 holds the first 4 bytes */
+	yw[2] = br_dec32be(yb + 4);
+	yw[1] = br_dec32be(yb + 8);
+	yw[0] = br_dec32be(yb + 12);
+	hw[3] = br_dec32be(hb);
+	hw[2] = br_dec32be(hb + 4);
+	hw[1] = br_dec32be(hb + 8);
+	hw[0] = br_dec32be(hb + 12);
+	while (len > 0) {
+		const unsigned char *src;
+		unsigned char tmp[16];
+		int i;
+		uint32_t a[9], b[9], zw[8];
+		uint32_t c0, c1, c2, c3, d0, d1, d2, d3, e0, e1, e2, e3;
+
+		/*
+		 * Get the next 16-byte block (using zero-padding if
+		 * necessary).
+		 */
+		if (len >= 16) {
+			src = buf;
+			buf += 16;
+			len -= 16;
+		} else {
+			memcpy(tmp, buf, len);
+			memset(tmp + len, 0, (sizeof tmp) - len);
+			src = tmp;
+			len = 0;  /* padded block is the last one */
+		}
+
+		/*
+		 * Decode the block. The GHASH standard mandates
+		 * big-endian encoding.
+		 */
+		yw[3] ^= br_dec32be(src);
+		yw[2] ^= br_dec32be(src + 4);
+		yw[1] ^= br_dec32be(src + 8);
+		yw[0] ^= br_dec32be(src + 12);
+
+		/*
+		 * We multiply two 128-bit field elements. We use
+		 * Karatsuba to turn that into three 64-bit
+		 * multiplications, which are themselves done with a
+		 * total of nine 32-bit multiplications.
+		 */
+
+		/*
+		 * y[0,1]*h[0,1] -> 0..2
+		 * y[2,3]*h[2,3] -> 3..5
+		 * (y[0,1]+y[2,3])*(h[0,1]+h[2,3]) -> 6..8
+		 */
+		a[0] = yw[0];
+		b[0] = hw[0];
+		a[1] = yw[1];
+		b[1] = hw[1];
+		a[2] = a[0] ^ a[1];
+		b[2] = b[0] ^ b[1];
+
+		a[3] = yw[2];
+		b[3] = hw[2];
+		a[4] = yw[3];
+		b[4] = hw[3];
+		a[5] = a[3] ^ a[4];
+		b[5] = b[3] ^ b[4];
+
+		a[6] = a[0] ^ a[3];
+		b[6] = b[0] ^ b[3];
+		a[7] = a[1] ^ a[4];
+		b[7] = b[1] ^ b[4];
+		a[8] = a[6] ^ a[7];
+		b[8] = b[6] ^ b[7];
+
+		for (i = 0; i < 9; i ++) {
+			bmul(&b[i], &a[i], b[i], a[i]);  /* b[i]:a[i] = hi:lo */
+		}
+
+		c0 = a[0];
+		c1 = b[0] ^ a[2] ^ a[0] ^ a[1];
+		c2 = a[1] ^ b[2] ^ b[0] ^ b[1];
+		c3 = b[1];
+		d0 = a[3];
+		d1 = b[3] ^ a[5] ^ a[3] ^ a[4];
+		d2 = a[4] ^ b[5] ^ b[3] ^ b[4];
+		d3 = b[4];
+		e0 = a[6];
+		e1 = b[6] ^ a[8] ^ a[6] ^ a[7];
+		e2 = a[7] ^ b[8] ^ b[6] ^ b[7];
+		e3 = b[7];
+
+		e0 ^= c0 ^ d0;
+		e1 ^= c1 ^ d1;
+		e2 ^= c2 ^ d2;
+		e3 ^= c3 ^ d3;
+		c2 ^= e0;
+		c3 ^= e1;
+		d0 ^= e2;
+		d1 ^= e3;
+
+		/*
+		 * GHASH specification has the bits "reversed" (most
+		 * significant is in fact least significant), which does
+		 * not matter for a carryless multiplication, except that
+		 * the 255-bit result must be shifted by 1 bit.
+		 */
+		zw[0] = c0 << 1;
+		zw[1] = (c1 << 1) | (c0 >> 31);
+		zw[2] = (c2 << 1) | (c1 >> 31);
+		zw[3] = (c3 << 1) | (c2 >> 31);
+		zw[4] = (d0 << 1) | (c3 >> 31);
+		zw[5] = (d1 << 1) | (d0 >> 31);
+		zw[6] = (d2 << 1) | (d1 >> 31);
+		zw[7] = (d3 << 1) | (d2 >> 31);
+
+		/*
+		 * We now do the reduction modulo the field polynomial
+		 * to get back to 128 bits.
+		 */
+		for (i = 0; i < 4; i ++) {
+			uint32_t lw;
+
+			lw = zw[i];
+			zw[i + 4] ^= lw ^ (lw >> 1) ^ (lw >> 2) ^ (lw >> 7);
+			zw[i + 3] ^= (lw << 31) ^ (lw << 30) ^ (lw << 25);
+		}
+		memcpy(yw, zw + 4, sizeof yw);  /* upper four words: reduced y */
+	}
+
+	/*
+	 * Encode back the result.
+	 */
+	br_enc32be(yb, yw[3]);
+	br_enc32be(yb + 4, yw[2]);
+	br_enc32be(yb + 8, yw[1]);
+	br_enc32be(yb + 12, yw[0]);
+}
diff --git a/third_party/bearssl/src/ghash_ctmul32.c b/third_party/bearssl/src/ghash_ctmul32.c
new file mode 100644
index 0000000..c66af46
--- /dev/null
+++ b/third_party/bearssl/src/ghash_ctmul32.c
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * This implementation uses 32-bit multiplications, and only the low
+ * 32 bits for each multiplication result. This is meant primarily for
+ * the ARM Cortex M0 and M0+, whose multiplication opcode does not yield
+ * the upper 32 bits; but it might also be useful on architectures where
+ * access to the upper 32 bits requires use of specific registers that
+ * create contention (e.g. on i386, "mul" necessarily outputs the result
+ * in edx:eax, while "imul" can use any registers but is limited to the
+ * low 32 bits).
+ *
+ * The implementation trick that is used here is bit-reversing (bit 0
+ * is swapped with bit 31, bit 1 with bit 30, and so on). In GF(2)[X],
+ * for all values x and y, we have:
+ *    rev32(x) * rev32(y) = rev63(x * y) = rev64(x * y) >> 1
+ * In other words, if we bit-reverse (over 32 bits) the operands, then we
+ * bit-reverse (over 63 bits) the result; hence the ">> 1" fix-ups below.
+ */
+
+/*
+ * Multiplication in GF(2)[X], truncated to its low 32 bits.
+ */
+static inline uint32_t
+bmul32(uint32_t x, uint32_t y)
+{
+	uint32_t x0, x1, x2, x3;
+	uint32_t y0, y1, y2, y3;
+	uint32_t z0, z1, z2, z3;
+
+	x0 = x & (uint32_t)0x11111111;	/* xk: bits of x at indices k mod 4 */
+	x1 = x & (uint32_t)0x22222222;
+	x2 = x & (uint32_t)0x44444444;
+	x3 = x & (uint32_t)0x88888888;
+	y0 = y & (uint32_t)0x11111111;	/* yk: same split for y */
+	y1 = y & (uint32_t)0x22222222;
+	y2 = y & (uint32_t)0x44444444;
+	y3 = y & (uint32_t)0x88888888;
+	z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1);	/* i+j = 0 mod 4 */
+	z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2);	/* i+j = 1 mod 4 */
+	z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3);	/* i+j = 2 mod 4 */
+	z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0);	/* i+j = 3 mod 4 */
+	z0 &= (uint32_t)0x11111111;	/* keep class-k bits of zk only; other */
+	z1 &= (uint32_t)0x22222222;	/* bits hold integer-carry spill-over */
+	z2 &= (uint32_t)0x44444444;
+	z3 &= (uint32_t)0x88888888;
+	return z0 | z1 | z2 | z3;
+}
+
+/*
+ * Bit-reverse a 32-bit word.
+ */
+static uint32_t
+rev32(uint32_t x)
+{
+#define RMS(m, s)   do { \
+		x = ((x & (uint32_t)(m)) << (s)) \
+			| ((x >> (s)) & (uint32_t)(m)); \
+	} while (0)
+
+	RMS(0x55555555, 1);	/* swap adjacent bits */
+	RMS(0x33333333, 2);	/* swap adjacent 2-bit pairs */
+	RMS(0x0F0F0F0F, 4);	/* swap the two nibbles of each byte */
+	RMS(0x00FF00FF, 8);	/* swap the two bytes of each 16-bit half */
+	return (x << 16) | (x >> 16);	/* swap the two 16-bit halves */
+
+#undef RMS
+}
+
+/* see bearssl_hash.h */
+void
+br_ghash_ctmul32(void *y, const void *h, const void *data, size_t len)
+{
+	/*
+	 * This implementation is similar to br_ghash_ctmul() except
+	 * that we have to do the multiplication twice, with the
+	 * "normal" and "bit reversed" operands. Hence we end up with
+	 * eighteen 32-bit multiplications instead of nine.
+	 */
+
+	const unsigned char *buf, *hb;
+	unsigned char *yb;
+	uint32_t yw[4];
+	uint32_t hw[4], hwr[4];
+
+	buf = data;
+	yb = y;
+	hb = h;
+	yw[3] = br_dec32be(yb);	/* yw[3] = first 4 bytes of y, yw[0] = last 4 */
+	yw[2] = br_dec32be(yb + 4);
+	yw[1] = br_dec32be(yb + 8);
+	yw[0] = br_dec32be(yb + 12);
+	hw[3] = br_dec32be(hb);	/* same word order for h */
+	hw[2] = br_dec32be(hb + 4);
+	hw[1] = br_dec32be(hb + 8);
+	hw[0] = br_dec32be(hb + 12);
+	hwr[3] = rev32(hw[3]);	/* bit-reversed h words, computed once for all blocks */
+	hwr[2] = rev32(hw[2]);
+	hwr[1] = rev32(hw[1]);
+	hwr[0] = rev32(hw[0]);
+	while (len > 0) {
+		const unsigned char *src;
+		unsigned char tmp[16];
+		int i;
+		uint32_t a[18], b[18], c[18];
+		uint32_t d0, d1, d2, d3, d4, d5, d6, d7;
+		uint32_t zw[8];
+
+		if (len >= 16) {
+			src = buf;
+			buf += 16;
+			len -= 16;
+		} else {
+			memcpy(tmp, buf, len);	/* final partial block: zero-pad to 16 bytes */
+			memset(tmp + len, 0, (sizeof tmp) - len);
+			src = tmp;
+			len = 0;
+		}
+		yw[3] ^= br_dec32be(src);	/* XOR the data block into y */
+		yw[2] ^= br_dec32be(src + 4);
+		yw[1] ^= br_dec32be(src + 8);
+		yw[0] ^= br_dec32be(src + 12);
+
+		/*
+		 * We are using Karatsuba: the 128x128 multiplication is
+		 * reduced to three 64x64 multiplications, hence nine
+		 * 32x32 multiplications. With the bit-reversal trick,
+		 * we have to perform 18 32x32 multiplications.
+		 */
+
+		/*
+		 * y[0,1]*h[0,1] -> 0,1,4
+		 * y[2,3]*h[2,3] -> 2,3,5
+		 * (y[0,1]+y[2,3])*(h[0,1]+h[2,3]) -> 6,7,8
+		 */
+
+		a[0] = yw[0];	/* a[0..8]: y operands, at the indices listed above */
+		a[1] = yw[1];
+		a[2] = yw[2];
+		a[3] = yw[3];
+		a[4] = a[0] ^ a[1];
+		a[5] = a[2] ^ a[3];
+		a[6] = a[0] ^ a[2];
+		a[7] = a[1] ^ a[3];
+		a[8] = a[6] ^ a[7];
+
+		a[ 9] = rev32(yw[0]);	/* a[9..17]: same layout, bit-reversed words */
+		a[10] = rev32(yw[1]);
+		a[11] = rev32(yw[2]);
+		a[12] = rev32(yw[3]);
+		a[13] = a[ 9] ^ a[10];
+		a[14] = a[11] ^ a[12];
+		a[15] = a[ 9] ^ a[11];
+		a[16] = a[10] ^ a[12];
+		a[17] = a[15] ^ a[16];
+
+		b[0] = hw[0];	/* b[0..8]: matching h operands */
+		b[1] = hw[1];
+		b[2] = hw[2];
+		b[3] = hw[3];
+		b[4] = b[0] ^ b[1];
+		b[5] = b[2] ^ b[3];
+		b[6] = b[0] ^ b[2];
+		b[7] = b[1] ^ b[3];
+		b[8] = b[6] ^ b[7];
+
+		b[ 9] = hwr[0];	/* b[9..17]: matching bit-reversed h operands */
+		b[10] = hwr[1];
+		b[11] = hwr[2];
+		b[12] = hwr[3];
+		b[13] = b[ 9] ^ b[10];
+		b[14] = b[11] ^ b[12];
+		b[15] = b[ 9] ^ b[11];
+		b[16] = b[10] ^ b[12];
+		b[17] = b[15] ^ b[16];
+
+		for (i = 0; i < 18; i ++) {
+			c[i] = bmul32(a[i], b[i]);	/* low 32 bits of each product */
+		}
+
+		c[4] ^= c[0] ^ c[1];	/* Karatsuba: isolate the middle terms */
+		c[5] ^= c[2] ^ c[3];
+		c[8] ^= c[6] ^ c[7];
+
+		c[13] ^= c[ 9] ^ c[10];	/* same, for the bit-reversed products */
+		c[14] ^= c[11] ^ c[12];
+		c[17] ^= c[15] ^ c[16];
+
+		/*
+		 * y[0,1]*h[0,1] -> 0,9^4,1^13,10
+		 * y[2,3]*h[2,3] -> 2,11^5,3^14,12
+		 * (y[0,1]+y[2,3])*(h[0,1]+h[2,3]) -> 6,15^8,7^17,16
+		 */
+		d0 = c[0];	/* d0..d7: the full product, d0 least significant */
+		d1 = c[4] ^ (rev32(c[9]) >> 1);
+		d2 = c[1] ^ c[0] ^ c[2] ^ c[6] ^ (rev32(c[13]) >> 1);
+		d3 = c[4] ^ c[5] ^ c[8]
+			^ (rev32(c[10] ^ c[9] ^ c[11] ^ c[15]) >> 1);
+		d4 = c[2] ^ c[1] ^ c[3] ^ c[7]
+			^ (rev32(c[13] ^ c[14] ^ c[17]) >> 1);
+		d5 = c[5] ^ (rev32(c[11] ^ c[10] ^ c[12] ^ c[16]) >> 1);
+		d6 = c[3] ^ (rev32(c[14]) >> 1);
+		d7 = rev32(c[12]) >> 1;
+
+		zw[0] = d0 << 1;	/* shift the product left by one bit into zw[0..7] */
+		zw[1] = (d1 << 1) | (d0 >> 31);
+		zw[2] = (d2 << 1) | (d1 >> 31);
+		zw[3] = (d3 << 1) | (d2 >> 31);
+		zw[4] = (d4 << 1) | (d3 >> 31);
+		zw[5] = (d5 << 1) | (d4 >> 31);
+		zw[6] = (d6 << 1) | (d5 >> 31);
+		zw[7] = (d7 << 1) | (d6 >> 31);
+
+		for (i = 0; i < 4; i ++) {	/* reduction: fold zw[0..3] into zw[4..7] */
+			uint32_t lw;
+
+			lw = zw[i];
+			zw[i + 4] ^= lw ^ (lw >> 1) ^ (lw >> 2) ^ (lw >> 7);
+			zw[i + 3] ^= (lw << 31) ^ (lw << 30) ^ (lw << 25);
+		}
+		memcpy(yw, zw + 4, sizeof yw);	/* reduced value is the new y */
+	}
+	br_enc32be(yb, yw[3]);	/* write y back, yw[3] first */
+	br_enc32be(yb + 4, yw[2]);
+	br_enc32be(yb + 8, yw[1]);
+	br_enc32be(yb + 12, yw[0]);
+}
diff --git a/third_party/bearssl/src/ghash_ctmul64.c b/third_party/bearssl/src/ghash_ctmul64.c
new file mode 100644
index 0000000..a46f16f
--- /dev/null
+++ b/third_party/bearssl/src/ghash_ctmul64.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * This is the 64-bit variant of br_ghash_ctmul32(), with 64-bit operands
+ * and bit reversal of 64-bit words.
+ */
+
+static inline uint64_t
+bmul64(uint64_t x, uint64_t y)	/* carry-less product, low 64 bits only */
+{
+	uint64_t x0, x1, x2, x3;
+	uint64_t y0, y1, y2, y3;
+	uint64_t z0, z1, z2, z3;
+
+	x0 = x & (uint64_t)0x1111111111111111;	/* xk: bits of x at indices k mod 4 */
+	x1 = x & (uint64_t)0x2222222222222222;
+	x2 = x & (uint64_t)0x4444444444444444;
+	x3 = x & (uint64_t)0x8888888888888888;
+	y0 = y & (uint64_t)0x1111111111111111;	/* yk: same split for y */
+	y1 = y & (uint64_t)0x2222222222222222;
+	y2 = y & (uint64_t)0x4444444444444444;
+	y3 = y & (uint64_t)0x8888888888888888;
+	z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1);	/* i+j = 0 mod 4 */
+	z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2);	/* i+j = 1 mod 4 */
+	z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3);	/* i+j = 2 mod 4 */
+	z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0);	/* i+j = 3 mod 4 */
+	z0 &= (uint64_t)0x1111111111111111;	/* keep class-k bits of zk only; other */
+	z1 &= (uint64_t)0x2222222222222222;	/* bits hold integer-carry spill-over */
+	z2 &= (uint64_t)0x4444444444444444;
+	z3 &= (uint64_t)0x8888888888888888;
+	return z0 | z1 | z2 | z3;
+}
+
+static uint64_t
+rev64(uint64_t x)	/* bit-reverse a 64-bit word */
+{
+#define RMS(m, s)   do { \
+		x = ((x & (uint64_t)(m)) << (s)) \
+			| ((x >> (s)) & (uint64_t)(m)); \
+	} while (0)
+
+	RMS(0x5555555555555555,  1);	/* swap adjacent bits */
+	RMS(0x3333333333333333,  2);	/* swap adjacent 2-bit pairs */
+	RMS(0x0F0F0F0F0F0F0F0F,  4);	/* swap the two nibbles of each byte */
+	RMS(0x00FF00FF00FF00FF,  8);	/* swap the two bytes of each 16-bit unit */
+	RMS(0x0000FFFF0000FFFF, 16);	/* swap the 16-bit halves of each 32-bit unit */
+	return (x << 32) | (x >> 32);	/* swap the two 32-bit halves */
+
+#undef RMS
+}
+
+/* see bearssl_hash.h */
+void
+br_ghash_ctmul64(void *y, const void *h, const void *data, size_t len)
+{
+	const unsigned char *buf, *hb;
+	unsigned char *yb;
+	uint64_t y0, y1;
+	uint64_t h0, h1, h2, h0r, h1r, h2r;
+
+	buf = data;
+	yb = y;
+	hb = h;
+	y1 = br_dec64be(yb);	/* y1 = first 8 bytes of y, y0 = last 8 */
+	y0 = br_dec64be(yb + 8);
+	h1 = br_dec64be(hb);	/* same word order for h */
+	h0 = br_dec64be(hb + 8);
+	h0r = rev64(h0);	/* bit-reversed h words, computed once for all blocks */
+	h1r = rev64(h1);
+	h2 = h0 ^ h1;	/* Karatsuba operand h0 + h1 */
+	h2r = h0r ^ h1r;
+	while (len > 0) {
+		const unsigned char *src;
+		unsigned char tmp[16];
+		uint64_t y0r, y1r, y2, y2r;
+		uint64_t z0, z1, z2, z0h, z1h, z2h;
+		uint64_t v0, v1, v2, v3;
+
+		if (len >= 16) {
+			src = buf;
+			buf += 16;
+			len -= 16;
+		} else {
+			memcpy(tmp, buf, len);	/* final partial block: zero-pad to 16 bytes */
+			memset(tmp + len, 0, (sizeof tmp) - len);
+			src = tmp;
+			len = 0;
+		}
+		y1 ^= br_dec64be(src);	/* XOR the data block into y */
+		y0 ^= br_dec64be(src + 8);
+
+		y0r = rev64(y0);
+		y1r = rev64(y1);
+		y2 = y0 ^ y1;	/* Karatsuba operand y0 + y1 */
+		y2r = y0r ^ y1r;
+
+		z0 = bmul64(y0, h0);	/* low 64 bits of the three Karatsuba products */
+		z1 = bmul64(y1, h1);
+		z2 = bmul64(y2, h2);
+		z0h = bmul64(y0r, h0r);	/* reversed operands: give the upper halves */
+		z1h = bmul64(y1r, h1r);
+		z2h = bmul64(y2r, h2r);
+		z2 ^= z0 ^ z1;	/* Karatsuba: isolate the middle term */
+		z2h ^= z0h ^ z1h;
+		z0h = rev64(z0h) >> 1;	/* un-reverse: bits 64..126 of each product */
+		z1h = rev64(z1h) >> 1;
+		z2h = rev64(z2h) >> 1;
+
+		v0 = z0;	/* v0..v3: the full product, v0 least significant */
+		v1 = z0h ^ z2;
+		v2 = z1 ^ z2h;
+		v3 = z1h;
+
+		v3 = (v3 << 1) | (v2 >> 63);	/* shift the whole product left by one bit */
+		v2 = (v2 << 1) | (v1 >> 63);
+		v1 = (v1 << 1) | (v0 >> 63);
+		v0 = (v0 << 1);
+
+		v2 ^= v0 ^ (v0 >> 1) ^ (v0 >> 2) ^ (v0 >> 7);	/* reduction: fold v0 ... */
+		v1 ^= (v0 << 63) ^ (v0 << 62) ^ (v0 << 57);
+		v3 ^= v1 ^ (v1 >> 1) ^ (v1 >> 2) ^ (v1 >> 7);	/* ... then v1, into v2:v3 */
+		v2 ^= (v1 << 63) ^ (v1 << 62) ^ (v1 << 57);
+
+		y0 = v2;	/* reduced value is the new y */
+		y1 = v3;
+	}
+
+	br_enc64be(yb, y1);	/* write y back, y1 first */
+	br_enc64be(yb + 8, y0);
+}
diff --git a/third_party/bearssl/src/ghash_pclmul.c b/third_party/bearssl/src/ghash_pclmul.c
new file mode 100644
index 0000000..a58e7dc
--- /dev/null
+++ b/third_party/bearssl/src/ghash_pclmul.c
@@ -0,0 +1,389 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+/*
+ * This is the GHASH implementation that leverages the pclmulqdq opcode
+ * (from the AES-NI instructions).
+ */
+
+#if BR_AES_X86NI
+
+/*
+ * Test CPU support for PCLMULQDQ.
+ */
+static inline int
+pclmul_supported(void)
+{
+	/*
+	 * Bit mask for features in ECX:
+	 *    bit 1 (mask 0x00000002)   PCLMULQDQ support
+	 */
+	return br_cpuid(0, 0, 0x00000002, 0);
+}
+
+/* see bearssl_hash.h */
+br_ghash
+br_ghash_pclmul_get(void)
+{
+	return pclmul_supported() ? &br_ghash_pclmul : 0;	/* 0 when the CPU test fails */
+}
+
+BR_TARGETS_X86_UP
+
+/*
+ * GHASH is defined over elements of GF(2^128) with "full little-endian"
+ * representation: leftmost byte is least significant, and, within each
+ * byte, leftmost _bit_ is least significant. The natural ordering in
+ * x86 is "mixed little-endian": bytes are ordered from least to most
+ * significant, but bits within a byte are in most-to-least significant
+ * order. Going to full little-endian representation would require
+ * reversing bits within each byte, which is doable but expensive.
+ *
+ * Instead, we go to full big-endian representation, by swapping bytes
+ * around, which is done with a single _mm_shuffle_epi8() opcode (it
+ * comes with SSSE3; all CPUs that offer pclmulqdq also have SSSE3). We
+ * can use a full big-endian representation because in a carryless
+ * multiplication, we have a nice bit reversal property:
+ *
+ *    rev_128(x) * rev_128(y) = rev_255(x * y)
+ *
+ * So by using full big-endian, we still get the right result, except
+ * that it is right-shifted by 1 bit. The left-shift is relatively
+ * inexpensive, and it can be mutualised.
+ *
+ *
+ * Since SSE2 opcodes do not have facilities for shifting full 128-bit
+ * values with bit precision, we have to break down values into 64-bit
+ * chunks. We number chunks from 0 to 3 in left to right order.
+ */
+
+/*
+ * Byte-swap a complete 128-bit value. This normally uses
+ * _mm_shuffle_epi8(), which gets translated to pshufb (an SSSE3 opcode).
+ * However, this crashes old Clang versions, so, for Clang before 3.8,
+ * we use an alternate (and less efficient) version.
+ */
+#if BR_CLANG && !BR_CLANG_3_8
+#define BYTESWAP_DECL	/* fallback variant needs no extra local */
+#define BYTESWAP_PREP   (void)0
+#define BYTESWAP(x)   do { \
+		__m128i byteswap1, byteswap2; \
+		byteswap1 = (x); \
+		byteswap2 = _mm_srli_epi16(byteswap1, 8); /* swap bytes in each 16-bit lane */ \
+		byteswap1 = _mm_slli_epi16(byteswap1, 8); \
+		byteswap1 = _mm_or_si128(byteswap1, byteswap2); \
+		byteswap1 = _mm_shufflelo_epi16(byteswap1, 0x1B); /* reverse low four words */ \
+		byteswap1 = _mm_shufflehi_epi16(byteswap1, 0x1B); /* reverse high four words */ \
+		(x) = _mm_shuffle_epi32(byteswap1, 0x4E); /* swap the 64-bit halves */ \
+	} while (0)
+#else
+#define BYTESWAP_DECL   __m128i byteswap_index;	/* local holding the shuffle index */
+#define BYTESWAP_PREP   do { /* index that reverses all 16 bytes */ \
+		byteswap_index = _mm_set_epi8( \
+			0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
+	} while (0)
+#define BYTESWAP(x)   do { \
+		(x) = _mm_shuffle_epi8((x), byteswap_index); \
+	} while (0)
+#endif
+
+/*
+ * Call pclmulqdq. Clang appears to have trouble with the intrinsic, so,
+ * for that compiler, we use inline assembly. Inline assembly is
+ * potentially a bit slower because the compiler does not understand
+ * what the opcode does, and thus cannot optimize instruction
+ * scheduling.
+ *
+ * We use a target of "sse2" only, so that Clang may still handle the
+ * '__m128i' type and allocate SSE2 registers.
+ */
+#if BR_CLANG
+BR_TARGET("sse2")
+static inline __m128i
+pclmulqdq00(__m128i x, __m128i y)	/* low 64 bits of x times low 64 bits of y */
+{
+	__asm__ ("pclmulqdq $0x00, %1, %0" : "+x" (x) : "x" (y));
+	return x;
+}
+BR_TARGET("sse2")
+static inline __m128i
+pclmulqdq11(__m128i x, __m128i y)	/* high 64 bits of x times high 64 bits of y */
+{
+	__asm__ ("pclmulqdq $0x11, %1, %0" : "+x" (x) : "x" (y));
+	return x;
+}
+#else
+#define pclmulqdq00(x, y)   _mm_clmulepi64_si128(x, y, 0x00)	/* low * low */
+#define pclmulqdq11(x, y)   _mm_clmulepi64_si128(x, y, 0x11)	/* high * high */
+#endif
+
+/*
+ * From a 128-bit value kw, compute kx as the XOR of the two 64-bit
+ * halves of kw (into the right half of kx; left half is unspecified).
+ */
+#define BK(kw, kx)   do { /* kx is used as the Karatsuba (hi ^ lo) operand */ \
+		kx = _mm_xor_si128(kw, _mm_shuffle_epi32(kw, 0x0E)); \
+	} while (0)
+
+/*
+ * Combine two 64-bit values (k0:k1) into a 128-bit (kw) value and
+ * the XOR of the two values (kx).
+ */
+#define PBK(k0, k1, kw, kx)   do { \
+		kw = _mm_unpacklo_epi64(k1, k0); /* low half of k0 goes to the upper half of kw */ \
+		kx = _mm_xor_si128(k0, k1); \
+	} while (0)
+
+/*
+ * Left-shift by 1 bit a 256-bit value (in four 64-bit words).
+ */
+#define SL_256(x0, x1, x2, x3)   do { /* x0 is the most significant word */ \
+		x0 = _mm_or_si128( \
+			_mm_slli_epi64(x0, 1), \
+			_mm_srli_epi64(x1, 63)); \
+		x1 = _mm_or_si128( \
+			_mm_slli_epi64(x1, 1), \
+			_mm_srli_epi64(x2, 63)); \
+		x2 = _mm_or_si128( \
+			_mm_slli_epi64(x2, 1), \
+			_mm_srli_epi64(x3, 63)); \
+		x3 = _mm_slli_epi64(x3, 1); /* lowest word: a zero bit is shifted in */ \
+	} while (0)
+
+/*
+ * Perform reduction in GF(2^128). The 256-bit value is in x0..x3;
+ * result is written in x0..x1.
+ */
+#define REDUCE_F128(x0, x1, x2, x3)   do { /* fold x3 into x1:x2, then x2 into x0:x1 */ \
+		x1 = _mm_xor_si128( \
+			x1, \
+			_mm_xor_si128( \
+				_mm_xor_si128( \
+					x3, \
+					_mm_srli_epi64(x3, 1)), \
+				_mm_xor_si128( \
+					_mm_srli_epi64(x3, 2), \
+					_mm_srli_epi64(x3, 7)))); \
+		x2 = _mm_xor_si128( \
+			_mm_xor_si128( \
+				x2, \
+				_mm_slli_epi64(x3, 63)), \
+			_mm_xor_si128( \
+				_mm_slli_epi64(x3, 62), \
+				_mm_slli_epi64(x3, 57))); \
+		x0 = _mm_xor_si128( /* second fold uses the x2 value updated just above */ \
+			x0, \
+			_mm_xor_si128( \
+				_mm_xor_si128( \
+					x2, \
+					_mm_srli_epi64(x2, 1)), \
+				_mm_xor_si128( \
+					_mm_srli_epi64(x2, 2), \
+					_mm_srli_epi64(x2, 7)))); \
+		x1 = _mm_xor_si128( \
+			_mm_xor_si128( \
+				x1, \
+				_mm_slli_epi64(x2, 63)), \
+			_mm_xor_si128( \
+				_mm_slli_epi64(x2, 62), \
+				_mm_slli_epi64(x2, 57))); \
+	} while (0)
+
+/*
+ * Square value kw into (dw,dx).
+ */
+#define SQUARE_F128(kw, dw, dx)   do { /* squaring: cross terms cancel, two products suffice */ \
+		__m128i z0, z1, z2, z3; \
+		z1 = pclmulqdq11(kw, kw); /* high half squared */ \
+		z3 = pclmulqdq00(kw, kw); /* low half squared */ \
+		z0 = _mm_shuffle_epi32(z1, 0x0E); /* upper 64 bits of z1 */ \
+		z2 = _mm_shuffle_epi32(z3, 0x0E); /* upper 64 bits of z3 */ \
+		SL_256(z0, z1, z2, z3); \
+		REDUCE_F128(z0, z1, z2, z3); \
+		PBK(z0, z1, dw, dx); \
+	} while (0)
+
+/* see bearssl_hash.h */
+BR_TARGET("ssse3,pclmul")
+void
+br_ghash_pclmul(void *y, const void *h, const void *data, size_t len)
+{
+	const unsigned char *buf1, *buf2;
+	unsigned char tmp[64];
+	size_t num4, num1;
+	__m128i yw, h1w, h1x;
+	BYTESWAP_DECL
+
+	/*
+	 * We split data into two chunks. First chunk starts at buf1
+	 * and contains num4 blocks of 64-byte values. Second chunk
+	 * starts at buf2 and contains num1 blocks of 16-byte values.
+	 * We want the first chunk to be as large as possible.
+	 */
+	buf1 = data;
+	num4 = len >> 6;
+	len &= 63;
+	buf2 = buf1 + (num4 << 6);
+	num1 = (len + 15) >> 4;	/* tail blocks, counting a partial block as one */
+	if ((len & 15) != 0) {
+		memcpy(tmp, buf2, len);	/* len <= 63 here, so tmp[64] is large enough */
+		memset(tmp + len, 0, (num1 << 4) - len);	/* zero-pad the last block */
+		buf2 = tmp;
+	}
+
+	/*
+	 * Preparatory step for endian conversions.
+	 */
+	BYTESWAP_PREP;
+
+	/*
+	 * Load y and h.
+	 */
+	yw = _mm_loadu_si128(y);
+	h1w = _mm_loadu_si128(h);
+	BYTESWAP(yw);
+	BYTESWAP(h1w);
+	BK(h1w, h1x);	/* h1x: XOR of the two halves of h, for the middle products */
+
+	if (num4 > 0) {
+		__m128i h2w, h2x, h3w, h3x, h4w, h4x;
+		__m128i t0, t1, t2, t3;
+
+		/*
+		 * Compute h2 = h^2.
+		 */
+		SQUARE_F128(h1w, h2w, h2x);
+
+		/*
+		 * Compute h3 = h^3 = h*(h^2).
+		 */
+		t1 = pclmulqdq11(h1w, h2w);	/* high * high */
+		t3 = pclmulqdq00(h1w, h2w);	/* low * low */
+		t2 = _mm_xor_si128(pclmulqdq00(h1x, h2x),
+			_mm_xor_si128(t1, t3));	/* Karatsuba middle term */
+		t0 = _mm_shuffle_epi32(t1, 0x0E);
+		t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
+		t2 = _mm_xor_si128(t2, _mm_shuffle_epi32(t3, 0x0E));
+		SL_256(t0, t1, t2, t3);
+		REDUCE_F128(t0, t1, t2, t3);
+		PBK(t0, t1, h3w, h3x);
+
+		/*
+		 * Compute h4 = h^4 = (h^2)^2.
+		 */
+		SQUARE_F128(h2w, h4w, h4x);
+
+		while (num4 -- > 0) {
+			__m128i aw0, aw1, aw2, aw3;
+			__m128i ax0, ax1, ax2, ax3;
+
+			aw0 = _mm_loadu_si128((void *)(buf1 +  0));
+			aw1 = _mm_loadu_si128((void *)(buf1 + 16));
+			aw2 = _mm_loadu_si128((void *)(buf1 + 32));
+			aw3 = _mm_loadu_si128((void *)(buf1 + 48));
+			BYTESWAP(aw0);
+			BYTESWAP(aw1);
+			BYTESWAP(aw2);
+			BYTESWAP(aw3);
+			buf1 += 64;
+
+			aw0 = _mm_xor_si128(aw0, yw);	/* fold the running y into the first block */
+			BK(aw1, ax1);
+			BK(aw2, ax2);
+			BK(aw3, ax3);
+			BK(aw0, ax0);
+
+			t1 = _mm_xor_si128(	/* high*high parts: aw0*h4, aw1*h3, aw2*h2, aw3*h1 */
+				_mm_xor_si128(
+					pclmulqdq11(aw0, h4w),
+					pclmulqdq11(aw1, h3w)),
+				_mm_xor_si128(
+					pclmulqdq11(aw2, h2w),
+					pclmulqdq11(aw3, h1w)));
+			t3 = _mm_xor_si128(	/* low*low parts of the same four products */
+				_mm_xor_si128(
+					pclmulqdq00(aw0, h4w),
+					pclmulqdq00(aw1, h3w)),
+				_mm_xor_si128(
+					pclmulqdq00(aw2, h2w),
+					pclmulqdq00(aw3, h1w)));
+			t2 = _mm_xor_si128(	/* products of the XORed halves */
+				_mm_xor_si128(
+					pclmulqdq00(ax0, h4x),
+					pclmulqdq00(ax1, h3x)),
+				_mm_xor_si128(
+					pclmulqdq00(ax2, h2x),
+					pclmulqdq00(ax3, h1x)));
+			t2 = _mm_xor_si128(t2, _mm_xor_si128(t1, t3));	/* Karatsuba middle term */
+			t0 = _mm_shuffle_epi32(t1, 0x0E);
+			t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
+			t2 = _mm_xor_si128(t2, _mm_shuffle_epi32(t3, 0x0E));
+			SL_256(t0, t1, t2, t3);
+			REDUCE_F128(t0, t1, t2, t3);	/* one reduction for all four blocks */
+			yw = _mm_unpacklo_epi64(t1, t0);	/* repack t0:t1 as the new y */
+		}
+	}
+
+	while (num1 -- > 0) {
+		__m128i aw, ax;
+		__m128i t0, t1, t2, t3;
+
+		aw = _mm_loadu_si128((void *)buf2);
+		BYTESWAP(aw);
+		buf2 += 16;
+
+		aw = _mm_xor_si128(aw, yw);	/* XOR the block into the running y */
+		BK(aw, ax);
+
+		t1 = pclmulqdq11(aw, h1w);	/* high * high */
+		t3 = pclmulqdq00(aw, h1w);	/* low * low */
+		t2 = pclmulqdq00(ax, h1x);	/* product of the XORed halves */
+		t2 = _mm_xor_si128(t2, _mm_xor_si128(t1, t3));	/* Karatsuba middle term */
+		t0 = _mm_shuffle_epi32(t1, 0x0E);
+		t1 = _mm_xor_si128(t1, _mm_shuffle_epi32(t2, 0x0E));
+		t2 = _mm_xor_si128(t2, _mm_shuffle_epi32(t3, 0x0E));
+		SL_256(t0, t1, t2, t3);
+		REDUCE_F128(t0, t1, t2, t3);
+		yw = _mm_unpacklo_epi64(t1, t0);	/* repack t0:t1 as the new y */
+	}
+
+	BYTESWAP(yw);	/* back to the in-memory byte order before storing */
+	_mm_storeu_si128(y, yw);
+}
+
+BR_TARGETS_X86_DOWN
+
+#else
+
+/* see bearssl_hash.h */
+br_ghash
+br_ghash_pclmul_get(void)
+{
+	return 0;	/* BR_AES_X86NI is not set: no pclmul implementation in this build */
+}
+
+#endif
diff --git a/third_party/bearssl/src/ghash_pwr8.c b/third_party/bearssl/src/ghash_pwr8.c
new file mode 100644
index 0000000..2e7b0f4
--- /dev/null
+++ b/third_party/bearssl/src/ghash_pwr8.c
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_POWER_ASM_MACROS   1
+#include "inner.h"
+
+/*
+ * This is the GHASH implementation that leverages the POWER8 opcodes.
+ */
+
+#if BR_POWER8
+
+/*
+ * Some symbolic names for registers.
+ *   HB0 = 16 bytes of value 0
+ *   HB1 = 16 bytes of value 1
+ *   HB2 = 16 bytes of value 2
+ *   HB6 = 16 bytes of value 6
+ *   HB7 = 16 bytes of value 7
+ *   TT0, TT1 and TT2 are temporaries
+ *
+ * BSW holds the pattern for byteswapping 32-bit words; this is set only
+ * on little-endian systems. XBSW is the same register with the +32 offset
+ * for access with the VSX opcodes.
+ */
+#define HB0     0	/* values are vector register numbers */
+#define HB1     1
+#define HB2     2
+#define HB6     3	/* register 3; the "6" is the byte value it holds */
+#define HB7     4	/* register 4; the "7" is the byte value it holds */
+#define TT0     5
+#define TT1     6
+#define TT2     7
+
+#define BSW     8
+#define XBSW   40	/* BSW + 32 */
+
+/*
+ * Macro to initialise the constants.
+ */
+#define INIT /* sets HB0, HB1, HB2, HB6, HB7 and, via INIT_BSW, BSW */ \
+		vxor(HB0, HB0, HB0) \
+		vspltisb(HB1, 1) \
+		vspltisb(HB2, 2) \
+		vspltisb(HB6, 6) \
+		vspltisb(HB7, 7) \
+		INIT_BSW
+
+/*
+ * Fix endianness of a value after reading it or before writing it, if
+ * necessary.
+ */
+#if BR_POWER8_LE
+#define INIT_BSW         lxvw4x(XBSW, 0, %[idx2be])	/* load the byteswap pattern into BSW */
+#define FIX_ENDIAN(xx)   vperm(xx, xx, xx, BSW)	/* permute xx in place using BSW */
+#else
+#define INIT_BSW	/* big-endian: nothing to load */
+#define FIX_ENDIAN(xx)	/* big-endian: expands to nothing */
+#endif
+
+/*
+ * Left-shift x0:x1 by one bit (clobbers TT0). This is a corrective action
+ * needed because GHASH is defined in full little-endian specification,
+ * while the opcodes use full big-endian convention, so the 255-bit product
+ * ends up one bit to the right.
+ */
+#define SL_256(x0, x1) \
+		vsldoi(TT0, HB0, x1, 1) \
+		vsl(x0, x0, HB1) \
+		vsr(TT0, TT0, HB7) \
+		vsl(x1, x1, HB1) \
+		vxor(x0, x0, TT0)
+
+/*
+ * Reduce x0:x1 in GF(2^128), result in xd (register xd may be the same as
+ * x0 or x1, or a different register). x0, x1 and TT0..TT2 are modified.
+ */
+#define REDUCE_F128(xd, x0, x1) \
+		vxor(x0, x0, x1) \
+		vsr(TT0, x1, HB1) \
+		vsr(TT1, x1, HB2) \
+		vsr(TT2, x1, HB7) \
+		vxor(x0, x0, TT0) \
+		vxor(TT1, TT1, TT2) \
+		vxor(x0, x0, TT1) \
+		vsldoi(x1, x1, HB0, 15) \
+		vsl(TT1, x1, HB6) \
+		vsl(TT2, x1, HB1) \
+		vxor(x1, TT1, TT2) \
+		vsr(TT0, x1, HB1) \
+		vsr(TT1, x1, HB2) \
+		vsr(TT2, x1, HB7) \
+		vxor(x0, x0, x1) \
+		vxor(x0, x0, TT0) \
+		vxor(TT1, TT1, TT2) \
+		vxor(xd, x0, TT1)
+
+/* see bearssl_hash.h */
+void
+br_ghash_pwr8(void *y, const void *h, const void *data, size_t len)
+{
+	const unsigned char *buf1, *buf2;
+	size_t num4, num1;
+	unsigned char tmp[64];
+	long cc0, cc1, cc2, cc3;
+
+#if BR_POWER8_LE
+	static const uint32_t idx2be[] = {
+		0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
+	};
+#endif
+
+	buf1 = data;
+
+	/*
+	 * Assembly code requires data to be split into two chunks; first
+	 * chunk must contain a number of blocks which is a multiple of 4.
+	 * Since the processing for the first chunk is faster, we want
+	 * to make it as big as possible.
+	 *
+	 * For the remainder, there are two possibilities:
+	 *  -- if the remainder size is a multiple of 16, then use it
+	 *     in place;
+	 *  -- otherwise, copy it to the tmp[] array and pad it with
+	 *     zeros.
+	 */
+	num4 = len >> 6;
+	buf2 = buf1 + (num4 << 6);
+	len &= 63;
+	num1 = (len + 15) >> 4;	/* tail blocks, counting a partial block as one */
+	if ((len & 15) != 0) {
+		memcpy(tmp, buf2, len);	/* len <= 63 here, so tmp[64] is large enough */
+		memset(tmp + len, 0, (num1 << 4) - len);
+		buf2 = tmp;
+	}
+
+	cc0 =  0;	/* byte offsets of the four blocks within a 64-byte group */
+	cc1 = 16;
+	cc2 = 32;
+	cc3 = 48;
+	asm volatile (
+		INIT
+
+		/*
+		 * Load current h (denoted hereafter h1) in v9.
+		 */
+		lxvw4x(41, 0, %[h])
+		FIX_ENDIAN(9)
+
+		/*
+		 * Load current y into v28.
+		 */
+		lxvw4x(60, 0, %[y])
+		FIX_ENDIAN(28)
+
+		/*
+		 * Split h1 into three registers:
+		 *   v17 = h1_1:h1_0
+		 *   v18 =    0:h1_0
+		 *   v19 = h1_1:0
+		 */
+		xxpermdi(49, 41, 41, 2)
+		vsldoi(18, HB0, 9, 8)
+		vsldoi(19, 9, HB0, 8)
+
+		/*
+		 * If num4 is 0, skip directly to the second chunk.
+		 */
+		cmpldi(%[num4], 0)
+		beq(chunk1)
+
+		/*
+		 * Compute h2 = h*h in v10.
+		 */
+		vpmsumd(10, 18, 18)
+		vpmsumd(11, 19, 19)
+		SL_256(10, 11)
+		REDUCE_F128(10, 10, 11)
+
+		/*
+		 * Compute h3 = h*h*h in v11.
+		 * We first split h2 into:
+		 *   v10 = h2_0:h2_1
+		 *   v11 =    0:h2_0
+		 *   v12 = h2_1:0
+		 * Then we do the product with h1, and reduce into v11.
+		 */
+		vsldoi(11, HB0, 10, 8)
+		vsldoi(12, 10, HB0, 8)
+		vpmsumd(13, 10, 17)
+		vpmsumd(11, 11, 18)
+		vpmsumd(12, 12, 19)
+		vsldoi(14, HB0, 13, 8)
+		vsldoi(15, 13, HB0, 8)
+		vxor(11, 11, 14)
+		vxor(12, 12, 15)
+		SL_256(11, 12)
+		REDUCE_F128(11, 11, 12)
+
+		/*
+		 * Compute h4 = h*h*h*h in v12. This is done by squaring h2.
+		 */
+		vsldoi(12, HB0, 10, 8)
+		vsldoi(13, 10, HB0, 8)
+		vpmsumd(12, 12, 12)
+		vpmsumd(13, 13, 13)
+		SL_256(12, 13)
+		REDUCE_F128(12, 12, 13)
+
+		/*
+		 * Repack h1, h2, h3 and h4:
+		 *   v13 = h4_0:h3_0
+		 *   v14 = h4_1:h3_1
+		 *   v15 = h2_0:h1_0
+		 *   v16 = h2_1:h1_1
+		 */
+		xxpermdi(45, 44, 43, 0)
+		xxpermdi(46, 44, 43, 3)
+		xxpermdi(47, 42, 41, 0)
+		xxpermdi(48, 42, 41, 3)
+
+		/*
+		 * Loop for each group of four blocks.
+		 */
+		mtctr(%[num4])
+	label(loop4)
+		/*
+		 * Read the four next blocks.
+		 *   v20 = y + a0 = b0
+		 *   v21 = a1     = b1
+		 *   v22 = a2     = b2
+		 *   v23 = a3     = b3
+		 */
+		lxvw4x(52, %[cc0], %[buf1])
+		lxvw4x(53, %[cc1], %[buf1])
+		lxvw4x(54, %[cc2], %[buf1])
+		lxvw4x(55, %[cc3], %[buf1])
+		FIX_ENDIAN(20)
+		FIX_ENDIAN(21)
+		FIX_ENDIAN(22)
+		FIX_ENDIAN(23)
+		addi(%[buf1], %[buf1], 64)
+		vxor(20, 20, 28)
+
+		/*
+		 * Repack the blocks into v9, v10, v11 and v12.
+		 *   v9  = b0_0:b1_0
+		 *   v10 = b0_1:b1_1
+		 *   v11 = b2_0:b3_0
+		 *   v12 = b2_1:b3_1
+		 */
+		xxpermdi(41, 52, 53, 0)
+		xxpermdi(42, 52, 53, 3)
+		xxpermdi(43, 54, 55, 0)
+		xxpermdi(44, 54, 55, 3)
+
+		/*
+		 * Compute the products.
+		 *   v20 = b0_0*h4_0 + b1_0*h3_0
+		 *   v21 = b0_1*h4_0 + b1_1*h3_0
+		 *   v22 = b0_0*h4_1 + b1_0*h3_1
+		 *   v23 = b0_1*h4_1 + b1_1*h3_1
+		 *   v24 = b2_0*h2_0 + b3_0*h1_0
+		 *   v25 = b2_1*h2_0 + b3_1*h1_0
+		 *   v26 = b2_0*h2_1 + b3_0*h1_1
+		 *   v27 = b2_1*h2_1 + b3_1*h1_1
+		 */
+		vpmsumd(20, 13,  9)
+		vpmsumd(21, 13, 10)
+		vpmsumd(22, 14,  9)
+		vpmsumd(23, 14, 10)
+		vpmsumd(24, 15, 11)
+		vpmsumd(25, 15, 12)
+		vpmsumd(26, 16, 11)
+		vpmsumd(27, 16, 12)
+
+		/*
+		 * Sum products into a single 256-bit result in v11:v12.
+		 */
+		vxor(11, 20, 24)
+		vxor(12, 23, 27)
+		vxor( 9, 21, 22)
+		vxor(10, 25, 26)
+		vxor(20,  9, 10)
+		vsldoi( 9, HB0, 20, 8)
+		vsldoi(10, 20, HB0, 8)
+		vxor(11, 11, 9)
+		vxor(12, 12, 10)
+
+		/*
+		 * Fix and reduce in GF(2^128); this is the new y (in v28).
+		 */
+		SL_256(11, 12)
+		REDUCE_F128(28, 11, 12)
+
+		/*
+		 * Loop for next group of four blocks.
+		 */
+		bdnz(loop4)
+
+		/*
+		 * Process second chunk, one block at a time.
+		 */
+	label(chunk1)
+		cmpldi(%[num1], 0)
+		beq(done)
+
+		mtctr(%[num1])
+	label(loop1)
+		/*
+		 * Load next data block and XOR it into y.
+		 */
+		lxvw4x(41, 0, %[buf2])
+#if BR_POWER8_LE
+		FIX_ENDIAN(9)	/* FIX_ENDIAN is already empty otherwise; the #if is redundant */
+#endif
+		addi(%[buf2], %[buf2], 16)
+		vxor(9, 28, 9)
+
+		/*
+		 * Split y into doublewords:
+		 *   v9  = y_0:y_1
+		 *   v10 =   0:y_0
+		 *   v11 = y_1:0
+		 */
+		vsldoi(10, HB0, 9, 8)
+		vsldoi(11, 9, HB0, 8)
+
+		/*
+		 * Compute products with h:
+		 *   v12 = y_0 * h_0
+		 *   v13 = y_1 * h_1
+		 *   v14 = y_1 * h_0 + y_0 * h_1
+		 */
+		vpmsumd(14,  9, 17)
+		vpmsumd(12, 10, 18)
+		vpmsumd(13, 11, 19)
+
+		/*
+		 * Propagate v14 into v12:v13 to finalise product.
+		 */
+		vsldoi(10, HB0, 14, 8)
+		vsldoi(11, 14, HB0, 8)
+		vxor(12, 12, 10)
+		vxor(13, 13, 11)
+
+		/*
+		 * Fix result and reduce into v28 (next value for y).
+		 */
+		SL_256(12, 13)
+		REDUCE_F128(28, 12, 13)
+		bdnz(loop1)
+
+	label(done)
+		/*
+		 * Write back the new y.
+		 */
+		FIX_ENDIAN(28)
+		stxvw4x(60, 0, %[y])
+
+: [buf1] "+b" (buf1), [buf2] "+b" (buf2)
+: [y] "b" (y), [h] "b" (h), [num4] "b" (num4), [num1] "b" (num1),
+  [cc0] "b" (cc0), [cc1] "b" (cc1), [cc2] "b" (cc2), [cc3] "b" (cc3)
+#if BR_POWER8_LE
+	, [idx2be] "b" (idx2be)
+#endif
+: "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
+  "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
+  "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
+  "ctr", "memory"
+	);
+}
+
+/* see bearssl_hash.h */
+br_ghash
+br_ghash_pwr8_get(void)
+{
+	return &br_ghash_pwr8;	/* BR_POWER8 build: returned with no runtime CPU test */
+}
+
+#else
+
+/* see bearssl_hash.h */
+br_ghash
+br_ghash_pwr8_get(void)
+{
+	return 0;	/* BR_POWER8 is not set: no POWER8 implementation in this build */
+}
+
+#endif
diff --git a/third_party/bearssl/src/hkdf.c b/third_party/bearssl/src/hkdf.c
new file mode 100644
index 0000000..6a36851
--- /dev/null
+++ b/third_party/bearssl/src/hkdf.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+const unsigned char br_hkdf_no_salt = 0;	/* presumably the object BR_HKDF_NO_SALT points at; confirm in bearssl_kdf.h */
+
+/* see bearssl_kdf.h: start an HKDF computation by keying an HMAC with the salt */
+void
+br_hkdf_init(br_hkdf_context *hc, const br_hash_class *digest_vtable,
+	const void *salt, size_t salt_len)
+{
+	br_hmac_key_context kc;
+	unsigned char tmp[64];	/* zero salt substitute; assumes digest size <= 64 */
+
+	if (salt == BR_HKDF_NO_SALT) {	/* caller passed the no-salt marker */
+		salt = tmp;
+		salt_len = br_digest_size(digest_vtable);	/* substitute is one digest long */
+		memset(tmp, 0, salt_len);
+	}
+	br_hmac_key_init(&kc, digest_vtable, salt, salt_len);
+	br_hmac_init(&hc->u.hmac_ctx, &kc, 0);	/* out_len 0 keeps the full digest size */
+	hc->dig_len = br_hmac_size(&hc->u.hmac_ctx);
+}
+
+/* see bearssl_kdf.h: append ikm_len bytes of input to the HMAC started by br_hkdf_init() */
+void
+br_hkdf_inject(br_hkdf_context *hc, const void *ikm, size_t ikm_len)
+{
+	br_hmac_update(&hc->u.hmac_ctx, ikm, ikm_len);
+}
+
+/* see bearssl_kdf.h: end the input phase; the HMAC output becomes the key for br_hkdf_produce() */
+void
+br_hkdf_flip(br_hkdf_context *hc)
+{
+	unsigned char tmp[64];	/* HMAC output; dig_len bytes of it are used as key */
+
+	br_hmac_out(&hc->u.hmac_ctx, tmp);
+	br_hmac_key_init(&hc->u.prk_ctx,
+		br_hmac_get_digest(&hc->u.hmac_ctx), tmp, hc->dig_len);
+	hc->ptr = hc->dig_len;	/* buffer marked as consumed: first produce() computes a chunk */
+	hc->chunk_num = 0;	/* produce() increments before use, so the first chunk is 1 */
+}
+
+/* see bearssl_kdf.h: write up to out_len output bytes for 'info'; returns the count written */
+size_t
+br_hkdf_produce(br_hkdf_context *hc,
+	const void *info, size_t info_len, void *out, size_t out_len)
+{
+	size_t tlen;	/* bytes written to out so far */
+
+	tlen = 0;
+	while (out_len > 0) {
+		size_t clen;
+
+		if (hc->ptr == hc->dig_len) {	/* hc->buf is used up: compute the next chunk */
+			br_hmac_context hmac_ctx;
+			unsigned char x;
+
+			hc->chunk_num ++;
+			if (hc->chunk_num == 256) {	/* chunk index must fit the single byte x */
+				return tlen;	/* NOTE(review): chunk_num stays 256, a later call would move on to 257; confirm callers stop after a short return */
+			}
+			x = hc->chunk_num;
+			br_hmac_init(&hmac_ctx, &hc->u.prk_ctx, 0);
+			if (x != 1) {	/* every chunk except the first also covers the previous one */
+				br_hmac_update(&hmac_ctx, hc->buf, hc->dig_len);
+			}
+			br_hmac_update(&hmac_ctx, info, info_len);
+			br_hmac_update(&hmac_ctx, &x, 1);	/* chunk index, one byte */
+			br_hmac_out(&hmac_ctx, hc->buf);
+			hc->ptr = 0;
+		}
+		clen = hc->dig_len - hc->ptr;	/* unread bytes left in the current chunk */
+		if (clen > out_len) {
+			clen = out_len;
+		}
+		memcpy(out, hc->buf + hc->ptr, clen);
+		out = (unsigned char *)out + clen;
+		out_len -= clen;
+		hc->ptr += clen;	/* leftover chunk bytes are served by the next call */
+		tlen += clen;
+	}
+	return tlen;
+}
diff --git a/third_party/bearssl/src/hmac.c b/third_party/bearssl/src/hmac.c
new file mode 100644
index 0000000..b438798
--- /dev/null
+++ b/third_party/bearssl/src/hmac.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static inline size_t
+block_size(const br_hash_class *dig)	/* block size of the hash, decoded from dig->desc */
+{
+	unsigned ls;	/* base-2 logarithm of the block size */
+	
+	ls = (unsigned)(dig->desc >> BR_HASHDESC_LBLEN_OFF)
+		& BR_HASHDESC_LBLEN_MASK;
+	return (size_t)1 << ls;
+}
+
+static void
+process_key(const br_hash_class **hc, void *ks,	/* ks receives the resulting hash state */
+	const void *key, size_t key_len, unsigned bb)	/* bb is the pad byte XORed into the key */
+{
+	unsigned char tmp[256];	/* one padded key block; assumes block size <= 256 */
+	size_t blen, u;
+
+	blen = block_size(*hc);
+	memcpy(tmp, key, key_len);	/* br_hmac_key_init() hashes over-long keys before calling */
+	for (u = 0; u < key_len; u ++) {
+		tmp[u] ^= (unsigned char)bb;
+	}
+	memset(tmp + key_len, bb, blen - key_len);	/* zero padding XOR bb is simply bb */
+	(*hc)->init(hc);
+	(*hc)->update(hc, tmp, blen);
+	(*hc)->state(hc, ks);	/* state after exactly one block of input */
+}
+
+/* see bearssl.h: precompute the two keyed hash states (ksi, kso) for an HMAC key */
+void
+br_hmac_key_init(br_hmac_key_context *kc,
+	const br_hash_class *dig, const void *key, size_t key_len)
+{
+	br_hash_compat_context hc;
+	unsigned char kbuf[64];	/* hashed key; assumes digest size <= 64 */
+
+	kc->dig_vtable = dig;
+	hc.vtable = dig;
+	if (key_len > block_size(dig)) {	/* key longer than a block: use its hash instead */
+		dig->init(&hc.vtable);
+		dig->update(&hc.vtable, key, key_len);
+		dig->out(&hc.vtable, kbuf);
+		key = kbuf;
+		key_len = br_digest_size(dig);
+	}
+	process_key(&hc.vtable, kc->ksi, key, key_len, 0x36);	/* state resumed by br_hmac_init() */
+	process_key(&hc.vtable, kc->kso, key, key_len, 0x5C);	/* state resumed by br_hmac_out() */
+}
+
+/* see bearssl.h: begin an HMAC computation from a precomputed key context */
+void
+br_hmac_init(br_hmac_context *ctx,
+	const br_hmac_key_context *kc, size_t out_len)
+{
+	const br_hash_class *dig;
+	size_t blen, hlen;
+
+	dig = kc->dig_vtable;
+	blen = block_size(dig);
+	dig->init(&ctx->dig.vtable);
+	dig->set_state(&ctx->dig.vtable, kc->ksi, (uint64_t)blen);	/* resume as if one block was already hashed */
+	memcpy(ctx->kso, kc->kso, sizeof kc->kso);	/* kept for the final pass in br_hmac_out() */
+	hlen = br_digest_size(dig);
+	if (out_len > 0 && out_len < hlen) {	/* 0 or >= digest size: keep the full size */
+		hlen = out_len;
+	}
+	ctx->out_len = hlen;
+}
+
+/* see bearssl.h: feed len more message bytes into the running hash of ctx */
+void
+br_hmac_update(br_hmac_context *ctx, const void *data, size_t len)
+{
+	ctx->dig.vtable->update(&ctx->dig.vtable, data, len);
+}
+
+/* see bearssl.h: compute the HMAC value; ctx is const and is not modified by this code */
+size_t
+br_hmac_out(const br_hmac_context *ctx, void *out)
+{
+	const br_hash_class *dig;
+	br_hash_compat_context hc;
+	unsigned char tmp[64];	/* first-pass digest, then the final value */
+	size_t blen, hlen;
+
+	dig = ctx->dig.vtable;
+	dig->out(&ctx->dig.vtable, tmp);	/* digest of the running hash */
+	blen = block_size(dig);
+	dig->init(&hc.vtable);
+	dig->set_state(&hc.vtable, ctx->kso, (uint64_t)blen);	/* second pass resumes from the kso state */
+	hlen = br_digest_size(dig);
+	dig->update(&hc.vtable, tmp, hlen);
+	dig->out(&hc.vtable, tmp);
+	memcpy(out, tmp, ctx->out_len);	/* output length as fixed by br_hmac_init() */
+	return ctx->out_len;
+}
diff --git a/third_party/bearssl/src/hmac_ct.c b/third_party/bearssl/src/hmac_ct.c
new file mode 100644
index 0000000..e1c1d80
--- /dev/null
+++ b/third_party/bearssl/src/hmac_ct.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static inline size_t
+hash_size(const br_hash_class *dig)	/* hash output size, decoded from dig->desc */
+{
+	return (unsigned)(dig->desc >> BR_HASHDESC_OUT_OFF)
+		& BR_HASHDESC_OUT_MASK;
+}
+
+static inline size_t
+block_size(const br_hash_class *dig)	/* block size of the hash, decoded from dig->desc */
+{
+	unsigned ls;	/* base-2 logarithm of the block size */
+	
+	ls = (unsigned)(dig->desc >> BR_HASHDESC_LBLEN_OFF)
+		& BR_HASHDESC_LBLEN_MASK;
+	return (size_t)1 << ls;
+}
+
+/* see bearssl.h: HMAC output for ctx's input followed by data[0..len-1], with min_len <= len <= max_len */
+size_t
+br_hmac_outCT(const br_hmac_context *ctx,
+	const void *data, size_t len, size_t min_len, size_t max_len,
+	void *out)
+{
+	/*
+	 * Method implemented here is inspired from the descriptions on:
+	 *    https://www.imperialviolet.org/2013/02/04/luckythirteen.html
+	 *
+	 * Principle: we input bytes one by one. We use a MUX to push
+	 * padding bytes instead of data bytes when appropriate. At each
+	 * block limit, we get the current hash function state: this is
+	 * a potential output, since we handle MD padding ourselves.
+	 *
+	 * be     1 for big-endian, 0 for little-endian
+	 * po     minimal MD padding length
+	 * bs     block size (always a power of 2)
+	 * hlen   hash output size
+	 */
+
+	const br_hash_class *dig;
+	br_hash_compat_context hc;	/* working copy; ctx itself is left untouched */
+	int be;
+	uint32_t po, bs;
+	uint32_t kr, km, kl, kz, u;
+	uint64_t count, ncount, bit_len;
+	unsigned char tmp1[64], tmp2[64];	/* tmp1: latest state; tmp2: selected state */
+	size_t hlen;
+
+	/*
+	 * Copy the current hash context.
+	 */
+	hc = ctx->dig;
+
+	/*
+	 * Get function-specific information.
+	 */
+	dig = hc.vtable;
+	be = (dig->desc & BR_HASHDESC_MD_PADDING_BE) != 0;
+	po = 9;	/* one 0x80 byte plus eight length bytes */
+	if (dig->desc & BR_HASHDESC_MD_PADDING_128) {
+		po += 8;	/* length field is eight bytes wider for these functions */
+	}
+	bs = block_size(dig);
+	hlen = hash_size(dig);
+
+	/*
+	 * Get current input length and compute total bit length.
+	 */
+	count = dig->state(&hc.vtable, tmp1);
+	bit_len = (count + (uint64_t)len) << 3;
+
+	/*
+	 * We can input the blocks that we are sure we will use.
+	 * This offers better performance (no MUX for these blocks)
+	 * and also ensures that the remaining lengths fit on 32 bits.
+	 */
+	ncount = (count + (uint64_t)min_len) & ~(uint64_t)(bs - 1);
+	if (ncount > count) {
+		size_t zlen;	/* bytes consumed directly, up to a block boundary */
+
+		zlen = (size_t)(ncount - count);
+		dig->update(&hc.vtable, data, zlen);
+		data = (const unsigned char *)data + zlen;
+		len -= zlen;
+		max_len -= zlen;
+		count = ncount;
+	}
+
+	/*
+	 * At that point:
+	 * -- 'count' contains the number of bytes already processed
+	 * (in total).
+	 * -- We must input 'len' bytes. 'min_len' is unimportant: we
+	 * used it to know how many full blocks we could process
+	 * directly. Now only len and max_len matter.
+	 *
+	 * We compute kr, kl, kz and km.
+	 *  kr   number of input bytes already in the current block
+	 *  km   index of the first byte after the end of the last padding
+	 *       block, if length is max_len
+	 *  kz   index of the last byte of the actual last padding block
+	 *  kl   index of the start of the encoded length
+	 *
+	 * km, kz and kl are counted from the current offset in the
+	 * input data.
+	 */
+	kr = (uint32_t)count & (bs - 1);
+	kz = ((kr + (uint32_t)len + po + bs - 1) & ~(bs - 1)) - 1 - kr;
+	kl = kz - 7;	/* the last eight bytes of the block hold the length bytes e */
+	km = ((kr + (uint32_t)max_len + po + bs - 1) & ~(bs - 1)) - kr;
+
+	/*
+	 * We must now process km bytes. For index u from 0 to km-1:
+	 *   d is from data[] if u < max_len, 0x00 otherwise
+	 *   e is an encoded length byte or 0x00, depending on u
+	 * The tests for d and e need not be constant-time, since
+	 * they relate only to u and max_len, not to the actual length.
+	 *
+	 * Actual input byte is then:
+	 *   d      if u < len
+	 *   0x80   if u == len
+	 *   0x00   if u > len and u < kl
+	 *   e      if u >= kl
+	 *
+	 * Hash state is obtained whenever we reach a full block. This
+	 * is the result we want if and only if u == kz.
+	 */
+	memset(tmp2, 0, sizeof tmp2);
+	for (u = 0; u < km; u ++) {
+		uint32_t v;	/* offset of this byte within its block */
+		uint32_t d, e, x0, x1;
+		unsigned char x[1];
+
+		d = (u < max_len) ? ((const unsigned char *)data)[u] : 0x00;
+		v = (kr + u) & (bs - 1);
+		if (v >= (bs - 8)) {	/* within the last eight bytes of a block */
+			unsigned j;
+
+			j = (v - (bs - 8)) << 3;
+			if (be) {
+				e = (uint32_t)(bit_len >> (56 - j));
+			} else {
+				e = (uint32_t)(bit_len >> j);
+			}
+			e &= 0xFF;
+		} else {
+			e = 0x00;
+		}
+		x0 = MUX(EQ(u, (uint32_t)len), 0x80, d);	/* candidate for u <= len */
+		x1 = MUX(LT(u, kl), 0x00, e);	/* candidate for u > len */
+		x[0] = MUX(LE(u, (uint32_t)len), x0, x1);
+		dig->update(&hc.vtable, x, 1);
+		if (v == (bs - 1)) {	/* block completed: its state is a candidate result */
+			dig->state(&hc.vtable, tmp1);
+			CCOPY(EQ(u, kz), tmp2, tmp1, hlen);
+		}
+	}
+
+	/*
+	 * Inner hash output is in tmp2[]; we finish processing.
+	 */
+	dig->init(&hc.vtable);
+	dig->set_state(&hc.vtable, ctx->kso, (uint64_t)bs);	/* same final pass as br_hmac_out() */
+	dig->update(&hc.vtable, tmp2, hlen);
+	dig->out(&hc.vtable, tmp2);
+	memcpy(out, tmp2, ctx->out_len);
+	return ctx->out_len;
+}
diff --git a/third_party/bearssl/src/hmac_drbg.c b/third_party/bearssl/src/hmac_drbg.c
new file mode 100644
index 0000000..d746756
--- /dev/null
+++ b/third_party/bearssl/src/hmac_drbg.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl.h: set the initial K and V values, then absorb the seed */
+void
+br_hmac_drbg_init(br_hmac_drbg_context *ctx,
+	const br_hash_class *digest_class, const void *seed, size_t len)
+{
+	size_t hlen;
+
+	ctx->vtable = &br_hmac_drbg_vtable;
+	hlen = br_digest_size(digest_class);
+	memset(ctx->K, 0x00, hlen);	/* K starts as hlen bytes of 0x00 */
+	memset(ctx->V, 0x01, hlen);	/* V starts as hlen bytes of 0x01 */
+	ctx->digest_class = digest_class;	/* must be set before update() reads it */
+	br_hmac_drbg_update(ctx, seed, len);
+}
+
+/* see bearssl.h: write len pseudorandom bytes to out, then refresh K and V */
+void
+br_hmac_drbg_generate(br_hmac_drbg_context *ctx, void *out, size_t len)
+{
+	const br_hash_class *dig;
+	br_hmac_key_context kc;
+	br_hmac_context hc;
+	size_t hlen;
+	unsigned char *buf;
+	unsigned char x;
+
+	dig = ctx->digest_class;
+	hlen = br_digest_size(dig);
+	br_hmac_key_init(&kc, dig, ctx->K, hlen);
+	buf = out;
+	while (len > 0) {
+		size_t clen;
+
+		br_hmac_init(&hc, &kc, 0);
+		br_hmac_update(&hc, ctx->V, hlen);
+		br_hmac_out(&hc, ctx->V);	/* V = HMAC(K, V) */
+		clen = hlen;
+		if (clen > len) {	/* last round may need only part of V */
+			clen = len;
+		}
+		memcpy(buf, ctx->V, clen);
+		buf += clen;
+		len -= clen;
+	}
+
+	/*
+	 * To prepare the state for the next request, we should call
+	 * br_hmac_drbg_update() with an empty additional seed. However,
+	 * we already have an initialized HMAC context with the right
+	 * initial key, and we don't want to push another one on the
+	 * stack, so we inline that update() call here.
+	 */
+	br_hmac_init(&hc, &kc, 0);
+	br_hmac_update(&hc, ctx->V, hlen);
+	x = 0x00;
+	br_hmac_update(&hc, &x, 1);
+	br_hmac_out(&hc, ctx->K);	/* K = HMAC(K, V || 0x00) */
+	br_hmac_key_init(&kc, dig, ctx->K, hlen);
+	br_hmac_init(&hc, &kc, 0);
+	br_hmac_update(&hc, ctx->V, hlen);
+	br_hmac_out(&hc, ctx->V);	/* V = HMAC(K, V) with the new K */
+}
+
+/* see bearssl.h: mix len bytes of seed into K and V (len may be 0) */
+void
+br_hmac_drbg_update(br_hmac_drbg_context *ctx, const void *seed, size_t len)
+{
+	const br_hash_class *dig;
+	br_hmac_key_context kc;
+	br_hmac_context hc;
+	size_t hlen;
+	unsigned char x;	/* separator byte: 0x00 in step 1, 0x01 in step 4 */
+
+	dig = ctx->digest_class;
+	hlen = br_digest_size(dig);
+
+	/*
+	 * 1. K = HMAC(K, V || 0x00 || seed)
+	 */
+	br_hmac_key_init(&kc, dig, ctx->K, hlen);
+	br_hmac_init(&hc, &kc, 0);
+	br_hmac_update(&hc, ctx->V, hlen);
+	x = 0x00;
+	br_hmac_update(&hc, &x, 1);
+	br_hmac_update(&hc, seed, len);
+	br_hmac_out(&hc, ctx->K);
+	br_hmac_key_init(&kc, dig, ctx->K, hlen);	/* rekey with the new K */
+
+	/*
+	 * 2. V = HMAC(K, V)
+	 */
+	br_hmac_init(&hc, &kc, 0);
+	br_hmac_update(&hc, ctx->V, hlen);
+	br_hmac_out(&hc, ctx->V);
+
+	/*
+	 * 3. If the additional seed is empty, then stop here.
+	 */
+	if (len == 0) {
+		return;
+	}
+
+	/*
+	 * 4. K = HMAC(K, V || 0x01 || seed)
+	 */
+	br_hmac_init(&hc, &kc, 0);
+	br_hmac_update(&hc, ctx->V, hlen);
+	x = 0x01;
+	br_hmac_update(&hc, &x, 1);
+	br_hmac_update(&hc, seed, len);
+	br_hmac_out(&hc, ctx->K);
+	br_hmac_key_init(&kc, dig, ctx->K, hlen);	/* rekey with the new K */
+
+	/*
+	 * 5. V = HMAC(K, V)
+	 */
+	br_hmac_init(&hc, &kc, 0);
+	br_hmac_update(&hc, ctx->V, hlen);
+	br_hmac_out(&hc, ctx->V);
+}
+
+/* see bearssl.h: br_prng_class instance exposing the three HMAC_DRBG functions */
+const br_prng_class br_hmac_drbg_vtable = {
+	sizeof(br_hmac_drbg_context),	/* size of the context structure */
+	(void (*)(const br_prng_class **, const void *, const void *, size_t))	/* cast to the generic signature */
+		&br_hmac_drbg_init,
+	(void (*)(const br_prng_class **, void *, size_t))
+		&br_hmac_drbg_generate,
+	(void (*)(const br_prng_class **, const void *, size_t))
+		&br_hmac_drbg_update
+};
diff --git a/third_party/bearssl/src/i15_add.c b/third_party/bearssl/src/i15_add.c
new file mode 100644
index 0000000..97e29b8
--- /dev/null
+++ b/third_party/bearssl/src/i15_add.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: compute a + b word by word, store it into a under control of ctl, return the carry */
+uint32_t
+br_i15_add(uint16_t *a, const uint16_t *b, uint32_t ctl)
+{
+	uint32_t cc;	/* carry between 15-bit words */
+	size_t u, m;
+
+	cc = 0;
+	m = (a[0] + 31) >> 4;	/* loop bound: value words are a[1] .. a[m-1] */
+	for (u = 1; u < m; u ++) {
+		uint32_t aw, bw, naw;
+
+		aw = a[u];
+		bw = b[u];
+		naw = aw + bw + cc;
+		cc = naw >> 15;	/* computed on every pass, whatever ctl is */
+		a[u] = MUX(ctl, naw & 0x7FFF, aw);	/* the word is rewritten in both cases */
+	}
+	return cc;
+}
diff --git a/third_party/bearssl/src/i15_bitlen.c b/third_party/bearssl/src/i15_bitlen.c
new file mode 100644
index 0000000..ad74467
--- /dev/null
+++ b/third_party/bearssl/src/i15_bitlen.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: encoded bit length of the xlen words at x, scanning every word */
+uint32_t
+br_i15_bit_length(uint16_t *x, size_t xlen)
+{
+	uint32_t tw, twk;	/* top non-zero word found so far, and its index */
+
+	tw = 0;
+	twk = 0;
+	while (xlen -- > 0) {	/* from the highest index down to 0 */
+		uint32_t w, c;
+
+		c = EQ(tw, 0);	/* still no non-zero word recorded */
+		w = x[xlen];
+		tw = MUX(c, w, tw);
+		twk = MUX(c, (uint32_t)xlen, twk);
+	}
+	return (twk << 4) + BIT_LENGTH(tw);	/* word index in the upper bits, bit count of tw below */
+}
diff --git a/third_party/bearssl/src/i15_decmod.c b/third_party/bearssl/src/i15_decmod.c
new file mode 100644
index 0000000..6076c57
--- /dev/null
+++ b/third_party/bearssl/src/i15_decmod.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: decode big-endian src into x if the value is lower than m; returns 1 on success, 0 otherwise */
+uint32_t
+br_i15_decode_mod(uint16_t *x, const void *src, size_t len, const uint16_t *m)
+{
+	/*
+	 * Two-pass algorithm: in the first pass, we determine whether the
+	 * value fits; in the second pass, we do the actual write.
+	 *
+	 * During the first pass, 'r' contains the comparison result so
+	 * far:
+	 *  0x00000000   value is equal to the modulus
+	 *  0x00000001   value is greater than the modulus
+	 *  0xFFFFFFFF   value is lower than the modulus
+	 *
+	 * Since we iterate starting with the least significant bytes (at
+	 * the end of src[]), each new comparison overrides the previous
+	 * except when the comparison yields 0 (equal).
+	 *
+	 * During the second pass, 'r' is either 0xFFFFFFFF (value fits)
+	 * or 0x00000000 (value does not fit).
+	 *
+	 * We must iterate over all bytes of the source, _and_ possibly
+	 * some extra virtual bytes (with value 0) so as to cover the
+	 * complete modulus as well. We also add 4 such extra bytes beyond
+	 * the modulus length because it then guarantees that no accumulated
+	 * partial word remains to be processed.
+	 */
+	const unsigned char *buf;
+	size_t mlen, tlen;
+	int pass;
+	uint32_t r;
+
+	buf = src;
+	mlen = (m[0] + 15) >> 4;	/* number of 15-bit words in the modulus */
+	tlen = (mlen << 1);	/* two bytes per modulus word */
+	if (tlen < len) {
+		tlen = len;
+	}
+	tlen += 4;
+	r = 0;
+	for (pass = 0; pass < 2; pass ++) {
+		size_t u, v;	/* u: byte counter; v: index of the next word */
+		uint32_t acc;
+		int acc_len;
+
+		v = 1;
+		acc = 0;
+		acc_len = 0;
+		for (u = 0; u < tlen; u ++) {
+			uint32_t b;
+
+			if (u < len) {
+				b = buf[len - 1 - u];	/* bytes are taken from the end of src */
+			} else {
+				b = 0;	/* virtual padding byte */
+			}
+			acc |= (b << acc_len);
+			acc_len += 8;
+			if (acc_len >= 15) {
+				uint32_t xw;
+
+				xw = acc & (uint32_t)0x7FFF;
+				acc_len -= 15;
+				acc = b >> (8 - acc_len);	/* bits of b not used by xw */
+				if (v <= mlen) {
+					if (pass) {
+						x[v] = r & xw;	/* r is all-ones or zero in the second pass */
+					} else {
+						uint32_t cc;
+
+						cc = (uint32_t)CMP(xw, m[v]);
+						r = MUX(EQ(cc, 0), r, cc);
+					}
+				} else {
+					if (!pass) {
+						r = MUX(EQ(xw, 0), r, 1);	/* non-zero word beyond the modulus */
+					}
+				}
+				v ++;
+			}
+		}
+
+		/*
+		 * When we reach this point at the end of the first pass:
+		 * r is either 0, 1 or -1; we want to set r to 0 if it
+		 * is equal to 0 or 1, and leave it to -1 otherwise.
+		 *
+		 * When we reach this point at the end of the second pass:
+		 * r is either 0 or -1; we want to leave that value
+		 * untouched. This is a subcase of the previous.
+		 */
+		r >>= 1;
+		r |= (r << 1);
+	}
+
+	x[0] = m[0];	/* result carries the bit length header of the modulus */
+	return r & (uint32_t)1;
+}
diff --git a/third_party/bearssl/src/i15_decode.c b/third_party/bearssl/src/i15_decode.c
new file mode 100644
index 0000000..fc2c0be
--- /dev/null
+++ b/third_party/bearssl/src/i15_decode.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: decode len big-endian bytes from src into 15-bit words x[1..], header in x[0] */
+void
+br_i15_decode(uint16_t *x, const void *src, size_t len)
+{
+	const unsigned char *buf;
+	size_t v;	/* index of the next word to write */
+	uint32_t acc;
+	int acc_len;	/* number of pending bits held in acc */
+
+	buf = src;
+	v = 1;
+	acc = 0;
+	acc_len = 0;
+	while (len -- > 0) {	/* last byte of src is consumed first */
+		uint32_t b;
+
+		b = buf[len];
+		acc |= (b << acc_len);
+		acc_len += 8;
+		if (acc_len >= 15) {
+			x[v ++] = acc & 0x7FFF;
+			acc_len -= 15;
+			acc >>= 15;
+		}
+	}
+	if (acc_len != 0) {	/* leftover bits become a final partial word */
+		x[v ++] = acc;
+	}
+	x[0] = br_i15_bit_length(x + 1, v - 1);
+}
diff --git a/third_party/bearssl/src/i15_decred.c b/third_party/bearssl/src/i15_decred.c
new file mode 100644
index 0000000..81e7dd1
--- /dev/null
+++ b/third_party/bearssl/src/i15_decred.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: decode big-endian src into x, feeding excess bytes through br_i15_muladd_small() with m */
+void
+br_i15_decode_reduce(uint16_t *x,
+	const void *src, size_t len, const uint16_t *m)
+{
+	uint32_t m_ebitlen, m_rbitlen;	/* encoded and real bit lengths of m */
+	size_t mblen, k;
+	const unsigned char *buf;
+	uint32_t acc;
+	int acc_len;
+
+	/*
+	 * Get the encoded bit length.
+	 */
+	m_ebitlen = m[0];
+
+	/*
+	 * Special case for an invalid (null) modulus.
+	 */
+	if (m_ebitlen == 0) {
+		x[0] = 0;
+		return;
+	}
+
+	/*
+	 * Clear the destination.
+	 */
+	br_i15_zero(x, m_ebitlen);
+
+	/*
+	 * First decode directly as many bytes as possible. This requires
+	 * computing the actual bit length.
+	 */
+	m_rbitlen = m_ebitlen >> 4;	/* number of full 15-bit words */
+	m_rbitlen = (m_ebitlen & 15) + (m_rbitlen << 4) - m_rbitlen;	/* extra bits + 15 per word */
+	mblen = (m_rbitlen + 7) >> 3;	/* length of m in bytes */
+	k = mblen - 1;	/* one byte less than the length of m */
+	if (k >= len) {	/* whole source decodes directly */
+		br_i15_decode(x, src, len);
+		x[0] = m_ebitlen;
+		return;
+	}
+	buf = src;
+	br_i15_decode(x, buf, k);
+	x[0] = m_ebitlen;	/* replaces the header written by br_i15_decode() */
+
+	/*
+	 * Input remaining bytes, using 15-bit words.
+	 */
+	acc = 0;
+	acc_len = 0;
+	while (k < len) {
+		uint32_t v;
+
+		v = buf[k ++];
+		acc = (acc << 8) | v;
+		acc_len += 8;
+		if (acc_len >= 15) {
+			br_i15_muladd_small(x, acc >> (acc_len - 15), m);	/* top 15 pending bits */
+			acc_len -= 15;
+			acc &= ~((uint32_t)-1 << acc_len);	/* keep only the bits not yet injected */
+		}
+	}
+
+	/*
+	 * We may have some bits accumulated. We then perform a shift to
+	 * be able to inject these bits as a full 15-bit word.
+	 */
+	if (acc_len != 0) {
+		acc = (acc | (x[1] << acc_len)) & 0x7FFF;	/* low bits of x complete the word */
+		br_i15_rshift(x, 15 - acc_len);
+		br_i15_muladd_small(x, acc, m);
+	}
+}
diff --git a/third_party/bearssl/src/i15_encode.c b/third_party/bearssl/src/i15_encode.c
new file mode 100644
index 0000000..50668f4
--- /dev/null
+++ b/third_party/bearssl/src/i15_encode.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: write x as exactly len big-endian bytes into dst */
+void
+br_i15_encode(void *dst, size_t len, const uint16_t *x)
+{
+	unsigned char *buf;
+	size_t u, xlen;	/* u: next word to read; xlen: number of value words */
+	uint32_t acc;
+	int acc_len;	/* number of pending bits held in acc */
+
+	xlen = (x[0] + 15) >> 4;
+	if (xlen == 0) {	/* no value words: output is all zeros */
+		memset(dst, 0, len);
+		return;
+	}
+	u = 1;
+	acc = 0;
+	acc_len = 0;
+	buf = dst;
+	while (len -- > 0) {	/* dst is filled from its last byte backwards */
+		if (acc_len < 8) {
+			if (u <= xlen) {
+				acc += (uint32_t)x[u ++] << acc_len;
+			}
+			acc_len += 15;	/* counted even past the last word: zero bits */
+		}
+		buf[len] = (unsigned char)acc;
+		acc >>= 8;
+		acc_len -= 8;
+	}
+}
diff --git a/third_party/bearssl/src/i15_fmont.c b/third_party/bearssl/src/i15_fmont.c
new file mode 100644
index 0000000..3450b72
--- /dev/null
+++ b/third_party/bearssl/src/i15_fmont.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Takes x out of Montgomery representation modulo m; see inner.h */
+void
+br_i15_from_monty(uint16_t *x, const uint16_t *m, uint16_t m0i)
+{
+	size_t len, u, v;
+
+	len = (m[0] + 15) >> 4; /* value word count, from header m[0] */
+	for (u = 0; u < len; u ++) { /* one word-sized reduction per pass */
+		uint32_t f, cc;
+
+		f = MUL15(x[1], m0i) & 0x7FFF; /* factor from low word x[1] */
+		cc = 0;
+		for (v = 0; v < len; v ++) {
+			uint32_t z;
+
+			z = (uint32_t)x[v + 1] + MUL15(f, m[v + 1]) + cc;
+			cc = z >> 15;
+			if (v != 0) { /* low word is dropped: shift down */
+				x[v] = z & 0x7FFF;
+			}
+		}
+		x[len] = cc; /* final carry becomes the new top word */
+	}
+
+	/*
+	 * We may have to do an extra subtraction, but only if the
+	 * value in x[] is indeed greater than or equal to that of m[],
+	 * which is why we must do two calls (first call computes the
+	 * carry, second call performs the subtraction only if the carry
+	 * is 0).
+	 */
+	br_i15_sub(x, m, NOT(br_i15_sub(x, m, 0)));
+}
diff --git a/third_party/bearssl/src/i15_iszero.c b/third_party/bearssl/src/i15_iszero.c
new file mode 100644
index 0000000..d4b6f10
--- /dev/null
+++ b/third_party/bearssl/src/i15_iszero.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Returns 1 if every value word of x is zero, 0 otherwise; see inner.h */
+uint32_t
+br_i15_iszero(const uint16_t *x)
+{
+	uint32_t z;
+	size_t u;
+
+	z = 0;
+	for (u = (x[0] + 15) >> 4; u > 0; u --) {
+		z |= x[u]; /* OR all words together; no early exit */
+	}
+	return ~(z | -z) >> 31; /* top bit of z | -z is set iff z != 0 */
+}
diff --git a/third_party/bearssl/src/i15_moddiv.c b/third_party/bearssl/src/i15_moddiv.c
new file mode 100644
index 0000000..45af756
--- /dev/null
+++ b/third_party/bearssl/src/i15_moddiv.c
@@ -0,0 +1,465 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * In this file, we handle big integers with a custom format, i.e.
+ * without the usual one-word header. Value is split into 15-bit words,
+ * each stored in a 16-bit slot (top bit is zero) in little-endian
+ * order. The length (in words) is provided explicitly. In some cases,
+ * the value can be negative (using two's complement representation). In
+ * some cases, the top word is allowed to have a 16th bit.
+ */
+
+/*
+ * Negate big integer conditionally. The value consists of 'len' words,
+ * with 15 bits in each word (the top bit of each word should be 0,
+ * except possibly for the last word). If 'ctl' is 1, the negation is
+ * computed; otherwise, if 'ctl' is 0, then the value is unchanged.
+ */
+static void
+cond_negate(uint16_t *a, size_t len, uint32_t ctl)
+{
+	size_t k;
+	uint32_t cc, xm;
+
+	cc = ctl; /* the "+1" of two's complement, only when ctl = 1 */
+	xm = 0x7FFF & -ctl; /* 15-bit all-ones mask if ctl = 1, else 0 */
+	for (k = 0; k < len; k ++) {
+		uint32_t aw;
+
+		aw = a[k];
+		aw = (aw ^ xm) + cc; /* invert (if ctl), add running carry */
+		a[k] = aw & 0x7FFF; /* every word is stored back as 15 bits */
+		cc = (aw >> 15) & 1; /* carry into the next word */
+	}
+}
+
+/*
+ * Finish modular reduction. Rules on input parameters:
+ *
+ *   if neg = 1, then -m <= a < 0
+ *   if neg = 0, then 0 <= a < 2*m
+ *
+ * If neg = 0, then the top word of a[] may use 16 bits.
+ *
+ * Also, modulus m must be odd.
+ */
+static void
+finish_mod(uint16_t *a, size_t len, const uint16_t *m, uint32_t neg)
+{
+	size_t k;
+	uint32_t cc, xm, ym;
+
+	/*
+	 * First pass: compare a (assumed nonnegative) with m.
+	 */
+	cc = 0;
+	for (k = 0; k < len; k ++) {
+		uint32_t aw, mw;
+
+		aw = a[k];
+		mw = m[k];
+		cc = (aw - mw - cc) >> 31; /* borrow: ends as 1 iff a < m */
+	}
+
+	/*
+	 * At this point:
+	 *   if neg = 1, then we must add m (regardless of cc)
+	 *   if neg = 0 and cc = 0, then we must subtract m
+	 *   if neg = 0 and cc = 1, then we must do nothing
+	 */
+	xm = 0x7FFF & -neg; /* neg = 1: complement m, so the pass adds it */
+	ym = -(neg | (1 - cc)); /* all-ones unless nothing is to be done */
+	cc = neg; /* initial borrow completes the two's complement of m */
+	for (k = 0; k < len; k ++) {
+		uint32_t aw, mw;
+
+		aw = a[k];
+		mw = (m[k] ^ xm) & ym;
+		aw = aw - mw - cc;
+		a[k] = aw & 0x7FFF;
+		cc = aw >> 31;
+	}
+}
+
+/*
+ * Compute:
+ *   a <- (a*pa+b*pb)/(2^15)
+ *   b <- (a*qa+b*qb)/(2^15)
+ * The division is assumed to be exact (i.e. the low word is dropped).
+ * If the final a is negative, then it is negated. Similarly for b.
+ * Returned value is the combination of two bits:
+ *   bit 0: 1 if a had to be negated, 0 otherwise
+ *   bit 1: 1 if b had to be negated, 0 otherwise
+ *
+ * Factors pa, pb, qa and qb must be at most 2^15 in absolute value.
+ * Source integers a and b must be nonnegative; top word is not allowed
+ * to contain an extra 16th bit.
+ */
+static uint32_t
+co_reduce(uint16_t *a, uint16_t *b, size_t len,
+	int32_t pa, int32_t pb, int32_t qa, int32_t qb)
+{
+	size_t k;
+	int32_t cca, ccb;
+	uint32_t nega, negb;
+
+	cca = 0;
+	ccb = 0;
+	for (k = 0; k < len; k ++) {
+		uint32_t wa, wb, za, zb;
+		uint16_t tta, ttb;
+
+		/*
+		 * Since:
+		 *   |pa| <= 2^15
+		 *   |pb| <= 2^15
+		 *   0 <= wa <= 2^15 - 1
+		 *   0 <= wb <= 2^15 - 1
+		 *   |cca| <= 2^16 - 1
+		 * Then:
+		 *   |za| <= (2^15-1)*(2^16) + (2^16-1) = 2^31 - 1
+		 *
+		 * Thus, the new value of cca is such that |cca| <= 2^16 - 1.
+		 * The same applies to ccb.
+		 */
+		wa = a[k];
+		wb = b[k];
+		za = wa * (uint32_t)pa + wb * (uint32_t)pb + (uint32_t)cca;
+		zb = wa * (uint32_t)qa + wb * (uint32_t)qb + (uint32_t)ccb;
+		if (k > 0) { /* word k is stored at k-1: that is the /2^15 */
+			a[k - 1] = za & 0x7FFF;
+			b[k - 1] = zb & 0x7FFF;
+		}
+		tta = za >> 15; /* keeps the low 16 bits of the shifted sum */
+		ttb = zb >> 15;
+		cca = *(int16_t *)&tta; /* same 16 bits, read as signed */
+		ccb = *(int16_t *)&ttb;
+	}
+	a[len - 1] = (uint16_t)cca; /* last carry is the new top word */
+	b[len - 1] = (uint16_t)ccb;
+	nega = (uint32_t)cca >> 31; /* 1 if the result came out negative */
+	negb = (uint32_t)ccb >> 31;
+	cond_negate(a, len, nega);
+	cond_negate(b, len, negb);
+	return nega | (negb << 1);
+}
+
+/*
+ * Compute:
+ *   a <- (a*pa+b*pb)/(2^15) mod m
+ *   b <- (a*qa+b*qb)/(2^15) mod m
+ *
+ * m0i is equal to -1/m[0] mod 2^15; it selects the multiples fa, fb
+ * of m whose addition makes the division by 2^15 exact.
+ * Factors pa, pb, qa and qb must be at most 2^15 in absolute value.
+ * Source integers a and b must be nonnegative; top word is not allowed
+ * to contain an extra 16th bit.
+ */
+static void
+co_reduce_mod(uint16_t *a, uint16_t *b, size_t len,
+	int32_t pa, int32_t pb, int32_t qa, int32_t qb,
+	const uint16_t *m, uint16_t m0i)
+{
+	size_t k;
+	int32_t cca, ccb, fa, fb;
+
+	cca = 0;
+	ccb = 0;
+	fa = ((a[0] * (uint32_t)pa + b[0] * (uint32_t)pb) * m0i) & 0x7FFF;
+	fb = ((a[0] * (uint32_t)qa + b[0] * (uint32_t)qb) * m0i) & 0x7FFF;
+	for (k = 0; k < len; k ++) {
+		uint32_t wa, wb, za, zb;
+		uint32_t tta, ttb;
+
+		/*
+		 * In this loop, carries 'cca' and 'ccb' always fit on
+		 * 17 bits (in absolute value).
+		 */
+		wa = a[k];
+		wb = b[k];
+		za = wa * (uint32_t)pa + wb * (uint32_t)pb
+			+ m[k] * (uint32_t)fa + (uint32_t)cca;
+		zb = wa * (uint32_t)qa + wb * (uint32_t)qb
+			+ m[k] * (uint32_t)fb + (uint32_t)ccb;
+		if (k > 0) { /* word k is stored at k-1: that is the /2^15 */
+			a[k - 1] = za & 0x7FFF;
+			b[k - 1] = zb & 0x7FFF;
+		}
+
+		/*
+		 * The XOR-and-sub construction below does an arithmetic
+		 * right shift in a portable way (technically, right-shifting
+		 * a negative signed value is implementation-defined in C).
+		 */
+#define M   ((uint32_t)1 << 16)
+		tta = za >> 15;
+		ttb = zb >> 15;
+		tta = (tta ^ M) - M; /* sign-extend from bit 16 */
+		ttb = (ttb ^ M) - M;
+		cca = *(int32_t *)&tta;
+		ccb = *(int32_t *)&ttb;
+#undef M
+	}
+	a[len - 1] = (uint32_t)cca; /* truncated to 16 bits by the store */
+	b[len - 1] = (uint32_t)ccb;
+
+	/*
+	 * At this point:
+	 *   -m <= a < 2*m
+	 *   -m <= b < 2*m
+	 * (this is a case of Montgomery reduction)
+	 * The top word of 'a' and 'b' may have a 16-th bit set.
+	 * We may have to add or subtract the modulus.
+	 */
+	finish_mod(a, len, m, (uint32_t)cca >> 31);
+	finish_mod(b, len, m, (uint32_t)ccb >> 31);
+}
+
+/* In-place modular division x <- x/y mod m (m odd); see inner.h */
+uint32_t
+br_i15_moddiv(uint16_t *x, const uint16_t *y, const uint16_t *m, uint16_t m0i,
+	uint16_t *t)
+{
+	/*
+	 * Algorithm is an extended binary GCD. We maintain four values
+	 * a, b, u and v, with the following invariants:
+	 *
+	 *   a * x = y * u mod m
+	 *   b * x = y * v mod m
+	 *
+	 * Starting values are:
+	 *
+	 *   a = y
+	 *   b = m
+	 *   u = x
+	 *   v = 0
+	 *
+	 * The formal definition of the algorithm is a sequence of steps:
+	 *
+	 *   - If a is even, then a <- a/2 and u <- u/2 mod m.
+	 *   - Otherwise, if b is even, then b <- b/2 and v <- v/2 mod m.
+	 *   - Otherwise, if a > b, then a <- (a-b)/2 and u <- (u-v)/2 mod m.
+	 *   - Otherwise, b <- (b-a)/2 and v <- (v-u)/2 mod m.
+	 *
+	 * Algorithm stops when a = b. At that point, they both are equal
+	 * to GCD(y,m); the modular division succeeds if that value is 1.
+	 * The result of the modular division is then u (or v: both are
+	 * equal at that point).
+	 *
+	 * Each step makes either a or b shrink by at least one bit; hence,
+	 * if m has bit length k bits, then 2k-2 steps are sufficient.
+	 *
+	 *
+	 * Though complexity is quadratic in the size of m, the bit-by-bit
+	 * processing is not very efficient. We can speed up processing by
+	 * remarking that the decisions are taken based only on observation
+	 * of the top and low bits of a and b.
+	 *
+	 * In the loop below, at each iteration, we use the two top words
+	 * of a and b, and the low words of a and b, to compute reduction
+	 * parameters pa, pb, qa and qb such that the new values for a
+	 * and b are:
+	 *
+	 *   a' = (a*pa + b*pb) / (2^15)
+	 *   b' = (a*qa + b*qb) / (2^15)
+	 *
+	 * the division being exact.
+	 *
+	 * Since the choices are based on the top words, they may be slightly
+	 * off, requiring an optional correction: if a' < 0, then we replace
+	 * pa with -pa, and pb with -pb. The total length of a and b is
+	 * thus reduced by at least 14 bits at each iteration.
+	 *
+	 * The stopping conditions are still the same, though: when a
+	 * and b become equal, they must be both odd (since m is odd,
+	 * the GCD cannot be even), therefore the next operation is a
+	 * subtraction, and one of the values becomes 0. At that point,
+	 * nothing else happens, i.e. one value is stuck at 0, and the
+	 * other one is the GCD.
+	 */
+	size_t len, k;
+	uint16_t *a, *b, *u, *v;
+	uint32_t num, r;
+
+	len = (m[0] + 15) >> 4;
+	a = t; /* t[] is carved into three len-word buffers: a, b, v */
+	b = a + len;
+	u = x + 1; /* u is x's own value words: the result lands in x */
+	v = b + len;
+	memcpy(a, y + 1, len * sizeof *y);
+	memcpy(b, m + 1, len * sizeof *m);
+	memset(v, 0, len * sizeof *v);
+
+	/*
+	 * Loop below ensures that a and b are reduced by some bits each,
+	 * for a total of at least 14 bits.
+	 */
+	for (num = ((m[0] - (m[0] >> 4)) << 1) + 14; num >= 14; num -= 14) {
+		size_t j;
+		uint32_t c0, c1;
+		uint32_t a0, a1, b0, b1;
+		uint32_t a_hi, b_hi, a_lo, b_lo;
+		int32_t pa, pb, qa, qb;
+		int i;
+
+		/*
+		 * Extract top words of a and b. If j is the highest
+		 * index >= 1 such that a[j] != 0 or b[j] != 0, then we want
+		 * (a[j] << 15) + a[j - 1], and (b[j] << 15) + b[j - 1].
+		 * If a and b are down to one word each, then we use a[0]
+		 * and b[0].
+		 */
+		c0 = (uint32_t)-1; /* all-ones while only zero words seen */
+		c1 = (uint32_t)-1; /* c0 delayed by one word */
+		a0 = 0;
+		a1 = 0;
+		b0 = 0;
+		b1 = 0;
+		j = len;
+		while (j -- > 0) { /* scan every word, top to bottom */
+			uint32_t aw, bw;
+
+			aw = a[j];
+			bw = b[j];
+			a0 ^= (a0 ^ aw) & c0; /* a0 <- aw while c0 is set */
+			a1 ^= (a1 ^ aw) & c1; /* a1 <- aw while c1 is set */
+			b0 ^= (b0 ^ bw) & c0;
+			b1 ^= (b1 ^ bw) & c1;
+			c1 = c0;
+			c0 &= (((aw | bw) + 0xFFFF) >> 16) - (uint32_t)1;
+		}
+
+		/*
+		 * If c1 = 0, then we grabbed two words for a and b.
+		 * If c1 != 0 but c0 = 0, then we grabbed one word. It
+		 * is not possible that c1 != 0 and c0 != 0, because that
+		 * would mean that both integers are zero.
+		 */
+		a1 |= a0 & c1;
+		a0 &= ~c1;
+		b1 |= b0 & c1;
+		b0 &= ~c1;
+		a_hi = (a0 << 15) + a1;
+		b_hi = (b0 << 15) + b1;
+		a_lo = a[0];
+		b_lo = b[0];
+
+		/*
+		 * Compute reduction factors:
+		 *
+		 *   a' = a*pa + b*pb
+		 *   b' = a*qa + b*qb
+		 *
+		 * such that a' and b' are both multiples of 2^15, but are
+		 * only marginally larger than a and b.
+		 */
+		pa = 1;
+		pb = 0;
+		qa = 0;
+		qb = 1;
+		for (i = 0; i < 15; i ++) {
+			/*
+			 * At each iteration:
+			 *
+			 *   a <- (a-b)/2 if: a is odd, b is odd, a_hi > b_hi
+			 *   b <- (b-a)/2 if: a is odd, b is odd, a_hi <= b_hi
+			 *   a <- a/2 if: a is even
+			 *   b <- b/2 if: a is odd, b is even
+			 *
+			 * We multiply a_lo and b_lo by 2 at each
+			 * iteration, thus a division by 2 really is a
+			 * non-multiplication by 2.
+			 */
+			uint32_t r, oa, ob, cAB, cBA, cA;
+
+			/*
+			 * cAB = 1 if b must be subtracted from a
+			 * cBA = 1 if a must be subtracted from b
+			 * cA = 1 if a is divided by 2, 0 otherwise
+			 *
+			 * Rules:
+			 *
+			 *   cAB and cBA cannot be both 1.
+			 *   if a is not divided by 2, b is.
+			 */
+			r = GT(a_hi, b_hi);
+			oa = (a_lo >> i) & 1; /* current "low bit" of a */
+			ob = (b_lo >> i) & 1; /* current "low bit" of b */
+			cAB = oa & ob & r;
+			cBA = oa & ob & NOT(r);
+			cA = cAB | NOT(oa);
+
+			/*
+			 * Conditional subtractions.
+			 */
+			a_lo -= b_lo & -cAB;
+			a_hi -= b_hi & -cAB;
+			pa -= qa & -(int32_t)cAB;
+			pb -= qb & -(int32_t)cAB;
+			b_lo -= a_lo & -cBA;
+			b_hi -= a_hi & -cBA;
+			qa -= pa & -(int32_t)cBA;
+			qb -= pb & -(int32_t)cBA;
+
+			/*
+			 * Shifting.
+			 */
+			a_lo += a_lo & (cA - 1);
+			pa += pa & ((int32_t)cA - 1);
+			pb += pb & ((int32_t)cA - 1);
+			a_hi ^= (a_hi ^ (a_hi >> 1)) & -cA;
+			b_lo += b_lo & -cA;
+			qa += qa & -(int32_t)cA;
+			qb += qb & -(int32_t)cA;
+			b_hi ^= (b_hi ^ (b_hi >> 1)) & (cA - 1);
+		}
+
+		/*
+		 * Replace a and b with new values a' and b'.
+		 */
+		r = co_reduce(a, b, len, pa, pb, qa, qb);
+		pa -= pa * ((r & 1) << 1); /* negate pa, pb if a was negated */
+		pb -= pb * ((r & 1) << 1);
+		qa -= qa * (r & 2); /* negate qa, qb if b was negated */
+		qb -= qb * (r & 2);
+		co_reduce_mod(u, v, len, pa, pb, qa, qb, m + 1, m0i);
+	}
+
+	/*
+	 * Now one of the arrays should be 0, and the other contains
+	 * the GCD. If a is 0, then u is 0 as well, and v contains
+	 * the division result.
+	 * Result is correct if and only if GCD is 1.
+	 */
+	r = (a[0] | b[0]) ^ 1; /* 0 only if the merged low word is 1 */
+	u[0] |= v[0]; /* merge u and v so the result ends up in x */
+	for (k = 1; k < len; k ++) {
+		r |= a[k] | b[k];
+		u[k] |= v[k];
+	}
+	return EQ0(r); /* presumably 1 iff r == 0, i.e. GCD(y,m) = 1 */
+}
diff --git a/third_party/bearssl/src/i15_modpow.c b/third_party/bearssl/src/i15_modpow.c
new file mode 100644
index 0000000..9bf304e
--- /dev/null
+++ b/third_party/bearssl/src/i15_modpow.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* x <- x^e mod m; e is big-endian, scanned from its low bit; see inner.h */
+void
+br_i15_modpow(uint16_t *x,
+	const unsigned char *e, size_t elen,
+	const uint16_t *m, uint16_t m0i, uint16_t *t1, uint16_t *t2)
+{
+	size_t mlen;
+	unsigned k;
+
+	mlen = ((m[0] + 31) >> 4) * sizeof m[0]; /* header + words, bytes */
+	memcpy(t1, x, mlen);
+	br_i15_to_monty(t1, m);
+	br_i15_zero(x, m[0]);
+	x[1] = 1; /* accumulator starts at 1 (assuming br_i15_zero clears) */
+	for (k = 0; k < ((unsigned)elen << 3); k ++) {
+		uint32_t ctl;
+
+		ctl = (e[elen - 1 - (k >> 3)] >> (k & 7)) & 1; /* bit k of e */
+		br_i15_montymul(t2, x, t1, m, m0i);
+		CCOPY(ctl, x, t2, mlen); /* presumably copies only if ctl = 1 */
+		br_i15_montymul(t2, t1, t1, m, m0i); /* t1 <- t1 squared */
+		memcpy(t1, t2, mlen);
+	}
+}
diff --git a/third_party/bearssl/src/i15_modpow2.c b/third_party/bearssl/src/i15_modpow2.c
new file mode 100644
index 0000000..4b32118
--- /dev/null
+++ b/third_party/bearssl/src/i15_modpow2.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Windowed x <- x^e mod m; returns 0 if tmp[] is too small; see inner.h */
+uint32_t
+br_i15_modpow_opt(uint16_t *x,
+	const unsigned char *e, size_t elen,
+	const uint16_t *m, uint16_t m0i, uint16_t *tmp, size_t twlen)
+{
+	size_t mlen, mwlen;
+	uint16_t *t1, *t2, *base;
+	size_t u, v;
+	uint32_t acc;
+	int acc_len, win_len;
+
+	/*
+	 * Get modulus size.
+	 */
+	mwlen = (m[0] + 31) >> 4; /* header word plus value words */
+	mlen = mwlen * sizeof m[0]; /* same, in bytes (before rounding) */
+	mwlen += (mwlen & 1); /* round up to an even word count */
+	t1 = tmp;
+	t2 = tmp + mwlen;
+
+	/*
+	 * Compute possible window size, with a maximum of 5 bits.
+	 * When the window has size 1 bit, we use a specific code
+	 * that requires only two temporaries. Otherwise, for a
+	 * window of k bits, we need 2^k+1 temporaries.
+	 */
+	if (twlen < (mwlen << 1)) {
+		return 0; /* tmp[] cannot hold even t1 and t2 */
+	}
+	for (win_len = 5; win_len > 1; win_len --) {
+		if ((((uint32_t)1 << win_len) + 1) * mwlen <= twlen) {
+			break;
+		}
+	}
+
+	/*
+	 * Everything is done in Montgomery representation.
+	 */
+	br_i15_to_monty(x, m);
+
+	/*
+	 * Compute window contents. If the window has size one bit only,
+	 * then t2 is set to x; otherwise, t2[0] is left untouched, and
+	 * t2[k] is set to x^k (for k >= 1).
+	 */
+	if (win_len == 1) {
+		memcpy(t2, x, mlen);
+	} else {
+		memcpy(t2 + mwlen, x, mlen);
+		base = t2 + mwlen;
+		for (u = 2; u < ((unsigned)1 << win_len); u ++) {
+			br_i15_montymul(base + mwlen, base, x, m, m0i);
+			base += mwlen;
+		}
+	}
+
+	/*
+	 * We need to set x to 1, in Montgomery representation. This can
+	 * be done efficiently by setting the high word to 1, then doing
+	 * one word-sized shift.
+	 */
+	br_i15_zero(x, m[0]);
+	x[(m[0] + 15) >> 4] = 1;
+	br_i15_muladd_small(x, 0, m);
+
+	/*
+	 * We process bits from most to least significant. At each
+	 * loop iteration, we have acc_len bits in acc.
+	 */
+	acc = 0;
+	acc_len = 0;
+	while (acc_len > 0 || elen > 0) {
+		int i, k;
+		uint32_t bits;
+
+		/*
+		 * Get the next bits.
+		 */
+		k = win_len;
+		if (acc_len < win_len) {
+			if (elen > 0) {
+				acc = (acc << 8) | *e ++;
+				elen --;
+				acc_len += 8;
+			} else {
+				k = acc_len; /* last, shorter chunk */
+			}
+		}
+		bits = (acc >> (acc_len - k)) & (((uint32_t)1 << k) - 1);
+		acc_len -= k;
+
+		/*
+		 * We could get exactly k bits. Compute k squarings.
+		 */
+		for (i = 0; i < k; i ++) {
+			br_i15_montymul(t1, x, x, m, m0i);
+			memcpy(x, t1, mlen);
+		}
+
+		/*
+		 * Window lookup: we want to set t2 to the window
+		 * lookup value, assuming the bits are non-zero. If
+		 * the window length is 1 bit only, then t2 is
+		 * already set; otherwise, we do a constant-time lookup.
+		 */
+		if (win_len > 1) {
+			br_i15_zero(t2, m[0]);
+			base = t2 + mwlen;
+			for (u = 1; u < ((uint32_t)1 << k); u ++) {
+				uint32_t mask;
+
+				mask = -EQ(u, bits);
+				for (v = 1; v < mwlen; v ++) {
+					t2[v] |= mask & base[v];
+				}
+				base += mwlen;
+			}
+		}
+
+		/*
+		 * Multiply with the looked-up value. We keep the
+		 * product only if the exponent bits are not all-zero.
+		 */
+		br_i15_montymul(t1, x, t2, m, m0i);
+		CCOPY(NEQ(bits, 0), x, t1, mlen);
+	}
+
+	/*
+	 * Convert back from Montgomery representation, and exit.
+	 */
+	br_i15_from_monty(x, m, m0i);
+	return 1;
+}
diff --git a/third_party/bearssl/src/i15_montmul.c b/third_party/bearssl/src/i15_montmul.c
new file mode 100644
index 0000000..e98bc32
--- /dev/null
+++ b/third_party/bearssl/src/i15_montmul.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Montgomery product of x and y modulo m, written to d; see inner.h */
+void
+br_i15_montymul(uint16_t *d, const uint16_t *x, const uint16_t *y,
+	const uint16_t *m, uint16_t m0i)
+{
+	size_t len, len4, u, v;
+	uint32_t dh;
+
+	len = (m[0] + 15) >> 4;
+	len4 = len & ~(size_t)3; /* largest multiple of 4 not above len */
+	br_i15_zero(d, m[0]);
+	dh = 0; /* carry out of the top word d[len] */
+	for (u = 0; u < len; u ++) { /* one pass per word of x */
+		uint32_t f, xu, r, zh;
+
+		xu = x[u + 1];
+		f = MUL15((d[1] + MUL15(x[u + 1], y[1])) & 0x7FFF, m0i)
+			& 0x7FFF;
+#if BR_ARMEL_CORTEXM_GCC
+		if (len4 != 0) {
+			uint16_t *limit;
+
+			limit = d + len4;
+			asm volatile (
+"\n\
+	@ carry: r=r2                                              \n\
+	@ multipliers: xu=r3 f=r4                                  \n\
+	@ base registers: d+v=r5 y+v=r6 m+v=r7                     \n\
+	@ r8 contains 0x7FFF                                       \n\
+	@ r9 contains d+len4                                       \n\
+	ldr	r0, %[limit]                                       \n\
+	ldr	r3, %[xu]                                          \n\
+	mov	r9, r0                                             \n\
+	ldr	r4, %[f]                                           \n\
+	eor	r2, r2                                             \n\
+	ldr	r5, %[d]                                           \n\
+	sub	r1, r2, #1                                         \n\
+	ldr	r6, %[y]                                           \n\
+	lsr	r1, r1, #17                                        \n\
+	ldr	r7, %[m]                                           \n\
+	mov	r8, r1                                             \n\
+loop%=:                                                            \n\
+	ldrh	r0, [r6, #2]                                       \n\
+	ldrh	r1, [r7, #2]                                       \n\
+	mul	r0, r3                                             \n\
+	mul	r1, r4                                             \n\
+	add	r2, r0, r2                                         \n\
+	ldrh	r0, [r5, #2]                                       \n\
+	add	r2, r1, r2                                         \n\
+	mov	r1, r8                                             \n\
+	add	r2, r0, r2                                         \n\
+	and	r1, r2                                             \n\
+	lsr	r2, r2, #15                                        \n\
+	strh	r1, [r5, #0]                                       \n\
+		                                                   \n\
+	ldrh	r0, [r6, #4]                                       \n\
+	ldrh	r1, [r7, #4]                                       \n\
+	mul	r0, r3                                             \n\
+	mul	r1, r4                                             \n\
+	add	r2, r0, r2                                         \n\
+	ldrh	r0, [r5, #4]                                       \n\
+	add	r2, r1, r2                                         \n\
+	mov	r1, r8                                             \n\
+	add	r2, r0, r2                                         \n\
+	and	r1, r2                                             \n\
+	lsr	r2, r2, #15                                        \n\
+	strh	r1, [r5, #2]                                       \n\
+		                                                   \n\
+	ldrh	r0, [r6, #6]                                       \n\
+	ldrh	r1, [r7, #6]                                       \n\
+	mul	r0, r3                                             \n\
+	mul	r1, r4                                             \n\
+	add	r2, r0, r2                                         \n\
+	ldrh	r0, [r5, #6]                                       \n\
+	add	r2, r1, r2                                         \n\
+	mov	r1, r8                                             \n\
+	add	r2, r0, r2                                         \n\
+	and	r1, r2                                             \n\
+	lsr	r2, r2, #15                                        \n\
+	strh	r1, [r5, #4]                                       \n\
+		                                                   \n\
+	ldrh	r0, [r6, #8]                                       \n\
+	ldrh	r1, [r7, #8]                                       \n\
+	mul	r0, r3                                             \n\
+	mul	r1, r4                                             \n\
+	add	r2, r0, r2                                         \n\
+	ldrh	r0, [r5, #8]                                       \n\
+	add	r2, r1, r2                                         \n\
+	mov	r1, r8                                             \n\
+	add	r2, r0, r2                                         \n\
+	and	r1, r2                                             \n\
+	lsr	r2, r2, #15                                        \n\
+	strh	r1, [r5, #6]                                       \n\
+		                                                   \n\
+	add	r5, r5, #8                                         \n\
+	add	r6, r6, #8                                         \n\
+	add	r7, r7, #8                                         \n\
+	cmp	r5, r9                                             \n\
+	bne	loop%=                                             \n\
+		                                                   \n\
+	str	r2, %[carry]                                       \n\
+"
+: [carry] "=m" (r)
+: [xu] "m" (xu), [f] "m" (f), [d] "m" (d), [y] "m" (y),
+	[m] "m" (m), [limit] "m" (limit)
+: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" );
+		} else {
+			r = 0;
+		}
+		v = len4; /* the tail loop below resumes after len4 words */
+#else
+		r = 0;
+		for (v = 0; v < len4; v += 4) { /* four words per pass */
+			uint32_t z;
+
+			z = d[v + 1] + MUL15(xu, y[v + 1])
+				+ MUL15(f, m[v + 1]) + r;
+			r = z >> 15;
+			d[v + 0] = z & 0x7FFF; /* stored one slot lower */
+			z = d[v + 2] + MUL15(xu, y[v + 2])
+				+ MUL15(f, m[v + 2]) + r;
+			r = z >> 15;
+			d[v + 1] = z & 0x7FFF;
+			z = d[v + 3] + MUL15(xu, y[v + 3])
+				+ MUL15(f, m[v + 3]) + r;
+			r = z >> 15;
+			d[v + 2] = z & 0x7FFF;
+			z = d[v + 4] + MUL15(xu, y[v + 4])
+				+ MUL15(f, m[v + 4]) + r;
+			r = z >> 15;
+			d[v + 3] = z & 0x7FFF;
+		}
+#endif
+		for (; v < len; v ++) { /* remaining len - len4 words */
+			uint32_t z;
+
+			z = d[v + 1] + MUL15(xu, y[v + 1])
+				+ MUL15(f, m[v + 1]) + r;
+			r = z >> 15;
+			d[v + 0] = z & 0x7FFF;
+		}
+
+		zh = dh + r; /* fold the previous top carry into this pass */
+		d[len] = zh & 0x7FFF;
+		dh = zh >> 15;
+	}
+
+	/*
+	 * Restore the bit length (it was overwritten in the loop above).
+	 */
+	d[0] = m[0];
+
+	/*
+	 * d[] may be greater than m[], but it is still lower than twice
+	 * the modulus.
+	 */
+	br_i15_sub(d, m, NEQ(dh, 0) | NOT(br_i15_sub(d, m, 0)));
+}
diff --git a/third_party/bearssl/src/i15_mulacc.c b/third_party/bearssl/src/i15_mulacc.c
new file mode 100644
index 0000000..7a073ac
--- /dev/null
+++ b/third_party/bearssl/src/i15_mulacc.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; multiplies a[] by b[] one word of b[] at a time, summing into d[] and setting d[0] */
+void
+br_i15_mulacc(uint16_t *d, const uint16_t *a, const uint16_t *b)
+{
+	size_t alen, blen, u;
+	unsigned dl, dh;
+
+	alen = (a[0] + 15) >> 4; /* number of value words in a[] */
+	blen = (b[0] + 15) >> 4; /* number of value words in b[] */
+
+	/*
+	 * Announced bit length of d[] will be the sum of the announced
+	 * bit lengths of a[] and b[]; but the lengths are encoded.
+	 */
+	dl = (a[0] & 15) + (b[0] & 15); /* sum of the low 4-bit fields */
+	dh = (a[0] >> 4) + (b[0] >> 4); /* sum of the upper fields */
+	d[0] = (dh << 4) + dl + (~(uint32_t)(dl - 15) >> 31); /* the last term is 1 when dl >= 15, so the excess carries into the upper field */
+
+	for (u = 0; u < blen; u ++) {
+		uint32_t f;
+		size_t v;
+		uint32_t cc;
+
+		f = b[1 + u]; /* current multiplier word */
+		cc = 0; /* carry restarts for every row */
+		for (v = 0; v < alen; v ++) {
+			uint32_t z;
+
+			z = (uint32_t)d[1 + u + v] + MUL15(f, a[1 + v]) + cc; /* current content of d[] is part of the sum */
+			cc = z >> 15; /* everything above the low 15 bits is carried */
+			d[1 + u + v] = z & 0x7FFF;
+		}
+		d[1 + u + alen] = cc; /* overwritten (not added to) with the row's final carry */
+	}
+}
diff --git a/third_party/bearssl/src/i15_muladd.c b/third_party/bearssl/src/i15_muladd.c
new file mode 100644
index 0000000..c4b7216
--- /dev/null
+++ b/third_party/bearssl/src/i15_muladd.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Constant-time division. The divisor must not be larger than 16 bits,
+ * and the quotient must fit on 17 bits. Returns the quotient; the remainder is written to *r when r is not NULL.
+ */
+static uint32_t
+divrem16(uint32_t x, uint32_t d, uint32_t *r)
+{
+	int i;
+	uint32_t q;
+
+	q = 0;
+	d <<= 16; /* line the divisor up with quotient bit 16 */
+	for (i = 16; i >= 0; i --) { /* always 17 iterations, one per quotient bit */
+		uint32_t ctl;
+
+		ctl = LE(d, x); /* LE is defined elsewhere; presumably 1 when d <= x, else 0 */
+		q |= ctl << i;
+		x -= (-ctl) & d; /* -ctl is an all-ones mask when ctl is 1, zero otherwise */
+		d >>= 1;
+	}
+	if (r != NULL) {
+		*r = x; /* what is left of x is the remainder */
+	}
+	return q;
+}
+
+/* see inner.h; replaces x with (x*2^15 + z) mod m, using the quotient estimation described below */
+void
+br_i15_muladd_small(uint16_t *x, uint16_t z, const uint16_t *m)
+{
+	/*
+	 * Constant-time: we accept to leak the exact bit length of the
+	 * modulus m.
+	 */
+	unsigned m_bitlen, mblr;
+	size_t u, mlen;
+	uint32_t hi, a0, a, b, q;
+	uint32_t cc, tb, over, under;
+
+	/*
+	 * Simple case: the modulus fits on one word.
+	 */
+	m_bitlen = m[0];
+	if (m_bitlen == 0) {
+		return; /* zero-length modulus: x is left untouched */
+	}
+	if (m_bitlen <= 15) {
+		uint32_t rem;
+
+		divrem16(((uint32_t)x[1] << 15) | z, m[1], &rem); /* quotient is discarded, only the remainder is kept */
+		x[1] = rem;
+		return;
+	}
+	mlen = (m_bitlen + 15) >> 4; /* number of value words in m[] */
+	mblr = m_bitlen & 15; /* low field of the encoded length; 0 is handled below as a full top word */
+
+	/*
+	 * Principle: we estimate the quotient (x*2^15+z)/m by
+	 * doing a 30/15 division with the high words.
+	 *
+	 * Let:
+	 *   w = 2^15
+	 *   a = (w*a0 + a1) * w^N + a2
+	 *   b = b0 * w^N + b2
+	 * such that:
+	 *   0 <= a0 < w
+	 *   0 <= a1 < w
+	 *   0 <= a2 < w^N
+	 *   w/2 <= b0 < w
+	 *   0 <= b2 < w^N
+	 *   a < w*b
+	 * I.e. the two top words of a are a0:a1, the top word of b is
+	 * b0, we ensured that b0 is "full" (high bit set), and a is
+	 * such that the quotient q = a/b fits on one word (0 <= q < w).
+	 *
+	 * If a = b*q + r (with 0 <= r < b), then we can estimate q by
+	 * using a division on the top words:
+	 *   a0*w + a1 = b0*u + v (with 0 <= v < b0)
+	 * Then the following holds:
+	 *   0 <= u <= w
+	 *   u-2 <= q <= u
+	 */
+	hi = x[mlen]; /* top word before the shift; acts as the extra high word of x in the subtraction below */
+	if (mblr == 0) {
+		a0 = x[mlen];
+		memmove(x + 2, x + 1, (mlen - 1) * sizeof *x); /* move the low mlen-1 words up by one position */
+		x[1] = z; /* z becomes the new least significant word */
+		a = (a0 << 15) + x[mlen];
+		b = m[mlen];
+	} else {
+		a0 = (x[mlen] << (15 - mblr)) | (x[mlen - 1] >> mblr); /* top bits of x, realigned by mblr bits */
+		memmove(x + 2, x + 1, (mlen - 1) * sizeof *x); /* move the low mlen-1 words up by one position */
+		x[1] = z; /* z becomes the new least significant word */
+		a = (a0 << 15) | (((x[mlen] << (15 - mblr))
+			| (x[mlen - 1] >> mblr)) & 0x7FFF); /* same realignment, taken after the shift */
+		b = (m[mlen] << (15 - mblr)) | (m[mlen - 1] >> mblr); /* top bits of m, realigned the same way */
+	}
+	q = divrem16(a, b, NULL); /* remainder is not needed here */
+
+	/*
+	 * We computed an estimate for q, but the real one may be q,
+	 * q-1 or q-2; moreover, the division may have returned a value
+	 * 8000 or even 8001 if the two high words were identical, and
+	 * we want to avoid values beyond 7FFF. We thus adjust q so
+	 * that the "true" multiplier will be q+1, q or q-1, and q is
+	 * in the 0000..7FFF range.
+	 */
+	q = MUX(EQ(b, a0), 0x7FFF, q - 1 + ((q - 1) >> 31)); /* last operand is q-1, except that q == 0 stays 0 */
+
+	/*
+	 * We subtract q*m from x (x has an extra high word of value 'hi').
+	 * Since q may be off by 1 (in either direction), we may have to
+	 * add or subtract m afterwards.
+	 *
+	 * The 'tb' flag will be true (1) at the end of the loop if the
+	 * result is greater than or equal to the modulus (not counting
+	 * 'hi' or the carry).
+	 */
+	cc = 0;
+	tb = 1;
+	for (u = 1; u <= mlen; u ++) {
+		uint32_t mw, zl, xw, nxw;
+
+		mw = m[u];
+		zl = MUL15(mw, q) + cc;
+		cc = zl >> 15; /* upper part of q*m[u] goes to the next word */
+		zl &= 0x7FFF;
+		xw = x[u];
+		nxw = xw - zl;
+		cc += nxw >> 31; /* bit 31 set means this word subtraction borrowed */
+		nxw &= 0x7FFF;
+		x[u] = nxw;
+		tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw)); /* later (higher) words override earlier ones unless equal */
+	}
+
+	/*
+	 * If we underestimated q, then either cc < hi (one extra bit
+	 * beyond the top array word), or cc == hi and tb is true (no
+	 * extra bit, but the result is not lower than the modulus).
+	 *
+	 * If we overestimated q, then cc > hi.
+	 */
+	over = GT(cc, hi);
+	under = ~over & (tb | LT(cc, hi));
+	br_i15_add(x, m, over); /* q was too large: add m back */
+	br_i15_sub(x, m, under); /* q was too small: subtract m once more */
+}
diff --git a/third_party/bearssl/src/i15_ninv15.c b/third_party/bearssl/src/i15_ninv15.c
new file mode 100644
index 0000000..de3a3ba
--- /dev/null
+++ b/third_party/bearssl/src/i15_ninv15.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; returns -(1/x) mod 2^15 for odd x and 0 for even x (assuming MUL15 multiplies and MUX(c, a, b) yields a when c is 1) */
+uint16_t
+br_i15_ninv15(uint16_t x)
+{
+	uint32_t y;
+
+	y = 2 - x; /* for odd x, x*y is already 1 modulo 4 */
+	y = MUL15(y, 2 - MUL15(x, y)); /* each y*(2 - x*y) step doubles the number of correct low bits: 4 */
+	y = MUL15(y, 2 - MUL15(x, y)); /* 8 */
+	y = MUL15(y, 2 - MUL15(x, y)); /* 16, enough for a 15-bit result */
+	return MUX(x & 1, -y, 0) & 0x7FFF; /* negate, keep 15 bits; even x has no inverse and yields 0 */
+}
diff --git a/third_party/bearssl/src/i15_reduce.c b/third_party/bearssl/src/i15_reduce.c
new file mode 100644
index 0000000..0931b10
--- /dev/null
+++ b/third_party/bearssl/src/i15_reduce.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; reduces a[] modulo m[] into x[], which receives the encoded bit length of m[] */
+void
+br_i15_reduce(uint16_t *x, const uint16_t *a, const uint16_t *m)
+{
+	uint32_t m_bitlen, a_bitlen;
+	size_t mlen, alen, u;
+
+	m_bitlen = m[0];
+	mlen = (m_bitlen + 15) >> 4; /* number of value words in m[] */
+
+	x[0] = m_bitlen; /* header is written even for a zero-length modulus */
+	if (m_bitlen == 0) {
+		return;
+	}
+
+	/*
+	 * If the source is shorter, then simply copy all words from a[]
+	 * and zero out the upper words.
+	 */
+	a_bitlen = a[0];
+	alen = (a_bitlen + 15) >> 4; /* number of value words in a[] */
+	if (a_bitlen < m_bitlen) {
+		memcpy(x + 1, a + 1, alen * sizeof *a);
+		for (u = alen; u < mlen; u ++) {
+			x[u + 1] = 0;
+		}
+		return;
+	}
+
+	/*
+	 * The source length is at least equal to that of the modulus.
+	 * We must thus copy N-1 words, and input the remaining words
+	 * one by one.
+	 */
+	memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a); /* top mlen-1 words of a[] */
+	x[mlen] = 0; /* top word of x starts empty */
+	for (u = 1 + alen - mlen; u > 0; u --) { /* remaining words of a[], most significant first */
+		br_i15_muladd_small(x, a[u], m);
+	}
+}
diff --git a/third_party/bearssl/src/i15_rshift.c b/third_party/bearssl/src/i15_rshift.c
new file mode 100644
index 0000000..f9991ab
--- /dev/null
+++ b/third_party/bearssl/src/i15_rshift.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; shifts the value words of x[] right by count bits in place (x[0] is not modified) */
+void
+br_i15_rshift(uint16_t *x, int count)
+{
+	size_t u, len;
+	unsigned r;
+
+	len = (x[0] + 15) >> 4; /* number of value words */
+	if (len == 0) {
+		return; /* nothing to shift */
+	}
+	r = x[1] >> count; /* surviving bits of the lowest word; count is presumably below 15 - confirm with callers */
+	for (u = 2; u <= len; u ++) {
+		unsigned w;
+
+		w = x[u];
+		x[u - 1] = ((w << (15 - count)) | r) & 0x7FFF; /* low bits of w fill the top of the word below */
+		r = w >> count;
+	}
+	x[len] = r; /* top word only keeps its own shifted bits */
+}
diff --git a/third_party/bearssl/src/i15_sub.c b/third_party/bearssl/src/i15_sub.c
new file mode 100644
index 0000000..1983c4d
--- /dev/null
+++ b/third_party/bearssl/src/i15_sub.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; computes a - b word by word and returns the final borrow; a[] is presumably only updated when ctl is 1 (MUX is defined elsewhere) */
+uint32_t
+br_i15_sub(uint16_t *a, const uint16_t *b, uint32_t ctl)
+{
+	uint32_t cc;
+	size_t u, m;
+
+	cc = 0;
+	m = (a[0] + 31) >> 4; /* word count plus one, so the loop below visits every value word */
+	for (u = 1; u < m; u ++) {
+		uint32_t aw, bw, naw;
+
+		aw = a[u];
+		bw = b[u];
+		naw = aw - bw - cc;
+		cc = naw >> 31; /* top bit set means the subtraction wrapped: borrow for the next word */
+		a[u] = MUX(ctl, naw & 0x7FFF, aw); /* a store happens either way; only the stored value depends on ctl */
+	}
+	return cc; /* borrow is computed regardless of ctl */
+}
diff --git a/third_party/bearssl/src/i15_tmont.c b/third_party/bearssl/src/i15_tmont.c
new file mode 100644
index 0000000..d5c4b8b
--- /dev/null
+++ b/third_party/bearssl/src/i15_tmont.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; injects one zero word into x modulo m for each value word of m[] */
+void
+br_i15_to_monty(uint16_t *x, const uint16_t *m)
+{
+	unsigned k;
+
+	for (k = (m[0] + 15) >> 4; k > 0; k --) { /* one iteration per value word of m[] */
+		br_i15_muladd_small(x, 0, m); /* z = 0: per that function's comments, x becomes x*2^15 mod m */
+	}
+}
diff --git a/third_party/bearssl/src/i31_add.c b/third_party/bearssl/src/i31_add.c
new file mode 100644
index 0000000..2ca47c6
--- /dev/null
+++ b/third_party/bearssl/src/i31_add.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; computes a + b word by word and returns the final carry; a[] is presumably only updated when ctl is 1 (MUX is defined elsewhere) */
+uint32_t
+br_i31_add(uint32_t *a, const uint32_t *b, uint32_t ctl)
+{
+	uint32_t cc;
+	size_t u, m;
+
+	cc = 0;
+	m = (a[0] + 63) >> 5; /* word count plus one, so the loop below visits every value word */
+	for (u = 1; u < m; u ++) {
+		uint32_t aw, bw, naw;
+
+		aw = a[u];
+		bw = b[u];
+		naw = aw + bw + cc;
+		cc = naw >> 31; /* bit 31 of the sum is the carry into the next word */
+		a[u] = MUX(ctl, naw & (uint32_t)0x7FFFFFFF, aw); /* a store happens either way; only the stored value depends on ctl */
+	}
+	return cc; /* carry is computed regardless of ctl */
+}
diff --git a/third_party/bearssl/src/i31_bitlen.c b/third_party/bearssl/src/i31_bitlen.c
new file mode 100644
index 0000000..3e127c2
--- /dev/null
+++ b/third_party/bearssl/src/i31_bitlen.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; returns the encoded bit length of the xlen words at x[]: (index of the top non-zero word << 5) + bit length of that word */
+uint32_t
+br_i31_bit_length(uint32_t *x, size_t xlen)
+{
+	uint32_t tw, twk;
+
+	tw = 0; /* highest non-zero word seen so far (0 while none was found) */
+	twk = 0; /* index of that word */
+	while (xlen -- > 0) { /* scans from the top word down to x[0], always over all words */
+		uint32_t w, c;
+
+		c = EQ(tw, 0); /* presumably 1 while no non-zero word has been captured */
+		w = x[xlen];
+		tw = MUX(c, w, tw); /* presumably keeps tw once it is non-zero */
+		twk = MUX(c, (uint32_t)xlen, twk);
+	}
+	return (twk << 5) + BIT_LENGTH(tw); /* BIT_LENGTH is defined elsewhere */
+}
diff --git a/third_party/bearssl/src/i31_decmod.c b/third_party/bearssl/src/i31_decmod.c
new file mode 100644
index 0000000..3cd7bfe
--- /dev/null
+++ b/third_party/bearssl/src/i31_decmod.c
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; decodes big-endian bytes into x[] if the value is lower than m; returns 1 on success, else 0 with the words of x[] set to 0 */
+uint32_t
+br_i31_decode_mod(uint32_t *x, const void *src, size_t len, const uint32_t *m)
+{
+	/*
+	 * Two-pass algorithm: in the first pass, we determine whether the
+	 * value fits; in the second pass, we do the actual write.
+	 *
+	 * During the first pass, 'r' contains the comparison result so
+	 * far:
+	 *  0x00000000   value is equal to the modulus
+	 *  0x00000001   value is greater than the modulus
+	 *  0xFFFFFFFF   value is lower than the modulus
+	 *
+	 * Since we iterate starting with the least significant bytes (at
+	 * the end of src[]), each new comparison overrides the previous
+	 * except when the comparison yields 0 (equal).
+	 *
+	 * During the second pass, 'r' is either 0xFFFFFFFF (value fits)
+	 * or 0x00000000 (value does not fit).
+	 *
+	 * We must iterate over all bytes of the source, _and_ possibly
+	 * some extra virtual bytes (with value 0) so as to cover the
+	 * complete modulus as well. We also add 4 such extra bytes beyond
+	 * the modulus length because it then guarantees that no accumulated
+	 * partial word remains to be processed.
+	 */
+	const unsigned char *buf;
+	size_t mlen, tlen;
+	int pass;
+	uint32_t r;
+
+	buf = src;
+	mlen = (m[0] + 31) >> 5; /* number of value words in m[] */
+	tlen = (mlen << 2); /* 4 bytes per modulus word */
+	if (tlen < len) {
+		tlen = len; /* the source is longer: cover all of it */
+	}
+	tlen += 4; /* the extra virtual bytes described above */
+	r = 0;
+	for (pass = 0; pass < 2; pass ++) {
+		size_t u, v;
+		uint32_t acc;
+		int acc_len;
+
+		v = 1; /* index of the next 31-bit word */
+		acc = 0;
+		acc_len = 0;
+		for (u = 0; u < tlen; u ++) {
+			uint32_t b;
+
+			if (u < len) {
+				b = buf[len - 1 - u]; /* last source byte is the least significant */
+			} else {
+				b = 0; /* virtual padding byte */
+			}
+			acc |= (b << acc_len);
+			acc_len += 8;
+			if (acc_len >= 31) {
+				uint32_t xw;
+
+				xw = acc & (uint32_t)0x7FFFFFFF; /* one complete 31-bit word */
+				acc_len -= 31;
+				acc = b >> (8 - acc_len); /* bits of b that did not fit in xw */
+				if (v <= mlen) {
+					if (pass) {
+						x[v] = r & xw; /* r is all-ones or zero here */
+					} else {
+						uint32_t cc;
+
+						cc = (uint32_t)CMP(xw, m[v]);
+						r = MUX(EQ(cc, 0), r, cc); /* equal words keep the verdict of the lower words */
+					}
+				} else {
+					if (!pass) {
+						r = MUX(EQ(xw, 0), r, 1); /* non-zero word beyond the modulus: greater */
+					}
+				}
+				v ++;
+			}
+		}
+
+		/*
+		 * When we reach this point at the end of the first pass:
+		 * r is either 0, 1 or -1; we want to set r to 0 if it
+		 * is equal to 0 or 1, and leave it to -1 otherwise.
+		 *
+		 * When we reach this point at the end of the second pass:
+		 * r is either 0 or -1; we want to leave that value
+		 * untouched. This is a subcase of the previous.
+		 */
+		r >>= 1; /* 0 and 1 become 0; all-ones becomes 0x7FFFFFFF */
+		r |= (r << 1); /* 0x7FFFFFFF becomes all-ones again; 0 stays 0 */
+	}
+
+	x[0] = m[0]; /* x[] takes the encoded bit length of the modulus */
+	return r & (uint32_t)1;
+}
diff --git a/third_party/bearssl/src/i31_decode.c b/third_party/bearssl/src/i31_decode.c
new file mode 100644
index 0000000..8ec6d90
--- /dev/null
+++ b/third_party/bearssl/src/i31_decode.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; decodes len big-endian bytes into 31-bit words at x[1..] and stores the computed bit length in x[0] */
+void
+br_i31_decode(uint32_t *x, const void *src, size_t len)
+{
+	const unsigned char *buf;
+	size_t u, v;
+	uint32_t acc;
+	int acc_len;
+
+	buf = src;
+	u = len;
+	v = 1; /* index of the next word to write */
+	acc = 0;
+	acc_len = 0;
+	while (u -- > 0) { /* from the last byte (least significant) to the first */
+		uint32_t b;
+
+		b = buf[u];
+		acc |= (b << acc_len);
+		acc_len += 8;
+		if (acc_len >= 31) {
+			x[v ++] = acc & (uint32_t)0x7FFFFFFF; /* emit one complete 31-bit word */
+			acc_len -= 31;
+			acc = b >> (8 - acc_len); /* bits of b that did not fit in that word */
+		}
+	}
+	if (acc_len != 0) {
+		x[v ++] = acc; /* leftover bits form a final partial word */
+	}
+	x[0] = br_i31_bit_length(x + 1, v - 1); /* v - 1 words were written */
+}
diff --git a/third_party/bearssl/src/i31_decred.c b/third_party/bearssl/src/i31_decred.c
new file mode 100644
index 0000000..43db662
--- /dev/null
+++ b/third_party/bearssl/src/i31_decred.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; decodes big-endian bytes of any length into x[], reducing modulo m as the bytes are injected */
+void
+br_i31_decode_reduce(uint32_t *x,
+	const void *src, size_t len, const uint32_t *m)
+{
+	uint32_t m_ebitlen, m_rbitlen;
+	size_t mblen, k;
+	const unsigned char *buf;
+	uint32_t acc;
+	int acc_len;
+
+	/*
+	 * Get the encoded bit length.
+	 */
+	m_ebitlen = m[0];
+
+	/*
+	 * Special case for an invalid (null) modulus.
+	 */
+	if (m_ebitlen == 0) {
+		x[0] = 0;
+		return;
+	}
+
+	/*
+	 * Clear the destination.
+	 */
+	br_i31_zero(x, m_ebitlen);
+
+	/*
+	 * First decode directly as many bytes as possible. This requires
+	 * computing the actual bit length.
+	 */
+	m_rbitlen = m_ebitlen >> 5; /* upper field of the encoded length */
+	m_rbitlen = (m_ebitlen & 31) + (m_rbitlen << 5) - m_rbitlen; /* low field + 31 * upper field */
+	mblen = (m_rbitlen + 7) >> 3; /* modulus length in bytes, rounded up */
+	k = mblen - 1; /* 8*k bits is strictly less than the modulus bit length */
+	if (k >= len) {
+		br_i31_decode(x, src, len); /* the whole source is short enough: no reduction needed */
+		x[0] = m_ebitlen;
+		return;
+	}
+	buf = src;
+	br_i31_decode(x, buf, k); /* the first k bytes are decoded directly */
+	x[0] = m_ebitlen; /* replace the length set by br_i31_decode with that of the modulus */
+
+	/*
+	 * Input remaining bytes, using 31-bit words.
+	 */
+	acc = 0;
+	acc_len = 0;
+	while (k < len) {
+		uint32_t v;
+
+		v = buf[k ++];
+		if (acc_len >= 23) { /* this byte completes a 31-bit chunk */
+			acc_len -= 23; /* number of bits of v left over after the chunk */
+			acc <<= (8 - acc_len);
+			acc |= v >> acc_len; /* top bits of v finish the chunk */
+			br_i31_muladd_small(x, acc, m);
+			acc = v & (0xFF >> (8 - acc_len)); /* low bits of v start the next chunk */
+		} else {
+			acc = (acc << 8) | v;
+			acc_len += 8;
+		}
+	}
+
+	/*
+	 * We may have some bits accumulated. We then perform a shift to
+	 * be able to inject these bits as a full 31-bit word.
+	 */
+	if (acc_len != 0) {
+		acc = (acc | (x[1] << acc_len)) & 0x7FFFFFFF; /* low bits of x[1] complete the word */
+		br_i31_rshift(x, 31 - acc_len); /* drop from x the bits just moved into acc */
+		br_i31_muladd_small(x, acc, m);
+	}
+}
diff --git a/third_party/bearssl/src/i31_encode.c b/third_party/bearssl/src/i31_encode.c
new file mode 100644
index 0000000..b6b40c4
--- /dev/null
+++ b/third_party/bearssl/src/i31_encode.c
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; writes the value of x[] as exactly len big-endian bytes at dst (zero-padded, or truncated to the low bytes) */
+void
+br_i31_encode(void *dst, size_t len, const uint32_t *x)
+{
+	unsigned char *buf;
+	size_t k, xlen;
+	uint32_t acc;
+	int acc_len;
+
+	xlen = (x[0] + 31) >> 5; /* number of value words in x[] */
+	if (xlen == 0) {
+		memset(dst, 0, len); /* zero-length value: output is all zeros */
+		return;
+	}
+	buf = (unsigned char *)dst + len; /* one past the end; output is produced backwards */
+	k = 1;
+	acc = 0;
+	acc_len = 0;
+	while (len != 0) {
+		uint32_t w;
+
+		w = (k <= xlen) ? x[k] : 0; /* words beyond the value read as zero */
+		k ++;
+		if (acc_len == 0) {
+			acc = w; /* nothing pending: hold this word, no output yet */
+			acc_len = 31;
+		} else {
+			uint32_t z;
+
+			z = acc | (w << acc_len); /* 32 output bits: pending bits plus the low bits of w */
+			acc_len --;
+			acc = w >> (31 - acc_len); /* bits of w that did not fit in z */
+			if (len >= 4) {
+				buf -= 4;
+				len -= 4;
+				br_enc32be(buf, z); /* defined elsewhere; presumably a 32-bit big-endian store */
+			} else {
+				switch (len) { /* 1 to 3 bytes left: write only the low bytes of z */
+				case 3:
+					buf[-3] = (unsigned char)(z >> 16);
+					/* fall through */
+				case 2:
+					buf[-2] = (unsigned char)(z >> 8);
+					/* fall through */
+				case 1:
+					buf[-1] = (unsigned char)z;
+					break;
+				}
+				return;
+			}
+		}
+	}
+}
diff --git a/third_party/bearssl/src/i31_fmont.c b/third_party/bearssl/src/i31_fmont.c
new file mode 100644
index 0000000..c24b417
--- /dev/null
+++ b/third_party/bearssl/src/i31_fmont.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; x[] is updated in place, word count comes from m[0] */
+void
+br_i31_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i)
+{
+	size_t len, u, v;
+
+	len = (m[0] + 31) >> 5;
+	for (u = 0; u < len; u ++) {	/* one word-sized shift per pass */
+		uint32_t f;
+		uint64_t cc;
+
+		f = MUL31_lo(x[1], m0i);	/* from low word x[1] and m0i */
+		cc = 0;
+		for (v = 0; v < len; v ++) {
+			uint64_t z;
+
+			z = (uint64_t)x[v + 1] + MUL31(f, m[v + 1]) + cc;
+			cc = z >> 31;
+			if (v != 0) {	/* low word (v == 0) is dropped */
+				x[v] = (uint32_t)z & 0x7FFFFFFF;
+			}
+		}
+		x[len] = (uint32_t)cc;	/* last carry is the new top word */
+	}
+
+	/*
+	 * We may have to do an extra subtraction, but only if the
+	 * value in x[] is indeed greater than or equal to that of m[],
+	 * which is why we must do two calls (first call computes the
+	 * carry, second call performs the subtraction only if the carry
+	 * is 0).
+	 */
+	br_i31_sub(x, m, NOT(br_i31_sub(x, m, 0)));
+}
diff --git a/third_party/bearssl/src/i31_iszero.c b/third_party/bearssl/src/i31_iszero.c
new file mode 100644
index 0000000..8a7ea44
--- /dev/null
+++ b/third_party/bearssl/src/i31_iszero.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; returns 1 if all value words of x[] are 0, else 0 */
+uint32_t
+br_i31_iszero(const uint32_t *x)
+{
+	uint32_t z;
+	size_t u;
+
+	z = 0;
+	for (u = (x[0] + 31) >> 5; u > 0; u --) {	/* x[1]..x[count] */
+		z |= x[u];	/* OR of all words, no early exit */
+	}
+	return ~(z | -z) >> 31;	/* (z | -z) has its top bit set iff z != 0 */
+}
diff --git a/third_party/bearssl/src/i31_moddiv.c b/third_party/bearssl/src/i31_moddiv.c
new file mode 100644
index 0000000..9950591
--- /dev/null
+++ b/third_party/bearssl/src/i31_moddiv.c
@@ -0,0 +1,488 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * In this file, we handle big integers with a custom format, i.e.
+ * without the usual one-word header. Value is split into 31-bit words,
+ * each stored in a 32-bit slot (top bit is zero) in little-endian
+ * order. The length (in words) is provided explicitly. In some cases,
+ * the value can be negative (using two's complement representation). In
+ * some cases, the top word is allowed to have a 32nd bit.
+ */
+
+/*
+ * Negate big integer conditionally. The value consists of 'len' words,
+ * with 31 bits in each word (the top bit of each word should be 0,
+ * except possibly for the last word). If 'ctl' is 1, the negation is
+ * computed; otherwise, if 'ctl' is 0, then the value is unchanged.
+ */
+static void
+cond_negate(uint32_t *a, size_t len, uint32_t ctl)
+{
+	size_t k;
+	uint32_t cc, xm;
+
+	cc = ctl;	/* the "+1" of two's complement, when ctl = 1 */
+	xm = -ctl >> 1;	/* 0x7FFFFFFF if ctl = 1, 0 if ctl = 0 */
+	for (k = 0; k < len; k ++) {
+		uint32_t aw;
+
+		aw = a[k];
+		aw = (aw ^ xm) + cc;	/* flip the low 31 bits, add carry */
+		a[k] = aw & 0x7FFFFFFF;
+		cc = aw >> 31;	/* carry into the next word */
+	}
+}
+
+/*
+ * Finish modular reduction. Rules on input parameters:
+ *
+ *   if neg = 1, then -m <= a < 0
+ *   if neg = 0, then 0 <= a < 2*m
+ *
+ * If neg = 0, then the top word of a[] may use 32 bits.
+ *
+ * Also, modulus m must be odd.
+ */
+static void
+finish_mod(uint32_t *a, size_t len, const uint32_t *m, uint32_t neg)
+{
+	size_t k;
+	uint32_t cc, xm, ym;
+
+	/*
+	 * First pass: compare a (assumed nonnegative) with m.
+	 * Note that if the final word uses the top extra bit, then
+	 * subtracting m must yield a value less than 2^31, since we
+	 * assumed that a < 2*m.
+	 */
+	cc = 0;
+	for (k = 0; k < len; k ++) {
+		uint32_t aw, mw;
+
+		aw = a[k];
+		mw = m[k];
+		cc = (aw - mw - cc) >> 31;	/* borrow only; a[] not written */
+	}
+
+	/*
+	 * At this point:
+	 *   if neg = 1, then we must add m (regardless of cc)
+	 *   if neg = 0 and cc = 0, then we must subtract m
+	 *   if neg = 0 and cc = 1, then we must do nothing
+	 */
+	xm = -neg >> 1;	/* 0x7FFFFFFF if neg = 1: words of m get flipped */
+	ym = -(neg | (1 - cc));	/* all-ones unless "do nothing" applies */
+	cc = neg;	/* initial borrow of 1 when adding m (neg = 1) */
+	for (k = 0; k < len; k ++) {
+		uint32_t aw, mw;
+
+		aw = a[k];
+		mw = (m[k] ^ xm) & ym;	/* m[k], flipped m[k], or 0 */
+		aw = aw - mw - cc;
+		a[k] = aw & 0x7FFFFFFF;
+		cc = aw >> 31;	/* borrow into the next word */
+	}
+}
+
+/*
+ * Compute:
+ *   a <- (a*pa+b*pb)/(2^31)
+ *   b <- (a*qa+b*qb)/(2^31)
+ * The division is assumed to be exact (i.e. the low word is dropped).
+ * If the final a is negative, then it is negated. Similarly for b.
+ * Returned value is the combination of two bits:
+ *   bit 0: 1 if a had to be negated, 0 otherwise
+ *   bit 1: 1 if b had to be negated, 0 otherwise
+ *
+ * Factors pa, pb, qa and qb must be at most 2^31 in absolute value.
+ * Source integers a and b must be nonnegative; top word is not allowed
+ * to contain an extra 32nd bit.
+ */
+static uint32_t
+co_reduce(uint32_t *a, uint32_t *b, size_t len,
+	int64_t pa, int64_t pb, int64_t qa, int64_t qb)
+{
+	size_t k;
+	int64_t cca, ccb;
+	uint32_t nega, negb;
+
+	cca = 0;
+	ccb = 0;
+	for (k = 0; k < len; k ++) {
+		uint32_t wa, wb;
+		uint64_t za, zb;
+		uint64_t tta, ttb;
+
+		/*
+		 * Since:
+		 *   |pa| <= 2^31
+		 *   |pb| <= 2^31
+		 *   0 <= wa <= 2^31 - 1
+		 *   0 <= wb <= 2^31 - 1
+		 *   |cca| <= 2^32 - 1
+		 * Then:
+		 *   |za| <= (2^31-1)*(2^32) + (2^32-1) = 2^63 - 1
+		 *
+		 * Thus, the new value of cca is such that |cca| <= 2^32 - 1.
+		 * The same applies to ccb.
+		 */
+		wa = a[k];
+		wb = b[k];
+		za = wa * (uint64_t)pa + wb * (uint64_t)pb + (uint64_t)cca;
+		zb = wa * (uint64_t)qa + wb * (uint64_t)qb + (uint64_t)ccb;
+		if (k > 0) {	/* k == 0: low word is dropped (see above) */
+			a[k - 1] = za & 0x7FFFFFFF;
+			b[k - 1] = zb & 0x7FFFFFFF;
+		}
+
+		/*
+		 * For the new values of cca and ccb, we need a signed
+		 * right-shift; since, in C, right-shifting a signed
+		 * negative value is implementation-defined, we use a
+		 * custom portable sign extension expression.
+		 */
+#define M   ((uint64_t)1 << 32)
+		tta = za >> 31;
+		ttb = zb >> 31;
+		tta = (tta ^ M) - M;	/* sign-extend from bit 32 */
+		ttb = (ttb ^ M) - M;
+		cca = *(int64_t *)&tta;	/* same bits, read as signed */
+		ccb = *(int64_t *)&ttb;
+#undef M
+	}
+	a[len - 1] = (uint32_t)cca;	/* final carry is the new top word */
+	b[len - 1] = (uint32_t)ccb;
+
+	nega = (uint32_t)((uint64_t)cca >> 63);	/* sign bit of last carry */
+	negb = (uint32_t)((uint64_t)ccb >> 63);
+	cond_negate(a, len, nega);
+	cond_negate(b, len, negb);
+	return nega | (negb << 1);
+}
+
+/*
+ * Compute:
+ *   a <- (a*pa+b*pb)/(2^31) mod m
+ *   b <- (a*qa+b*qb)/(2^31) mod m
+ *
+ * m0i is equal to -1/m[0] mod 2^31.
+ *
+ * Factors pa, pb, qa and qb must be at most 2^31 in absolute value.
+ * Source integers a and b must be nonnegative; top word is not allowed
+ * to contain an extra 32nd bit.
+ */
+static void
+co_reduce_mod(uint32_t *a, uint32_t *b, size_t len,
+	int64_t pa, int64_t pb, int64_t qa, int64_t qb,
+	const uint32_t *m, uint32_t m0i)
+{
+	size_t k;
+	int64_t cca, ccb;
+	uint32_t fa, fb;
+
+	cca = 0;
+	ccb = 0;
+	fa = ((a[0] * (uint32_t)pa + b[0] * (uint32_t)pb) * m0i) & 0x7FFFFFFF;
+	fb = ((a[0] * (uint32_t)qa + b[0] * (uint32_t)qb) * m0i) & 0x7FFFFFFF;
+	for (k = 0; k < len; k ++) {	/* fa, fb: multiples of m added in */
+		uint32_t wa, wb;
+		uint64_t za, zb;
+		uint64_t tta, ttb;
+
+		/*
+		 * In this loop, carries 'cca' and 'ccb' always fit on
+		 * 33 bits (in absolute value).
+		 */
+		wa = a[k];
+		wb = b[k];
+		za = wa * (uint64_t)pa + wb * (uint64_t)pb
+			+ m[k] * (uint64_t)fa + (uint64_t)cca;
+		zb = wa * (uint64_t)qa + wb * (uint64_t)qb
+			+ m[k] * (uint64_t)fb + (uint64_t)ccb;
+		if (k > 0) {	/* k == 0: low word is dropped */
+			a[k - 1] = (uint32_t)za & 0x7FFFFFFF;
+			b[k - 1] = (uint32_t)zb & 0x7FFFFFFF;
+		}
+
+#define M   ((uint64_t)1 << 32)
+		tta = za >> 31;
+		ttb = zb >> 31;
+		tta = (tta ^ M) - M;	/* sign-extend from bit 32 */
+		ttb = (ttb ^ M) - M;
+		cca = *(int64_t *)&tta;	/* same bits, read as signed */
+		ccb = *(int64_t *)&ttb;
+#undef M
+	}
+	a[len - 1] = (uint32_t)cca;	/* final carry is the new top word */
+	b[len - 1] = (uint32_t)ccb;
+
+	/*
+	 * At this point:
+	 *   -m <= a < 2*m
+	 *   -m <= b < 2*m
+	 * (this is a case of Montgomery reduction)
+	 * The top word of 'a' and 'b' may have a 32-th bit set.
+	 * We may have to add or subtract the modulus.
+	 */
+	finish_mod(a, len, m, (uint32_t)((uint64_t)cca >> 63));
+	finish_mod(b, len, m, (uint32_t)((uint64_t)ccb >> 63));
+}
+
+/* see inner.h; result is written into x, t[] is scratch for a, b and v */
+uint32_t
+br_i31_moddiv(uint32_t *x, const uint32_t *y, const uint32_t *m, uint32_t m0i,
+	uint32_t *t)
+{
+	/*
+	 * Algorithm is an extended binary GCD. We maintain four values
+	 * a, b, u and v, with the following invariants:
+	 *
+	 *   a * x = y * u mod m
+	 *   b * x = y * v mod m
+	 *
+	 * Starting values are:
+	 *
+	 *   a = y
+	 *   b = m
+	 *   u = x
+	 *   v = 0
+	 *
+	 * The formal definition of the algorithm is a sequence of steps:
+	 *
+	 *   - If a is even, then a <- a/2 and u <- u/2 mod m.
+	 *   - Otherwise, if b is even, then b <- b/2 and v <- v/2 mod m.
+	 *   - Otherwise, if a > b, then a <- (a-b)/2 and u <- (u-v)/2 mod m.
+	 *   - Otherwise, b <- (b-a)/2 and v <- (v-u)/2 mod m.
+	 *
+	 * Algorithm stops when a = b. At that point, they both are equal
+	 * to GCD(y,m); the modular division succeeds if that value is 1.
+	 * The result of the modular division is then u (or v: both are
+	 * equal at that point).
+	 *
+	 * Each step makes either a or b shrink by at least one bit; hence,
+	 * if m has bit length k bits, then 2k-2 steps are sufficient.
+	 *
+	 *
+	 * Though complexity is quadratic in the size of m, the bit-by-bit
+	 * processing is not very efficient. We can speed up processing by
+	 * remarking that the decisions are taken based only on observation
+	 * of the top and low bits of a and b.
+	 *
+	 * In the loop below, at each iteration, we use the two top words
+	 * of a and b, and the low words of a and b, to compute reduction
+	 * parameters pa, pb, qa and qb such that the new values for a
+	 * and b are:
+	 *
+	 *   a' = (a*pa + b*pb) / (2^31)
+	 *   b' = (a*qa + b*qb) / (2^31)
+	 *
+	 * the division being exact.
+	 *
+	 * Since the choices are based on the top words, they may be slightly
+	 * off, requiring an optional correction: if a' < 0, then we replace
+	 * pa with -pa, and pb with -pb. The total length of a and b is
+	 * thus reduced by at least 30 bits at each iteration.
+	 *
+	 * The stopping conditions are still the same, though: when a
+	 * and b become equal, they must be both odd (since m is odd,
+	 * the GCD cannot be even), therefore the next operation is a
+	 * subtraction, and one of the values becomes 0. At that point,
+	 * nothing else happens, i.e. one value is stuck at 0, and the
+	 * other one is the GCD.
+	 */
+	size_t len, k;
+	uint32_t *a, *b, *u, *v;
+	uint32_t num, r;
+
+	len = (m[0] + 31) >> 5;
+	a = t;	/* t[] is split into a, b and v: 3*len words are used */
+	b = a + len;
+	u = x + 1;	/* u aliases the value words of x (updated in place) */
+	v = b + len;
+	memcpy(a, y + 1, len * sizeof *y);
+	memcpy(b, m + 1, len * sizeof *m);
+	memset(v, 0, len * sizeof *v);
+
+	/*
+	 * Loop below ensures that a and b are reduced by some bits each,
+	 * for a total of at least 30 bits.
+	 */
+	for (num = ((m[0] - (m[0] >> 5)) << 1) + 30; num >= 30; num -= 30) {
+		size_t j;
+		uint32_t c0, c1;
+		uint32_t a0, a1, b0, b1;
+		uint64_t a_hi, b_hi;
+		uint32_t a_lo, b_lo;
+		int64_t pa, pb, qa, qb;
+		int i;
+
+		/*
+		 * Extract top words of a and b. If j is the highest
+		 * index >= 1 such that a[j] != 0 or b[j] != 0, then we want
+		 * (a[j] << 31) + a[j - 1], and (b[j] << 31) + b[j - 1].
+		 * If a and b are down to one word each, then we use a[0]
+		 * and b[0].
+		 */
+		c0 = (uint32_t)-1;
+		c1 = (uint32_t)-1;
+		a0 = 0;
+		a1 = 0;
+		b0 = 0;
+		b1 = 0;
+		j = len;
+		while (j -- > 0) {	/* scans every word, top to bottom */
+			uint32_t aw, bw;
+
+			aw = a[j];
+			bw = b[j];
+			a0 ^= (a0 ^ aw) & c0;	/* a0 = aw while c0 is all-ones */
+			a1 ^= (a1 ^ aw) & c1;
+			b0 ^= (b0 ^ bw) & c0;
+			b1 ^= (b1 ^ bw) & c1;
+			c1 = c0;
+			c0 &= (((aw | bw) + 0x7FFFFFFF) >> 31) - (uint32_t)1;
+		}
+
+		/*
+		 * If c1 = 0, then we grabbed two words for a and b.
+		 * If c1 != 0 but c0 = 0, then we grabbed one word. It
+		 * is not possible that c1 != 0 and c0 != 0, because that
+		 * would mean that both integers are zero.
+		 */
+		a1 |= a0 & c1;
+		a0 &= ~c1;
+		b1 |= b0 & c1;
+		b0 &= ~c1;
+		a_hi = ((uint64_t)a0 << 31) + a1;
+		b_hi = ((uint64_t)b0 << 31) + b1;
+		a_lo = a[0];
+		b_lo = b[0];
+
+		/*
+		 * Compute reduction factors:
+		 *
+		 *   a' = a*pa + b*pb
+		 *   b' = a*qa + b*qb
+		 *
+		 * such that a' and b' are both multiple of 2^31, but are
+		 * only marginally larger than a and b.
+		 */
+		pa = 1;
+		pb = 0;
+		qa = 0;
+		qb = 1;
+		for (i = 0; i < 31; i ++) {
+			/*
+			 * At each iteration:
+			 *
+			 *   a <- (a-b)/2 if: a is odd, b is odd, a_hi > b_hi
+			 *   b <- (b-a)/2 if: a is odd, b is odd, a_hi <= b_hi
+			 *   a <- a/2 if: a is even
+			 *   b <- b/2 if: a is odd, b is even
+			 *
+			 * We multiply a_lo and b_lo by 2 at each
+			 * iteration, thus a division by 2 really is a
+			 * non-multiplication by 2.
+			 */
+			uint32_t r, oa, ob, cAB, cBA, cA;	/* r shadows outer r */
+			uint64_t rz;
+
+			/*
+			 * r = GT(a_hi, b_hi)
+			 * But the GT() function works on uint32_t operands,
+			 * so we inline a 64-bit version here.
+			 */
+			rz = b_hi - a_hi;
+			r = (uint32_t)((rz ^ ((a_hi ^ b_hi)
+				& (a_hi ^ rz))) >> 63);
+
+			/*
+			 * cAB = 1 if b must be subtracted from a
+			 * cBA = 1 if a must be subtracted from b
+			 * cA = 1 if a is divided by 2, 0 otherwise
+			 *
+			 * Rules:
+			 *
+			 *   cAB and cBA cannot be both 1.
+			 *   if a is not divided by 2, b is.
+			 */
+			oa = (a_lo >> i) & 1;	/* bit i, as a_lo/b_lo are doubled */
+			ob = (b_lo >> i) & 1;
+			cAB = oa & ob & r;
+			cBA = oa & ob & NOT(r);
+			cA = cAB | NOT(oa);
+
+			/*
+			 * Conditional subtractions.
+			 */
+			a_lo -= b_lo & -cAB;
+			a_hi -= b_hi & -(uint64_t)cAB;
+			pa -= qa & -(int64_t)cAB;
+			pb -= qb & -(int64_t)cAB;
+			b_lo -= a_lo & -cBA;
+			b_hi -= a_hi & -(uint64_t)cBA;
+			qa -= pa & -(int64_t)cBA;
+			qb -= pb & -(int64_t)cBA;
+
+			/*
+			 * Shifting.
+			 */
+			a_lo += a_lo & (cA - 1);	/* doubled when cA = 0 */
+			pa += pa & ((int64_t)cA - 1);
+			pb += pb & ((int64_t)cA - 1);
+			a_hi ^= (a_hi ^ (a_hi >> 1)) & -(uint64_t)cA;
+			b_lo += b_lo & -cA;	/* doubled when cA = 1 */
+			qa += qa & -(int64_t)cA;
+			qb += qb & -(int64_t)cA;
+			b_hi ^= (b_hi ^ (b_hi >> 1)) & ((uint64_t)cA - 1);
+		}
+
+		/*
+		 * Replace a and b with new values a' and b'.
+		 */
+		r = co_reduce(a, b, len, pa, pb, qa, qb);
+		pa -= pa * ((r & 1) << 1);	/* pa = -pa if a was negated */
+		pb -= pb * ((r & 1) << 1);
+		qa -= qa * (r & 2);	/* qa = -qa if b was negated */
+		qb -= qb * (r & 2);
+		co_reduce_mod(u, v, len, pa, pb, qa, qb, m + 1, m0i);
+	}
+
+	/*
+	 * Now one of the arrays should be 0, and the other contains
+	 * the GCD. If a is 0, then u is 0 as well, and v contains
+	 * the division result.
+	 * Result is correct if and only if GCD is 1.
+	 */
+	r = (a[0] | b[0]) ^ 1;	/* r ends up 0 iff (a | b) equals 1 */
+	u[0] |= v[0];	/* merge v into u, i.e. into x */
+	for (k = 1; k < len; k ++) {
+		r |= a[k] | b[k];
+		u[k] |= v[k];
+	}
+	return EQ0(r);
+}
diff --git a/third_party/bearssl/src/i31_modpow.c b/third_party/bearssl/src/i31_modpow.c
new file mode 100644
index 0000000..4ef3f5d
--- /dev/null
+++ b/third_party/bearssl/src/i31_modpow.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; x is replaced with the result, t1 and t2 are scratch */
+void
+br_i31_modpow(uint32_t *x,
+	const unsigned char *e, size_t elen,
+	const uint32_t *m, uint32_t m0i, uint32_t *t1, uint32_t *t2)
+{
+	size_t mlen;
+	uint32_t k;
+
+	/*
+	 * 'mlen' is the length of m[] expressed in bytes (including
+	 * the "bit length" first field).
+	 */
+	mlen = ((m[0] + 63) >> 5) * sizeof m[0];
+
+	/*
+	 * Throughout the algorithm:
+	 * -- t1[] is in Montgomery representation; it contains x, x^2,
+	 * x^4, x^8...
+	 * -- The result is accumulated, in normal representation, in
+	 * the x[] array.
+	 * -- t2[] is used as destination buffer for each multiplication.
+	 *
+	 * Note that there is no need to call br_i31_from_monty().
+	 */
+	memcpy(t1, x, mlen);
+	br_i31_to_monty(t1, m);
+	br_i31_zero(x, m[0]);
+	x[1] = 1;	/* accumulator starts at 1 */
+	for (k = 0; k < ((uint32_t)elen << 3); k ++) {
+		uint32_t ctl;
+
+		/* bit k of e, counted from the low bit of the last byte */
+		ctl = (e[elen - 1 - (k >> 3)] >> (k & 7)) & 1;
+		br_i31_montymul(t2, x, t1, m, m0i);
+		CCOPY(ctl, x, t2, mlen);	/* product always computed */
+		br_i31_montymul(t2, t1, t1, m, m0i);	/* square t1 */
+		memcpy(t1, t2, mlen);
+	}
+}
diff --git a/third_party/bearssl/src/i31_modpow2.c b/third_party/bearssl/src/i31_modpow2.c
new file mode 100644
index 0000000..0b8f8cf
--- /dev/null
+++ b/third_party/bearssl/src/i31_modpow2.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; returns 0 if tmp[] (twlen words) is too small, else 1 */
+uint32_t
+br_i31_modpow_opt(uint32_t *x,
+	const unsigned char *e, size_t elen,
+	const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen)
+{
+	size_t mlen, mwlen;
+	uint32_t *t1, *t2, *base;
+	size_t u, v;
+	uint32_t acc;
+	int acc_len, win_len;
+
+	/*
+	 * Get modulus size.
+	 */
+	mwlen = (m[0] + 63) >> 5;	/* words, header word included */
+	mlen = mwlen * sizeof m[0];	/* same size, in bytes */
+	mwlen += (mwlen & 1);	/* rounded up to an even word count */
+	t1 = tmp;
+	t2 = tmp + mwlen;
+
+	/*
+	 * Compute possible window size, with a maximum of 5 bits.
+	 * When the window has size 1 bit, we use a specific code
+	 * that requires only two temporaries. Otherwise, for a
+	 * window of k bits, we need 2^k+1 temporaries.
+	 */
+	if (twlen < (mwlen << 1)) {	/* no room for t1 and t2 */
+		return 0;
+	}
+	for (win_len = 5; win_len > 1; win_len --) {
+		if ((((uint32_t)1 << win_len) + 1) * mwlen <= twlen) {
+			break;
+		}
+	}
+
+	/*
+	 * Everything is done in Montgomery representation.
+	 */
+	br_i31_to_monty(x, m);
+
+	/*
+	 * Compute window contents. If the window has size one bit only,
+	 * then t2 is set to x; otherwise, t2[0] is left untouched, and
+	 * t2[k] is set to x^k (for k >= 1).
+	 */
+	if (win_len == 1) {
+		memcpy(t2, x, mlen);
+	} else {
+		memcpy(t2 + mwlen, x, mlen);
+		base = t2 + mwlen;
+		for (u = 2; u < ((unsigned)1 << win_len); u ++) {
+			br_i31_montymul(base + mwlen, base, x, m, m0i);
+			base += mwlen;
+		}
+	}
+
+	/*
+	 * We need to set x to 1, in Montgomery representation. This can
+	 * be done efficiently by setting the high word to 1, then doing
+	 * one word-sized shift.
+	 */
+	br_i31_zero(x, m[0]);
+	x[(m[0] + 31) >> 5] = 1;
+	br_i31_muladd_small(x, 0, m);
+
+	/*
+	 * We process bits from most to least significant. At each
+	 * loop iteration, we have acc_len bits in acc.
+	 */
+	acc = 0;
+	acc_len = 0;
+	while (acc_len > 0 || elen > 0) {
+		int i, k;
+		uint32_t bits;
+
+		/*
+		 * Get the next bits.
+		 */
+		k = win_len;
+		if (acc_len < win_len) {
+			if (elen > 0) {
+				acc = (acc << 8) | *e ++;
+				elen --;
+				acc_len += 8;
+			} else {
+				k = acc_len;	/* last, shorter chunk */
+			}
+		}
+		bits = (acc >> (acc_len - k)) & (((uint32_t)1 << k) - 1);
+		acc_len -= k;
+
+		/*
+		 * We could get exactly k bits. Compute k squarings.
+		 */
+		for (i = 0; i < k; i ++) {
+			br_i31_montymul(t1, x, x, m, m0i);
+			memcpy(x, t1, mlen);
+		}
+
+		/*
+		 * Window lookup: we want to set t2 to the window
+		 * lookup value, assuming the bits are non-zero. If
+		 * the window length is 1 bit only, then t2 is
+		 * already set; otherwise, we do a constant-time lookup.
+		 */
+		if (win_len > 1) {
+			br_i31_zero(t2, m[0]);
+			base = t2 + mwlen;
+			for (u = 1; u < ((uint32_t)1 << k); u ++) {
+				uint32_t mask;
+
+				mask = -EQ(u, bits);
+				for (v = 1; v < mwlen; v ++) {	/* skips word 0 */
+					t2[v] |= mask & base[v];
+				}
+				base += mwlen;
+			}
+		}
+
+		/*
+		 * Multiply with the looked-up value. We keep the
+		 * product only if the exponent bits are not all-zero.
+		 */
+		br_i31_montymul(t1, x, t2, m, m0i);
+		CCOPY(NEQ(bits, 0), x, t1, mlen);
+	}
+
+	/*
+	 * Convert back from Montgomery representation, and exit.
+	 */
+	br_i31_from_monty(x, m, m0i);
+	return 1;
+}
diff --git a/third_party/bearssl/src/i31_montmul.c b/third_party/bearssl/src/i31_montmul.c
new file mode 100644
index 0000000..758f8f4
--- /dev/null
+++ b/third_party/bearssl/src/i31_montmul.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; d[] is the destination, its header d[0] is set to m[0] */
+void
+br_i31_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y,
+	const uint32_t *m, uint32_t m0i)
+{
+	/*
+	 * Each outer loop iteration computes:
+	 *   d <- (d + xu*y + f*m) / 2^31
+	 * We have xu <= 2^31-1 and f <= 2^31-1.
+	 * Thus, if d <= 2*m-1 on input, then:
+	 *   2*m-1 + 2*(2^31-1)*m <= (2^32)*m-1
+	 * and the new d value is less than 2*m.
+	 *
+	 * We represent d over 31-bit words, with an extra word 'dh'
+	 * which can thus be only 0 or 1.
+	 */
+	size_t len, len4, u, v;
+	uint32_t dh;
+
+	len = (m[0] + 31) >> 5;
+	len4 = len & ~(size_t)3;	/* len rounded down to a multiple of 4 */
+	br_i31_zero(d, m[0]);	/* NOTE(review): d presumably must not alias x, y */
+	dh = 0;
+	for (u = 0; u < len; u ++) {
+		/*
+		 * The carry for each operation fits on 32 bits:
+		 *   d[v+1] <= 2^31-1
+		 *   xu*y[v+1] <= (2^31-1)*(2^31-1)
+		 *   f*m[v+1] <= (2^31-1)*(2^31-1)
+		 *   r <= 2^32-1
+		 *   (2^31-1) + 2*(2^31-1)*(2^31-1) + (2^32-1) = 2^63 - 2^31
+		 * After division by 2^31, the new r is then at most 2^32-1
+		 *
+		 * Using a 32-bit carry has performance benefits on 32-bit
+		 * systems; however, on 64-bit architectures, we prefer to
+		 * keep the carry (r) in a 64-bit register, thus avoiding some
+		 * "clear high bits" operations.
+		 */
+		uint32_t f, xu;
+#if BR_64
+		uint64_t r;
+#else
+		uint32_t r;
+#endif
+
+		xu = x[u + 1];
+		f = MUL31_lo((d[1] + MUL31_lo(x[u + 1], y[1])), m0i);
+
+		r = 0;
+		for (v = 0; v < len4; v += 4) {	/* unrolled, 4 words per pass */
+			uint64_t z;
+
+			z = (uint64_t)d[v + 1] + MUL31(xu, y[v + 1])
+				+ MUL31(f, m[v + 1]) + r;
+			r = z >> 31;
+			d[v + 0] = (uint32_t)z & 0x7FFFFFFF;
+			z = (uint64_t)d[v + 2] + MUL31(xu, y[v + 2])
+				+ MUL31(f, m[v + 2]) + r;
+			r = z >> 31;
+			d[v + 1] = (uint32_t)z & 0x7FFFFFFF;
+			z = (uint64_t)d[v + 3] + MUL31(xu, y[v + 3])
+				+ MUL31(f, m[v + 3]) + r;
+			r = z >> 31;
+			d[v + 2] = (uint32_t)z & 0x7FFFFFFF;
+			z = (uint64_t)d[v + 4] + MUL31(xu, y[v + 4])
+				+ MUL31(f, m[v + 4]) + r;
+			r = z >> 31;
+			d[v + 3] = (uint32_t)z & 0x7FFFFFFF;
+		}
+		for (; v < len; v ++) {	/* remaining 0 to 3 words */
+			uint64_t z;
+
+			z = (uint64_t)d[v + 1] + MUL31(xu, y[v + 1])
+				+ MUL31(f, m[v + 1]) + r;
+			r = z >> 31;
+			d[v] = (uint32_t)z & 0x7FFFFFFF;
+		}
+
+		/*
+		 * Since the new dh can only be 0 or 1, the addition of
+		 * the old dh with the carry MUST fit on 32 bits, and
+		 * thus can be done into dh itself.
+		 */
+		dh += r;
+		d[len] = dh & 0x7FFFFFFF;
+		dh >>= 31;
+	}
+
+	/*
+	 * We must write back the bit length because it was overwritten in
+	 * the loop (not overwriting it would require a test in the loop,
+	 * which would yield bigger and slower code).
+	 */
+	d[0] = m[0];
+
+	/*
+	 * d[] may still be greater than m[] at that point; notably, the
+	 * 'dh' word may be non-zero.
+	 */
+	br_i31_sub(d, m, NEQ(dh, 0) | NOT(br_i31_sub(d, m, 0)));
+}
diff --git a/third_party/bearssl/src/i31_mulacc.c b/third_party/bearssl/src/i31_mulacc.c
new file mode 100644
index 0000000..7410e54
--- /dev/null
+++ b/third_party/bearssl/src/i31_mulacc.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; d[0] is overwritten with the combined length of a and b */
+void
+br_i31_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	size_t alen, blen, u;
+	uint32_t dl, dh;
+
+	alen = (a[0] + 31) >> 5;
+	blen = (b[0] + 31) >> 5;
+
+	/*
+	 * We want to add the two bit lengths, but these are encoded,
+	 * which requires extra care: the last term is 1 iff dl >= 31.
+	 */
+	dl = (a[0] & 31) + (b[0] & 31);
+	dh = (a[0] >> 5) + (b[0] >> 5);
+	d[0] = (dh << 5) + dl + (~(uint32_t)(dl - 31) >> 31);
+
+	for (u = 0; u < blen; u ++) {
+		uint32_t f;
+		size_t v;
+
+		/*
+		 * Carry always fits on 31 bits; we want to keep it in a
+		 * 32-bit register on 32-bit architectures (on a 64-bit
+		 * architecture, cast down from 64 to 32 bits means
+		 * clearing the high bits, which is not free; on a 32-bit
+		 * architecture, the same operation really means ignoring
+		 * the top register, which has negative or zero cost).
+		 */
+#if BR_64
+		uint64_t cc;
+#else
+		uint32_t cc;
+#endif
+
+		f = b[1 + u];
+		cc = 0;
+		for (v = 0; v < alen; v ++) {
+			uint64_t z;
+
+			z = (uint64_t)d[1 + u + v] + MUL31(f, a[1 + v]) + cc;
+			cc = z >> 31;
+			d[1 + u + v] = (uint32_t)z & 0x7FFFFFFF;
+		}
+		d[1 + u + alen] = (uint32_t)cc;	/* stored, not added */
+	}
+}
diff --git a/third_party/bearssl/src/i31_muladd.c b/third_party/bearssl/src/i31_muladd.c
new file mode 100644
index 0000000..eecd9e2
--- /dev/null
+++ b/third_party/bearssl/src/i31_muladd.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; x <- (x*2^31 + z) mod m: z is shifted in as the new low word, then one reduction */
+void
+br_i31_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m)
+{
+	uint32_t m_bitlen;
+	unsigned mblr;
+	size_t u, mlen;
+	uint32_t a0, a1, b0, hi, g, q, tb;
+	uint32_t under, over;
+	uint32_t cc;
+
+	/*
+	 * We can test on the modulus bit length since we accept to
+	 * leak that length.
+	 */
+	m_bitlen = m[0];
+	if (m_bitlen == 0) {
+		return;
+	}
+	if (m_bitlen <= 31) {
+		uint32_t lo;
+
+		hi = x[1] >> 1;
+		lo = (x[1] << 31) | z;
+		x[1] = br_rem(hi, lo, m[1]);
+		return;
+	}
+	mlen = (m_bitlen + 31) >> 5;
+	mblr = (unsigned)m_bitlen & 31;
+
+	/*
+	 * Principle: we estimate the quotient (x*2^31+z)/m by
+	 * doing a 64/32 division with the high words.
+	 *
+	 * Let:
+	 *   w = 2^31
+	 *   a = (w*a0 + a1) * w^N + a2
+	 *   b = b0 * w^N + b2
+	 * such that:
+	 *   0 <= a0 < w
+	 *   0 <= a1 < w
+	 *   0 <= a2 < w^N
+	 *   w/2 <= b0 < w
+	 *   0 <= b2 < w^N
+	 *   a < w*b
+	 * I.e. the two top words of a are a0:a1, the top word of b is
+	 * b0, we ensured that b0 is "full" (high bit set), and a is
+	 * such that the quotient q = a/b fits on one word (0 <= q < w).
+	 *
+	 * If a = b*q + r (with 0 <= r < b), we can estimate q by
+	 * doing an Euclidean division on the top words:
+	 *   a0*w+a1 = b0*u + v  (with 0 <= v < b0)
+	 * Then the following holds:
+	 *   0 <= u <= w
+	 *   u-2 <= q <= u
+	 */
+	hi = x[mlen];
+	if (mblr == 0) {
+		a0 = x[mlen];
+		memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);
+		x[1] = z;
+		a1 = x[mlen];
+		b0 = m[mlen];
+	} else {
+		a0 = ((x[mlen] << (31 - mblr)) | (x[mlen - 1] >> mblr))
+			& 0x7FFFFFFF;
+		memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);
+		x[1] = z;
+		a1 = ((x[mlen] << (31 - mblr)) | (x[mlen - 1] >> mblr))
+			& 0x7FFFFFFF;
+		b0 = ((m[mlen] << (31 - mblr)) | (m[mlen - 1] >> mblr))
+			& 0x7FFFFFFF;
+	}
+
+	/*
+	 * We estimate a quotient q. If the quotient returned by br_div()
+	 * is g:
+	 * -- If a0 == b0 then g == 0; we want q = 0x7FFFFFFF.
+	 * -- Otherwise:
+	 *    -- if g == 0 then we set q = 0;
+	 *    -- otherwise, we set q = g - 1.
+	 * The properties described above then ensure that the true
+	 * quotient is q-1, q or q+1.
+	 *
+	 * Take care that a0, a1 and b0 are 31-bit words, not 32-bit. We
+	 * must adjust the parameters to br_div() accordingly.
+	 */
+	g = br_div(a0 >> 1, a1 | (a0 << 31), b0);
+	q = MUX(EQ(a0, b0), 0x7FFFFFFF, MUX(EQ(g, 0), 0, g - 1));
+
+	/*
+	 * We subtract q*m from x (with the extra high word of value 'hi').
+	 * Since q may be off by 1 (in either direction), we may have to
+	 * add or subtract m afterwards.
+	 *
+	 * The 'tb' flag will be true (1) at the end of the loop if the
+	 * result is greater than or equal to the modulus (not counting
+	 * 'hi' or the carry).
+	 */
+	cc = 0;
+	tb = 1;
+	for (u = 1; u <= mlen; u ++) {
+		uint32_t mw, zw, xw, nxw;
+		uint64_t zl;
+
+		mw = m[u];
+		zl = MUL31(mw, q) + cc;
+		cc = (uint32_t)(zl >> 31);
+		zw = (uint32_t)zl & (uint32_t)0x7FFFFFFF;
+		xw = x[u];
+		nxw = xw - zw;
+		cc += nxw >> 31;
+		nxw &= 0x7FFFFFFF;
+		x[u] = nxw;
+		tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw));
+	}
+
+	/*
+	 * If we underestimated q, then either cc < hi (one extra bit
+	 * beyond the top array word), or cc == hi and tb is true (no
+	 * extra bit, but the result is not lower than the modulus). In
+	 * these cases we must subtract m once.
+	 *
+	 * Otherwise, we may have overestimated, which will show as
+	 * cc > hi (thus a negative result). Correction is adding m once.
+	 */
+	over = GT(cc, hi);
+	under = ~over & (tb | LT(cc, hi));
+	br_i31_add(x, m, over);
+	br_i31_sub(x, m, under);
+}
diff --git a/third_party/bearssl/src/i31_ninv31.c b/third_party/bearssl/src/i31_ninv31.c
new file mode 100644
index 0000000..dd83c96
--- /dev/null
+++ b/third_party/bearssl/src/i31_ninv31.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; returns -(1/x) mod 2^31 when x is odd, and 0 when x is even */
+uint32_t
+br_i31_ninv31(uint32_t x)
+{
+	uint32_t y;
+
+	y = 2 - x;
+	y *= 2 - y * x;
+	y *= 2 - y * x;
+	y *= 2 - y * x;
+	y *= 2 - y * x;
+	return MUX(x & 1, -y, 0) & 0x7FFFFFFF;
+}
diff --git a/third_party/bearssl/src/i31_reduce.c b/third_party/bearssl/src/i31_reduce.c
new file mode 100644
index 0000000..5c9523e
--- /dev/null
+++ b/third_party/bearssl/src/i31_reduce.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; x <- a mod m, with x[0] set to the encoded bit length of m (no-op beyond x[0] if m[0] == 0) */
+void
+br_i31_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m)
+{
+	uint32_t m_bitlen, a_bitlen;
+	size_t mlen, alen, u;
+
+	m_bitlen = m[0];
+	mlen = (m_bitlen + 31) >> 5;
+
+	x[0] = m_bitlen;
+	if (m_bitlen == 0) {
+		return;
+	}
+
+	/*
+	 * If the source is shorter, then simply copy all words from a[]
+	 * and zero out the upper words.
+	 */
+	a_bitlen = a[0];
+	alen = (a_bitlen + 31) >> 5;
+	if (a_bitlen < m_bitlen) {
+		memcpy(x + 1, a + 1, alen * sizeof *a);
+		for (u = alen; u < mlen; u ++) {
+			x[u + 1] = 0;
+		}
+		return;
+	}
+
+	/*
+	 * The source length is at least equal to that of the modulus.
+	 * We copy the top mlen-1 words of a[] directly, then inject the
+	 * remaining words one by one, from high to low, with reduction.
+	 */
+	memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a);
+	x[mlen] = 0;
+	for (u = 1 + alen - mlen; u > 0; u --) {
+		br_i31_muladd_small(x, a[u], m);
+	}
+}
diff --git a/third_party/bearssl/src/i31_rshift.c b/third_party/bearssl/src/i31_rshift.c
new file mode 100644
index 0000000..db6ba0b
--- /dev/null
+++ b/third_party/bearssl/src/i31_rshift.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; shifts x right by count bits in place; the shifts used imply 0 <= count <= 31 (TODO confirm) */
+void
+br_i31_rshift(uint32_t *x, int count)
+{
+	size_t u, len;
+	uint32_t r;
+
+	len = (x[0] + 31) >> 5;
+	if (len == 0) {
+		return;
+	}
+	r = x[1] >> count;
+	for (u = 2; u <= len; u ++) {
+		uint32_t w;
+
+		w = x[u];
+		x[u - 1] = ((w << (31 - count)) | r) & 0x7FFFFFFF;
+		r = w >> count;
+	}
+	x[len] = r;
+}
diff --git a/third_party/bearssl/src/i31_sub.c b/third_party/bearssl/src/i31_sub.c
new file mode 100644
index 0000000..3910895
--- /dev/null
+++ b/third_party/bearssl/src/i31_sub.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; computes a - b on 31-bit words, writes it to a only when ctl is 1, returns the final borrow */
+uint32_t
+br_i31_sub(uint32_t *a, const uint32_t *b, uint32_t ctl)
+{
+	uint32_t cc;
+	size_t u, m;
+
+	cc = 0;
+	m = (a[0] + 63) >> 5;
+	for (u = 1; u < m; u ++) {
+		uint32_t aw, bw, naw;
+
+		aw = a[u];
+		bw = b[u];
+		naw = aw - bw - cc;
+		cc = naw >> 31;
+		a[u] = MUX(ctl, naw & 0x7FFFFFFF, aw);
+	}
+	return cc;
+}
diff --git a/third_party/bearssl/src/i31_tmont.c b/third_party/bearssl/src/i31_tmont.c
new file mode 100644
index 0000000..4798ff6
--- /dev/null
+++ b/third_party/bearssl/src/i31_tmont.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; x <- x * 2^(31*n) mod m, n = word count of m, done as n injections of a zero word */
+void
+br_i31_to_monty(uint32_t *x, const uint32_t *m)
+{
+	uint32_t k;
+
+	for (k = (m[0] + 31) >> 5; k > 0; k --) {
+		br_i31_muladd_small(x, 0, m);
+	}
+}
diff --git a/third_party/bearssl/src/i32_add.c b/third_party/bearssl/src/i32_add.c
new file mode 100644
index 0000000..620baff
--- /dev/null
+++ b/third_party/bearssl/src/i32_add.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; computes a + b on 32-bit words, writes it to a only when ctl is 1, returns the final carry */
+uint32_t
+br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl)
+{
+	uint32_t cc;
+	size_t u, m;
+
+	cc = 0;
+	m = (a[0] + 63) >> 5;
+	for (u = 1; u < m; u ++) {
+		uint32_t aw, bw, naw;
+
+		aw = a[u];
+		bw = b[u];
+		naw = aw + bw + cc;
+
+		/*
+		 * Carry is 1 if naw < aw. Carry is also 1 if naw == aw
+		 * AND the carry was already 1.
+		 */
+		cc = (cc & EQ(naw, aw)) | LT(naw, aw);
+		a[u] = MUX(ctl, naw, aw);
+	}
+	return cc;
+}
diff --git a/third_party/bearssl/src/i32_bitlen.c b/third_party/bearssl/src/i32_bitlen.c
new file mode 100644
index 0000000..40ce9fa
--- /dev/null
+++ b/third_party/bearssl/src/i32_bitlen.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; bit length of the value in x[0..xlen-1] (least significant word first); visits every word */
+uint32_t
+br_i32_bit_length(uint32_t *x, size_t xlen)
+{
+	uint32_t tw, twk;
+
+	tw = 0;
+	twk = 0;
+	while (xlen -- > 0) {
+		uint32_t w, c;
+
+		c = EQ(tw, 0);
+		w = x[xlen];
+		tw = MUX(c, w, tw);
+		twk = MUX(c, (uint32_t)xlen, twk);
+	}
+	return (twk << 5) + BIT_LENGTH(tw);
+}
diff --git a/third_party/bearssl/src/i32_decmod.c b/third_party/bearssl/src/i32_decmod.c
new file mode 100644
index 0000000..a859af1
--- /dev/null
+++ b/third_party/bearssl/src/i32_decmod.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; decodes big-endian src into x only if the value is lower than m; returns 1 then, else 0 with x zeroed */
+uint32_t
+br_i32_decode_mod(uint32_t *x, const void *src, size_t len, const uint32_t *m)
+{
+	const unsigned char *buf;
+	uint32_t r;
+	size_t u, v, mlen;
+
+	buf = src;
+
+	/*
+	 * First pass: determine whether the value fits. The 'r' value
+	 * will contain the comparison result, as 0x00000000 (value is
+	 * equal to the modulus), 0x00000001 (value is greater than the
+	 * modulus), or 0xFFFFFFFF (value is lower than the modulus).
+	 */
+	mlen = (m[0] + 7) >> 3;
+	r = 0;
+	for (u = (mlen > len) ? mlen : len; u > 0; u --) {
+		uint32_t mb, xb;
+
+		v = u - 1;
+		if (v >= mlen) {
+			mb = 0;
+		} else {
+			mb = (m[1 + (v >> 2)] >> ((v & 3) << 3)) & 0xFF;
+		}
+		if (v >= len) {
+			xb = 0;
+		} else {
+			xb = buf[len - u];
+		}
+		r = MUX(EQ(r, 0), (uint32_t)CMP(xb, mb), r);
+	}
+
+	/*
+	 * Only r == 0xFFFFFFFF is acceptable. We want to set r to 0xFF if
+	 * the value fits, 0x00 otherwise; r then serves as a byte mask.
+	 */
+	r >>= 24;
+	br_i32_zero(x, m[0]);
+	u = (mlen > len) ? len : mlen;
+	while (u > 0) {
+		uint32_t xb;
+
+		xb = buf[len - u] & r;
+		u --;
+		x[1 + (u >> 2)] |= xb << ((u & 3) << 3);
+	}
+	return r >> 7;
+}
diff --git a/third_party/bearssl/src/i32_decode.c b/third_party/bearssl/src/i32_decode.c
new file mode 100644
index 0000000..f289038
--- /dev/null
+++ b/third_party/bearssl/src/i32_decode.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; decodes len big-endian bytes from src into x[1..] and stores the actual bit length in x[0] */
+void
+br_i32_decode(uint32_t *x, const void *src, size_t len)
+{
+	const unsigned char *buf;
+	size_t u, v;
+
+	buf = src;
+	u = len;
+	v = 1;
+	for (;;) {
+		if (u < 4) {
+			uint32_t w;
+
+			if (u < 2) {
+				if (u == 0) {
+					break;
+				} else {
+					w = buf[0];
+				}
+			} else {
+				if (u == 2) {
+					w = br_dec16be(buf);
+				} else {
+					w = ((uint32_t)buf[0] << 16)
+						| br_dec16be(buf + 1);
+				}
+			}
+			x[v ++] = w;
+			break;
+		} else {
+			u -= 4;
+			x[v ++] = br_dec32be(buf + u);
+		}
+	}
+	x[0] = br_i32_bit_length(x + 1, v - 1);
+}
diff --git a/third_party/bearssl/src/i32_decred.c b/third_party/bearssl/src/i32_decred.c
new file mode 100644
index 0000000..dc476db
--- /dev/null
+++ b/third_party/bearssl/src/i32_decred.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; decodes big-endian src (len bytes) and reduces it modulo m into x */
+void
+br_i32_decode_reduce(uint32_t *x,
+	const void *src, size_t len, const uint32_t *m)
+{
+	uint32_t m_bitlen;
+	size_t mblen, k, q;
+	const unsigned char *buf;
+
+	m_bitlen = m[0];
+
+	/*
+	 * Special case for an invalid modulus.
+	 */
+	if (m_bitlen == 0) {
+		x[0] = 0;
+		return;
+	}
+
+	/*
+	 * Clear the destination.
+	 */
+	br_i32_zero(x, m_bitlen);
+
+	/*
+	 * First decode directly as many bytes as possible without
+	 * reduction, taking care to leave a number of bytes which
+	 * is a multiple of 4.
+	 */
+	mblen = (m_bitlen + 7) >> 3;
+	k = mblen - 1;
+
+	/*
+	 * Up to k bytes can be safely decoded.
+	 */
+	if (k >= len) {
+		br_i32_decode(x, src, len);
+		x[0] = m_bitlen;
+		return;
+	}
+
+	/*
+	 * We want to first inject some bytes with direct decoding,
+	 * then extra bytes by whole 32-bit words. First compute
+	 * the size that should be injected that way.
+	 */
+	buf = src;
+	q = (len - k + 3) & ~(size_t)3;
+
+	/*
+	 * It may happen that this is more than what we already have
+	 * (by at most 3 bytes). Such a case may happen only with
+	 * a very short modulus. In that case, we must process the first
+	 * bytes "manually".
+	 */
+	if (q > len) {
+		int i;
+		uint32_t w;
+
+		w = 0;
+		for (i = 0; i < 4; i ++) {
+			w <<= 8;
+			if (q <= len) {
+				w |= buf[len - q];
+			}
+			q --;
+		}
+		br_i32_muladd_small(x, w, m);
+	} else {
+		br_i32_decode(x, buf, len - q);
+		x[0] = m_bitlen;
+	}
+
+	/*
+	 * At that point, we have exactly q bytes to inject, and q is
+	 * a multiple of 4.
+	 */
+	for (k = len - q; k < len; k += 4) {
+		br_i32_muladd_small(x, br_dec32be(buf + k), m);
+	}
+}
diff --git a/third_party/bearssl/src/i32_div32.c b/third_party/bearssl/src/i32_div32.c
new file mode 100644
index 0000000..d8b8023
--- /dev/null
+++ b/third_party/bearssl/src/i32_div32.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; divides the 64-bit value hi:lo by d, one quotient bit per step; returns the quotient, *r gets the remainder */
+uint32_t
+br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r)
+{
+	/* TODO: optimize this */
+	uint32_t q;
+	uint32_t ch, cf;
+	int k;
+
+	q = 0;
+	ch = EQ(hi, d);
+	hi = MUX(ch, 0, hi);
+	for (k = 31; k > 0; k --) {
+		int j;
+		uint32_t w, ctl, hi2, lo2;
+
+		j = 32 - k;
+		w = (hi << j) | (lo >> k);
+		ctl = GE(w, d) | (hi >> k);
+		hi2 = (w - d) >> j;
+		lo2 = lo - (d << k);
+		hi = MUX(ctl, hi2, hi);
+		lo = MUX(ctl, lo2, lo);
+		q |= ctl << k;
+	}
+	cf = GE(lo, d) | hi;
+	q |= cf;
+	*r = MUX(cf, lo - d, lo);
+	return q;
+}
diff --git a/third_party/bearssl/src/i32_encode.c b/third_party/bearssl/src/i32_encode.c
new file mode 100644
index 0000000..303652f
--- /dev/null
+++ b/third_party/bearssl/src/i32_encode.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; writes x into dst as exactly len big-endian bytes, left-padded with zeros when len is larger */
+void
+br_i32_encode(void *dst, size_t len, const uint32_t *x)
+{
+	unsigned char *buf;
+	size_t k;
+
+	buf = dst;
+
+	/*
+	 * Compute the announced size of x in bytes; extra bytes are
+	 * filled with zeros.
+	 */
+	k = (x[0] + 7) >> 3;
+	while (len > k) {
+		*buf ++ = 0;
+		len --;
+	}
+
+	/*
+	 * Now we use k as index within x[]. That index starts at 1;
+	 * we initialize it to the topmost word to output, and first
+	 * emit its 1 to 3 low bytes if len is not a multiple of 4.
+	 */
+	k = (len + 3) >> 2;
+	switch (len & 3) {
+	case 3:
+		*buf ++ = x[k] >> 16;
+		/* fall through */
+	case 2:
+		*buf ++ = x[k] >> 8;
+		/* fall through */
+	case 1:
+		*buf ++ = x[k];
+		k --;
+	}
+
+	/*
+	 * Encode all complete words.
+	 */
+	while (k > 0) {
+		br_enc32be(buf, x[k]);
+		k --;
+		buf += 4;
+	}
+}
diff --git a/third_party/bearssl/src/i32_fmont.c b/third_party/bearssl/src/i32_fmont.c
new file mode 100644
index 0000000..dc1c934
--- /dev/null
+++ b/third_party/bearssl/src/i32_fmont.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h; x <- x / 2^(32*len) mod m, len = words of m; presumably m0i = -1/m[1] mod 2^32 (confirm) */
+void
+br_i32_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i)
+{
+	size_t len, u, v;
+
+	len = (m[0] + 31) >> 5;
+	for (u = 0; u < len; u ++) {
+		uint32_t f;
+		uint64_t cc;
+
+		f = x[1] * m0i;
+		cc = 0;
+		for (v = 0; v < len; v ++) {
+			uint64_t z;
+
+			z = (uint64_t)x[v + 1] + MUL(f, m[v + 1]) + cc;
+			cc = z >> 32;
+			if (v != 0) {
+				x[v] = (uint32_t)z;
+			}
+		}
+		x[len] = (uint32_t)cc;
+	}
+
+	/*
+	 * We may have to do an extra subtraction, but only if the
+	 * value in x[] is indeed greater than or equal to that of m[],
+	 * which is why we must do two calls (first call computes the
+	 * carry, second call performs the subtraction only if the carry
+	 * is 0).
+	 */
+	br_i32_sub(x, m, NOT(br_i32_sub(x, m, 0)));
+}
diff --git a/third_party/bearssl/src/i32_iszero.c b/third_party/bearssl/src/i32_iszero.c
new file mode 100644
index 0000000..659df7f
--- /dev/null
+++ b/third_party/bearssl/src/i32_iszero.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- returns 1 if all value words of x[] are zero, 0 otherwise */
+uint32_t
+br_i32_iszero(const uint32_t *x)
+{
+	uint32_t z;
+	size_t u;
+
+	z = 0;
+	for (u = (x[0] + 31) >> 5; u > 0; u --) {
+		z |= x[u]; /* OR of all words, with no data-dependent branch */
+	}
+	return ~(z | -z) >> 31; /* top bit of (z | -z) is set exactly when z != 0 */
+}
diff --git a/third_party/bearssl/src/i32_modpow.c b/third_party/bearssl/src/i32_modpow.c
new file mode 100644
index 0000000..034aba0
--- /dev/null
+++ b/third_party/bearssl/src/i32_modpow.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- x <- x^e mod m; e[] is big-endian, over elen bytes */
+void
+br_i32_modpow(uint32_t *x,
+	const unsigned char *e, size_t elen,
+	const uint32_t *m, uint32_t m0i, uint32_t *t1, uint32_t *t2)
+{
+	size_t mlen;
+	uint32_t k;
+
+	/*
+	 * 'mlen' is the length of m[] expressed in bytes (including
+	 * the "bit length" first field).
+	 */
+	mlen = ((m[0] + 63) >> 5) * sizeof m[0];
+
+	/*
+	 * Throughout the algorithm:
+	 * -- t1[] is in Montgomery representation; it contains x, x^2,
+	 * x^4, x^8...
+	 * -- The result is accumulated, in normal representation, in
+	 * the x[] array.
+	 * -- t2[] is used as destination buffer for each multiplication.
+	 *
+	 * Since x[] stays in normal form, br_i32_from_monty() is not needed.
+	 */
+	memcpy(t1, x, mlen);
+	br_i32_to_monty(t1, m);
+	br_i32_zero(x, m[0]);
+	x[1] = 1;
+	for (k = 0; k < ((uint32_t)elen << 3); k ++) {
+		uint32_t ctl;
+
+		ctl = (e[elen - 1 - (k >> 3)] >> (k & 7)) & 1; /* bit k of e, least significant first */
+		br_i32_montymul(t2, x, t1, m, m0i);
+		CCOPY(ctl, x, t2, mlen);
+		br_i32_montymul(t2, t1, t1, m, m0i);
+		memcpy(t1, t2, mlen);
+	}
+}
diff --git a/third_party/bearssl/src/i32_montmul.c b/third_party/bearssl/src/i32_montmul.c
new file mode 100644
index 0000000..7edb376
--- /dev/null
+++ b/third_party/bearssl/src/i32_montmul.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- Montgomery product of x[] and y[] modulo m[], result in d[] */
+void
+br_i32_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y,
+	const uint32_t *m, uint32_t m0i)
+{
+	size_t len, u, v;
+	uint64_t dh;
+
+	len = (m[0] + 31) >> 5;
+	br_i32_zero(d, m[0]);
+	dh = 0; /* extra high word of the accumulator, above d[len] */
+	for (u = 0; u < len; u ++) {
+		uint32_t f, xu;
+		uint64_t r1, r2, zh;
+
+		xu = x[u + 1];
+		f = (d[1] + x[u + 1] * y[1]) * m0i;
+		r1 = 0; /* carry of the d + xu*y chain */
+		r2 = 0; /* carry of the + f*m chain */
+		for (v = 0; v < len; v ++) {
+			uint64_t z;
+			uint32_t t;
+
+			z = (uint64_t)d[v + 1] + MUL(xu, y[v + 1]) + r1;
+			r1 = z >> 32;
+			t = (uint32_t)z;
+			z = (uint64_t)t + MUL(f, m[v + 1]) + r2;
+			r2 = z >> 32;
+			if (v != 0) { /* results go one word lower; the v == 0 low word is dropped */
+				d[v] = (uint32_t)z;
+			}
+		}
+		zh = dh + r1 + r2;
+		d[len] = (uint32_t)zh;
+		dh = zh >> 32;
+	}
+
+	/*
+	 * d[] may still be greater than or equal to m[] at that point;
+	 * notably, the 'dh' word may be non-zero.
+	 */
+	br_i32_sub(d, m, NEQ(dh, 0) | NOT(br_i32_sub(d, m, 0)));
+}
diff --git a/third_party/bearssl/src/i32_mulacc.c b/third_party/bearssl/src/i32_mulacc.c
new file mode 100644
index 0000000..55da385
--- /dev/null
+++ b/third_party/bearssl/src/i32_mulacc.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- adds the product a*b into d[]; d[0] is set to a[0] + b[0] */
+void
+br_i32_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b)
+{
+	size_t alen, blen, u;
+
+	alen = (a[0] + 31) >> 5;
+	blen = (b[0] + 31) >> 5;
+	d[0] = a[0] + b[0];
+	for (u = 0; u < blen; u ++) {
+		uint32_t f;
+		size_t v;
+#if BR_64
+		uint64_t cc;
+#else
+		uint32_t cc;
+#endif
+
+		f = b[1 + u];
+		cc = 0;
+		for (v = 0; v < alen; v ++) {
+			uint64_t z;
+
+			z = (uint64_t)d[1 + u + v] + MUL(f, a[1 + v]) + cc;
+			cc = z >> 32;
+			d[1 + u + v] = (uint32_t)z;
+		}
+		d[1 + u + alen] = (uint32_t)cc; /* overwritten (not added to): prior content is ignored */
+	}
+}
diff --git a/third_party/bearssl/src/i32_muladd.c b/third_party/bearssl/src/i32_muladd.c
new file mode 100644
index 0000000..dd526ad
--- /dev/null
+++ b/third_party/bearssl/src/i32_muladd.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- x <- (x*2^32 + z) mod m, computed in place */
+void
+br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m)
+{
+	uint32_t m_bitlen;
+	size_t u, mlen;
+	uint32_t a0, a1, b0, hi, g, q, tb;
+	uint32_t chf, clow, under, over;
+	uint64_t cc;
+
+	/*
+	 * We can test on the modulus bit length since we accept to
+	 * leak that length.
+	 */
+	m_bitlen = m[0];
+	if (m_bitlen == 0) {
+		return;
+	}
+	if (m_bitlen <= 32) {
+		x[1] = br_rem(x[1], z, m[1]);
+		return;
+	}
+	mlen = (m_bitlen + 31) >> 5;
+
+	/*
+	 * Principle: we estimate the quotient (x*2^32+z)/m by
+	 * doing a 64/32 division with the high words.
+	 *
+	 * Let:
+	 *   w = 2^32
+	 *   a = (w*a0 + a1) * w^N + a2
+	 *   b = b0 * w^N + b2
+	 * such that:
+	 *   0 <= a0 < w
+	 *   0 <= a1 < w
+	 *   0 <= a2 < w^N
+	 *   w/2 <= b0 < w
+	 *   0 <= b2 < w^N
+	 *   a < w*b
+	 * I.e. the two top words of a are a0:a1, the top word of b is
+	 * b0, we ensured that b0 is "full" (high bit set), and a is
+	 * such that the quotient q = a/b fits on one word (0 <= q < w).
+	 *
+	 * If a = b*q + r (with 0 <= r < b), we can estimate q by
+	 * doing an Euclidean division on the top words:
+	 *   a0*w+a1 = b0*u + v  (with 0 <= v < w)
+	 * Then the following holds:
+	 *   0 <= u <= w
+	 *   u-2 <= q <= u
+	 */
+	a0 = br_i32_word(x, m_bitlen - 32);
+	hi = x[mlen]; /* top word, about to be shifted out of the array */
+	memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);
+	x[1] = z;
+	a1 = br_i32_word(x, m_bitlen - 32);
+	b0 = br_i32_word(m, m_bitlen - 32);
+
+	/*
+	 * We estimate the quotient q. If the value returned by br_div()
+	 * is g:
+	 * -- If a0 == b0 then g == 0; we want q = 0xFFFFFFFF.
+	 * -- Otherwise:
+	 *    -- if g == 0 then we set q = 0;
+	 *    -- otherwise, we set q = g - 1.
+	 * The properties described above then ensure that the true
+	 * quotient is q-1, q or q+1.
+	 */
+	g = br_div(a0, a1, b0);
+	q = MUX(EQ(a0, b0), 0xFFFFFFFF, MUX(EQ(g, 0), 0, g - 1));
+
+	/*
+	 * We subtract q*m from x (with the extra high word of value 'hi').
+	 * Since q may be off by 1 (in either direction), we may have to
+	 * add or subtract m afterwards.
+	 *
+	 * The 'tb' flag will be true (1) at the end of the loop if the
+	 * result is greater than or equal to the modulus (not counting
+	 * 'hi' or the carry).
+	 */
+	cc = 0;
+	tb = 1;
+	for (u = 1; u <= mlen; u ++) {
+		uint32_t mw, zw, xw, nxw;
+		uint64_t zl;
+
+		mw = m[u];
+		zl = MUL(mw, q) + cc;
+		cc = (uint32_t)(zl >> 32);
+		zw = (uint32_t)zl;
+		xw = x[u];
+		nxw = xw - zw;
+		cc += (uint64_t)GT(nxw, xw); /* borrow from the word subtraction */
+		x[u] = nxw;
+		tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw));
+	}
+
+	/*
+	 * If we underestimated q, then either cc < hi (one extra bit
+	 * beyond the top array word), or cc == hi and tb is true (no
+	 * extra bit, but the result is not lower than the modulus). In
+	 * these cases we must subtract m once.
+	 *
+	 * Otherwise, we may have overestimated, which will show as
+	 * cc > hi (thus a negative result). Correction is adding m once.
+	 */
+	chf = (uint32_t)(cc >> 32);
+	clow = (uint32_t)cc;
+	over = chf | GT(clow, hi);
+	under = ~over & (tb | (~chf & LT(clow, hi)));
+	br_i32_add(x, m, over);
+	br_i32_sub(x, m, under);
+}
diff --git a/third_party/bearssl/src/i32_ninv32.c b/third_party/bearssl/src/i32_ninv32.c
new file mode 100644
index 0000000..6564434
--- /dev/null
+++ b/third_party/bearssl/src/i32_ninv32.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- returns -(1/x) mod 2^32 when x is odd, and 0 when x is even */
+uint32_t
+br_i32_ninv32(uint32_t x)
+{
+	uint32_t y;
+
+	y = 2 - x; /* for odd x, x*y = 1 mod 4 */
+	y *= 2 - y * x; /* each such step doubles the number of correct low bits */
+	y *= 2 - y * x;
+	y *= 2 - y * x;
+	y *= 2 - y * x; /* four steps: 2 -> 4 -> 8 -> 16 -> 32 bits */
+	return MUX(x & 1, -y, 0);
+}
diff --git a/third_party/bearssl/src/i32_reduce.c b/third_party/bearssl/src/i32_reduce.c
new file mode 100644
index 0000000..90fff09
--- /dev/null
+++ b/third_party/bearssl/src/i32_reduce.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- x <- a mod m; x[0] is set to the bit length of m */
+void
+br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m)
+{
+	uint32_t m_bitlen, a_bitlen;
+	size_t mlen, alen, u;
+
+	m_bitlen = m[0];
+	mlen = (m_bitlen + 31) >> 5;
+
+	x[0] = m_bitlen;
+	if (m_bitlen == 0) {
+		return;
+	}
+
+	/*
+	 * If the source is shorter, then simply copy all words from a[]
+	 * and zero out the upper words.
+	 */
+	a_bitlen = a[0];
+	alen = (a_bitlen + 31) >> 5;
+	if (a_bitlen < m_bitlen) {
+		memcpy(x + 1, a + 1, alen * sizeof *a);
+		for (u = alen; u < mlen; u ++) {
+			x[u + 1] = 0;
+		}
+		return;
+	}
+
+	/*
+	 * The source length is at least equal to that of the modulus.
+	 * We copy the top mlen-1 words of a[], then inject the remaining
+	 * words one by one, most significant first.
+	 */
+	memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a);
+	x[mlen] = 0;
+	for (u = 1 + alen - mlen; u > 0; u --) {
+		br_i32_muladd_small(x, a[u], m);
+	}
+}
diff --git a/third_party/bearssl/src/i32_sub.c b/third_party/bearssl/src/i32_sub.c
new file mode 100644
index 0000000..9c50023
--- /dev/null
+++ b/third_party/bearssl/src/i32_sub.c
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- a <- a - b if ctl is 1; the final borrow is always returned */
+uint32_t
+br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl)
+{
+	uint32_t cc;
+	size_t u, m;
+
+	cc = 0;
+	m = (a[0] + 63) >> 5; /* one past the index of the last value word */
+	for (u = 1; u < m; u ++) {
+		uint32_t aw, bw, naw;
+
+		aw = a[u];
+		bw = b[u];
+		naw = aw - bw - cc;
+
+		/*
+		 * Carry is 1 if naw > aw. Carry is 1 also if naw == aw
+		 * AND the carry was already 1.
+		 */
+		cc = (cc & EQ(naw, aw)) | GT(naw, aw);
+		a[u] = MUX(ctl, naw, aw); /* the difference is stored only when ctl is set */
+	}
+	return cc;
+}
diff --git a/third_party/bearssl/src/i32_tmont.c b/third_party/bearssl/src/i32_tmont.c
new file mode 100644
index 0000000..058cd88
--- /dev/null
+++ b/third_party/bearssl/src/i32_tmont.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- converts x[] to Montgomery representation modulo m, in place */
+void
+br_i32_to_monty(uint32_t *x, const uint32_t *m)
+{
+	uint32_t k;
+
+	for (k = (m[0] + 31) >> 5; k > 0; k --) { /* one pass per 32-bit word of m[] */
+		br_i32_muladd_small(x, 0, m); /* x <- x*2^32 mod m */
+	}
+}
diff --git a/third_party/bearssl/src/i62_modpow2.c b/third_party/bearssl/src/i62_modpow2.c
new file mode 100644
index 0000000..2db537f
--- /dev/null
+++ b/third_party/bearssl/src/i62_modpow2.c
@@ -0,0 +1,493 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+#if BR_INT128
+
+/*
+ * Compute x*y+v1+v2. Operands are 64-bit, and result is 128-bit, with
+ * high word in "hi" and low word in "lo".
+ */
+#define FMA1(hi, lo, x, y, v1, v2)   do { \
+		unsigned __int128 fmaz; \
+		fmaz = (unsigned __int128)(x) * (unsigned __int128)(y) \
+			+ (unsigned __int128)(v1) + (unsigned __int128)(v2); \
+		(hi) = (uint64_t)(fmaz >> 64); \
+		(lo) = (uint64_t)fmaz; \
+	} while (0)
+
+/*
+ * Compute x1*y1+x2*y2+v1+v2. Operands are 64-bit, and result is 128-bit,
+ * with high word in "hi" and low word in "lo".
+ *
+ * Callers should ensure that the two inner products, and the v1 and v2
+ * operands, are multiples of 4 (this is not used by this specific
+ * definition, but the BR_UMUL128 variant of FMA2 relies on it).
+ */
+#define FMA2(hi, lo, x1, y1, x2, y2, v1, v2)   do { \
+		unsigned __int128 fmaz; \
+		fmaz = (unsigned __int128)(x1) * (unsigned __int128)(y1) \
+			+ (unsigned __int128)(x2) * (unsigned __int128)(y2) \
+			+ (unsigned __int128)(v1) + (unsigned __int128)(v2); \
+		(hi) = (uint64_t)(fmaz >> 64); \
+		(lo) = (uint64_t)fmaz; \
+	} while (0)
+
+#elif BR_UMUL128
+
+#include <intrin.h>
+
+#define FMA1(hi, lo, x, y, v1, v2)   do { \
+		uint64_t fmahi, fmalo; \
+		unsigned char fmacc; \
+		fmalo = _umul128((x), (y), &fmahi); \
+		fmacc = _addcarry_u64(0, fmalo, (v1), &fmalo); \
+		_addcarry_u64(fmacc, fmahi, 0, &fmahi); \
+		fmacc = _addcarry_u64(0, fmalo, (v2), &(lo)); \
+		_addcarry_u64(fmacc, fmahi, 0, &(hi)); \
+	} while (0)
+
+/*
+ * Normally we should use _addcarry_u64() for FMA2 too, but it makes
+ * Visual Studio crash. Instead we use this version, which leverages
+ * the fact that the vx operands, and the products, are multiples of 4.
+ * This is unfortunately slower.
+ */
+#define FMA2(hi, lo, x1, y1, x2, y2, v1, v2)   do { \
+		uint64_t fma1hi, fma1lo; \
+		uint64_t fma2hi, fma2lo; \
+		uint64_t fmatt; \
+		fma1lo = _umul128((x1), (y1), &fma1hi); \
+		fma2lo = _umul128((x2), (y2), &fma2hi); \
+		fmatt = (fma1lo >> 2) + (fma2lo >> 2) \
+			+ ((v1) >> 2) + ((v2) >> 2); \
+		(lo) = fmatt << 2; \
+		(hi) = fma1hi + fma2hi + (fmatt >> 62); \
+	} while (0)
+
+/*
+ * The FMA2 macro definition we would prefer to use, but it triggers
+ * an internal compiler error in Visual Studio 2015.
+ *
+#define FMA2(hi, lo, x1, y1, x2, y2, v1, v2)   do { \
+		uint64_t fma1hi, fma1lo; \
+		uint64_t fma2hi, fma2lo; \
+		unsigned char fmacc; \
+		fma1lo = _umul128((x1), (y1), &fma1hi); \
+		fma2lo = _umul128((x2), (y2), &fma2hi); \
+		fmacc = _addcarry_u64(0, fma1lo, (v1), &fma1lo); \
+		_addcarry_u64(fmacc, fma1hi, 0, &fma1hi); \
+		fmacc = _addcarry_u64(0, fma2lo, (v2), &fma2lo); \
+		_addcarry_u64(fmacc, fma2hi, 0, &fma2hi); \
+		fmacc = _addcarry_u64(0, fma1lo, fma2lo, &(lo)); \
+		_addcarry_u64(fmacc, fma1hi, fma2hi, &(hi)); \
+	} while (0)
+ */
+
+#endif
+
+#define MASK62           ((uint64_t)0x3FFFFFFFFFFFFFFF) /* 2^62 - 1 */
+#define MUL62_lo(x, y)   (((uint64_t)(x) * (uint64_t)(y)) & MASK62) /* low 62 bits of x*y */
+
+/*
+ * Subtract b from a (arrays of 'num' 62-bit words, least significant
+ * word first), and return the final borrow. If 'ctl32' is 0, then
+ * a[] is kept unmodified, but the final borrow is still computed and
+ * returned. The mask construction expects 'ctl32' to be 0 or 1.
+ */
+static uint32_t
+i62_sub(uint64_t *a, const uint64_t *b, size_t num, uint32_t ctl32)
+{
+	uint64_t cc, mask;
+	size_t u;
+
+	cc = 0;
+	ctl32 = -ctl32;
+	mask = (uint64_t)ctl32 | ((uint64_t)ctl32 << 32); /* all-ones if ctl32 was 1, else 0 */
+	for (u = 0; u < num; u ++) {
+		uint64_t aw, bw, dw;
+
+		aw = a[u];
+		bw = b[u];
+		dw = aw - bw - cc;
+		cc = dw >> 63; /* top bit of the 64-bit difference is the borrow */
+		dw &= MASK62;
+		a[u] = aw ^ (mask & (dw ^ aw)); /* dw if mask is all-ones, aw otherwise */
+	}
+	return (uint32_t)cc;
+}
+
+/*
+ * Montgomery multiplication, over arrays of 62-bit values. The
+ * destination array (d) must be distinct from the other operands
+ * (x, y and m). All arrays are in little-endian format (least
+ * significant word comes first) over 'num' words.
+ */
+static void
+montymul(uint64_t *d, const uint64_t *x, const uint64_t *y,
+	const uint64_t *m, size_t num, uint64_t m0i)
+{
+	uint64_t dh;
+	size_t u, num4;
+
+	num4 = 1 + ((num - 1) & ~(size_t)3); /* end bound of the 4x-unrolled loop below */
+	memset(d, 0, num * sizeof *d);
+	dh = 0;
+	for (u = 0; u < num; u ++) {
+		size_t v;
+		uint64_t f, xu;
+		uint64_t r, zh;
+		uint64_t hi, lo;
+
+		xu = x[u] << 2; /* pre-shifted so that products are multiples of 4 */
+		f = MUL62_lo(d[0] + MUL62_lo(x[u], y[0]), m0i) << 2;
+
+		FMA2(hi, lo, xu, y[0], f, m[0], d[0] << 2, 0);
+		r = hi;
+
+		for (v = 1; v < num4; v += 4) {
+			FMA2(hi, lo, xu, y[v + 0],
+				f, m[v + 0], d[v + 0] << 2, r << 2);
+			r = hi + (r >> 62);
+			d[v - 1] = lo >> 2;
+			FMA2(hi, lo, xu, y[v + 1],
+				f, m[v + 1], d[v + 1] << 2, r << 2);
+			r = hi + (r >> 62);
+			d[v + 0] = lo >> 2;
+			FMA2(hi, lo, xu, y[v + 2],
+				f, m[v + 2], d[v + 2] << 2, r << 2);
+			r = hi + (r >> 62);
+			d[v + 1] = lo >> 2;
+			FMA2(hi, lo, xu, y[v + 3],
+				f, m[v + 3], d[v + 3] << 2, r << 2);
+			r = hi + (r >> 62);
+			d[v + 2] = lo >> 2;
+		}
+		for (; v < num; v ++) { /* remaining words, same step without unrolling */
+			FMA2(hi, lo, xu, y[v], f, m[v], d[v] << 2, r << 2);
+			r = hi + (r >> 62);
+			d[v - 1] = lo >> 2;
+		}
+
+		zh = dh + r;
+		d[num - 1] = zh & MASK62;
+		dh = zh >> 62;
+	}
+	i62_sub(d, m, num, (uint32_t)dh | NOT(i62_sub(d, m, num, 0))); /* subtract m if dh != 0 or d >= m */
+}
+
+/*
+ * Conversion back from Montgomery representation, in place, over
+ * 'num' 62-bit words (least significant word first).
+ */
+static void
+frommonty(uint64_t *x, const uint64_t *m, size_t num, uint64_t m0i)
+{
+	size_t u, v;
+
+	for (u = 0; u < num; u ++) {
+		uint64_t f, cc;
+
+		f = MUL62_lo(x[0], m0i) << 2;
+		cc = 0;
+		for (v = 0; v < num; v ++) {
+			uint64_t hi, lo;
+
+			FMA1(hi, lo, f, m[v], x[v] << 2, cc);
+			cc = hi << 2;
+			if (v != 0) { /* results go one word lower; the v == 0 low word is dropped */
+				x[v - 1] = lo >> 2;
+			}
+		}
+		x[num - 1] = cc >> 2;
+	}
+	i62_sub(x, m, num, NOT(i62_sub(x, m, num, 0))); /* subtract m only if x >= m */
+}
+
+/* see inner.h -- x31 <- x31^e mod m31; returns 0 if tmp[] is too small, else 1 */
+uint32_t
+br_i62_modpow_opt(uint32_t *x31, const unsigned char *e, size_t elen,
+	const uint32_t *m31, uint32_t m0i31, uint64_t *tmp, size_t twlen)
+{
+	size_t u, mw31num, mw62num;
+	uint64_t *x, *m, *t1, *t2;
+	uint64_t m0i;
+	uint32_t acc;
+	int win_len, acc_len;
+
+	/*
+	 * Get modulus size, in words.
+	 */
+	mw31num = (m31[0] + 31) >> 5;
+	mw62num = (mw31num + 1) >> 1; /* two 31-bit words per 62-bit word, rounded up */
+
+	/*
+	 * In order to apply this function, we must have enough room to
+	 * copy the operand and modulus into the temporary array, along
+	 * with at least two temporaries. If there is not enough room,
+	 * switch to br_i31_modpow(). We also use br_i31_modpow() if the
+	 * modulus length is not at least four words (94 bits or more).
+	 */
+	if (mw31num < 4 || (mw62num << 2) > twlen) {
+		/*
+		 * We assume here that we can split an aligned uint64_t
+		 * into two properly aligned uint32_t. Since both types
+		 * are supposed to have an exact width with no padding,
+		 * then this property must hold.
+		 */
+		size_t txlen;
+
+		txlen = mw31num + 1;
+		if (twlen < txlen) {
+			return 0;
+		}
+		br_i31_modpow(x31, e, elen, m31, m0i31,
+			(uint32_t *)tmp, (uint32_t *)tmp + txlen);
+		return 1;
+	}
+
+	/*
+	 * Convert x to Montgomery representation: this means that
+	 * we replace x with x*2^z mod m, where z is the smallest multiple
+	 * of the word size such that 2^z >= m. We want to reuse the 31-bit
+	 * functions here (for constant-time operation), but we need z
+	 * for a 62-bit word size.
+	 */
+	for (u = 0; u < mw62num; u ++) {
+		br_i31_muladd_small(x31, 0, m31);
+		br_i31_muladd_small(x31, 0, m31);
+	}
+
+	/*
+	 * Assemble operands into arrays of 62-bit words. Note that
+	 * all the arrays of 62-bit words that we will handle here
+	 * are without any leading size word.
+	 *
+	 * We also adjust tmp and twlen to account for the words used
+	 * for these extra arrays.
+	 */
+	m = tmp;
+	x = tmp + mw62num;
+	tmp += (mw62num << 1);
+	twlen -= (mw62num << 1);
+	for (u = 0; u < mw31num; u += 2) {
+		size_t v;
+
+		v = u >> 1;
+		if ((u + 1) == mw31num) { /* odd word count: last 62-bit word holds one 31-bit word */
+			m[v] = (uint64_t)m31[u + 1];
+			x[v] = (uint64_t)x31[u + 1];
+		} else {
+			m[v] = (uint64_t)m31[u + 1]
+				+ ((uint64_t)m31[u + 2] << 31);
+			x[v] = (uint64_t)x31[u + 1]
+				+ ((uint64_t)x31[u + 2] << 31);
+		}
+	}
+
+	/*
+	 * Compute window size. We support windows up to 5 bits; for a
+	 * window of size k bits, we need 2^k+1 temporaries (for k = 1,
+	 * we use special code that uses only 2 temporaries).
+	 */
+	for (win_len = 5; win_len > 1; win_len --) {
+		if ((((uint32_t)1 << win_len) + 1) * mw62num <= twlen) {
+			break;
+		}
+	}
+
+	t1 = tmp;
+	t2 = tmp + mw62num;
+
+	/*
+	 * Compute m0i, which is equal to -(1/m0) mod 2^62. We were
+	 * provided with m0i31, which already fulfills this property
+	 * modulo 2^31; the single expression below is then sufficient.
+	 */
+	m0i = (uint64_t)m0i31;
+	m0i = MUL62_lo(m0i, (uint64_t)2 + MUL62_lo(m0i, m[0]));
+
+	/*
+	 * Compute window contents. If the window has size one bit only,
+	 * then t2 is set to x; otherwise, t2[0] is left untouched, and
+	 * t2[k] is set to x^k (for k >= 1).
+	 */
+	if (win_len == 1) {
+		memcpy(t2, x, mw62num * sizeof *x);
+	} else {
+		uint64_t *base;
+
+		memcpy(t2 + mw62num, x, mw62num * sizeof *x);
+		base = t2 + mw62num;
+		for (u = 2; u < ((unsigned)1 << win_len); u ++) {
+			montymul(base + mw62num, base, x, m, mw62num, m0i);
+			base += mw62num;
+		}
+	}
+
+	/*
+	 * Set x to 1, in Montgomery representation. We again use the
+	 * 31-bit code.
+	 */
+	br_i31_zero(x31, m31[0]);
+	x31[(m31[0] + 31) >> 5] = 1;
+	br_i31_muladd_small(x31, 0, m31);
+	if (mw31num & 1) {
+		br_i31_muladd_small(x31, 0, m31);
+	}
+	for (u = 0; u < mw31num; u += 2) {
+		size_t v;
+
+		v = u >> 1;
+		if ((u + 1) == mw31num) {
+			x[v] = (uint64_t)x31[u + 1];
+		} else {
+			x[v] = (uint64_t)x31[u + 1]
+				+ ((uint64_t)x31[u + 2] << 31);
+		}
+	}
+
+	/*
+	 * We process bits from most to least significant. At each
+	 * loop iteration, we have acc_len bits in acc.
+	 */
+	acc = 0;
+	acc_len = 0;
+	while (acc_len > 0 || elen > 0) {
+		int i, k;
+		uint32_t bits;
+		uint64_t mask1, mask2;
+
+		/*
+		 * Get the next bits.
+		 */
+		k = win_len;
+		if (acc_len < win_len) {
+			if (elen > 0) {
+				acc = (acc << 8) | *e ++;
+				elen --;
+				acc_len += 8;
+			} else {
+				k = acc_len; /* exponent exhausted: final, shorter chunk */
+			}
+		}
+		bits = (acc >> (acc_len - k)) & (((uint32_t)1 << k) - 1);
+		acc_len -= k;
+
+		/*
+		 * We could get exactly k bits. Compute k squarings.
+		 */
+		for (i = 0; i < k; i ++) {
+			montymul(t1, x, x, m, mw62num, m0i);
+			memcpy(x, t1, mw62num * sizeof *x);
+		}
+
+		/*
+		 * Window lookup: we want to set t2 to the window
+		 * lookup value, assuming the bits are non-zero. If
+		 * the window length is 1 bit only, then t2 is
+		 * already set; otherwise, we do a constant-time lookup.
+		 */
+		if (win_len > 1) {
+			uint64_t *base;
+
+			memset(t2, 0, mw62num * sizeof *t2);
+			base = t2 + mw62num;
+			for (u = 1; u < ((uint32_t)1 << k); u ++) {
+				uint64_t mask;
+				size_t v;
+
+				mask = -(uint64_t)EQ(u, bits);
+				for (v = 0; v < mw62num; v ++) {
+					t2[v] |= mask & base[v];
+				}
+				base += mw62num;
+			}
+		}
+
+		/*
+		 * Multiply with the looked-up value. We keep the product
+		 * only if the exponent bits are not all-zero.
+		 */
+		montymul(t1, x, t2, m, mw62num, m0i);
+		mask1 = -(uint64_t)EQ(bits, 0);
+		mask2 = ~mask1;
+		for (u = 0; u < mw62num; u ++) {
+			x[u] = (mask1 & x[u]) | (mask2 & t1[u]);
+		}
+	}
+
+	/*
+	 * Convert back from Montgomery representation.
+	 */
+	frommonty(x, m, mw62num, m0i);
+
+	/*
+	 * Convert result into 31-bit words.
+	 */
+	for (u = 0; u < mw31num; u += 2) {
+		uint64_t zw;
+
+		zw = x[u >> 1];
+		x31[u + 1] = (uint32_t)zw & 0x7FFFFFFF;
+		if ((u + 1) < mw31num) {
+			x31[u + 2] = (uint32_t)(zw >> 31);
+		}
+	}
+	return 1;
+}
+
+#else
+
+/* see inner.h -- fallback without 128-bit products: uses br_i31_modpow_opt() */
+uint32_t
+br_i62_modpow_opt(uint32_t *x31, const unsigned char *e, size_t elen,
+	const uint32_t *m31, uint32_t m0i31, uint64_t *tmp, size_t twlen)
+{
+	size_t mwlen;
+
+	mwlen = (m31[0] + 63) >> 5;
+	if (twlen < mwlen) {
+		return 0;
+	}
+	return br_i31_modpow_opt(x31, e, elen, m31, m0i31,
+		(uint32_t *)tmp, twlen << 1); /* twlen counts 64-bit words; callee gets 32-bit words */
+}
+
+#endif
+
+/* see inner.h -- br_i62_modpow_opt() wrapper taking tmp[] as 32-bit words */
+uint32_t
+br_i62_modpow_opt_as_i31(uint32_t *x31, const unsigned char *e, size_t elen,
+	const uint32_t *m31, uint32_t m0i31, uint32_t *tmp, size_t twlen)
+{
+	/*
+	 * As documented, this function expects the 'tmp' argument to be
+	 * 64-bit aligned. This is OK since this function is internal (it
+	 * is not part of BearSSL's public API).
+	 */
+	return br_i62_modpow_opt(x31, e, elen, m31, m0i31,
+		(uint64_t *)tmp, twlen >> 1); /* two 32-bit words per 64-bit word */
+}
diff --git a/third_party/bearssl/src/inner.h b/third_party/bearssl/src/inner.h
new file mode 100644
index 0000000..0d40825
--- /dev/null
+++ b/third_party/bearssl/src/inner.h
@@ -0,0 +1,2559 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef INNER_H__
+#define INNER_H__
+
+#include <string.h>
+#include <limits.h>
+
+#include "config.h"
+#include "bearssl.h"
+
+/*
+ * On MSVC, disable warning 4146, about applying unary minus on an
+ * unsigned type: it is standard C, and we do it all the time, e.g. to
+ * turn a 0/1 flag into an all-zeros/all-ones mask.
+ */
+#if _MSC_VER
+#pragma warning( disable : 4146 )
+#endif
+
+/*
+ * Maximum size for an RSA modulus (in bits). Allocated stack buffers
+ * depend on that size, so this value should be kept small. Currently,
+ * 2048-bit RSA keys offer adequate security, and should still do so for
+ * the next few decades; however, a number of widespread PKIs have
+ * already set their root keys to RSA-4096, so we should be able to
+ * process such keys.
+ *
+ * This value MUST be a multiple of 64. This value MUST NOT exceed 47666
+ * (some computations in RSA key generation rely on the factor size being
+ * no more than 23833 bits). RSA key sizes beyond 3072 bits don't make a
+ * lot of sense anyway.
+ */
+#define BR_MAX_RSA_SIZE   4096
+
+/*
+ * Minimum size for an RSA modulus (in bits); this value is used only to
+ * filter out invalid parameters for key pair generation. Normally,
+ * applications should not use RSA keys smaller than 2048 bits; but some
+ * specific cases might need shorter keys, for legacy or research
+ * purposes.
+ */
+#define BR_MIN_RSA_SIZE   512
+
+/*
+ * Maximum size for an RSA factor (in bits). This is for RSA private-key
+ * operations. Default is to support factors up to a bit more than half
+ * the maximum modulus size (2080 bits with a 4096-bit maximum modulus).
+ *
+ * This value MUST be a multiple of 32.
+ */
+#define BR_MAX_RSA_FACTOR   ((BR_MAX_RSA_SIZE + 64) >> 1)
+
+/*
+ * Maximum size for an EC curve (modulus or order), in bits. Size of
+ * stack buffers depends on that parameter. This size MUST be a multiple
+ * of 8 (so that decoding an integer with that many bytes does not
+ * overflow). The default, 528 bits, corresponds to 66 bytes.
+ */
+#define BR_MAX_EC_SIZE   528
+
+/*
+ * Some macros to recognize the current architecture. Right now, we are
+ * interested in automatically recognizing architectures with efficient
+ * 64-bit types so that we may automatically use implementations that
+ * use 64-bit registers in that case. Future versions may detect, e.g.,
+ * availability of SSE2 intrinsics.
+ *
+ * If 'unsigned long' is a 64-bit type, then we assume that 64-bit types
+ * are efficient. Otherwise, we rely on macros that depend on compiler,
+ * OS and architecture. In any case, failure to detect the architecture
+ * as 64-bit means that the 32-bit code will be used, and that code
+ * also works on 64-bit architectures (the 64-bit code may simply be
+ * more efficient). A BR_64 defined beforehand disables the detection.
+ *
+ * The test on 'unsigned long' should already catch most cases, the one
+ * notable exception being Windows code where 'unsigned long' is kept to
+ * 32-bit for compatibility with all the legacy code that liberally uses
+ * the 'DWORD' type for 32-bit values.
+ *
+ * Macro names are taken from: http://nadeausoftware.com/articles/2012/02/c_c_tip_how_detect_processor_type_using_compiler_predefined_macros
+ */
+#ifndef BR_64
+#if ((ULONG_MAX >> 31) >> 31) == 3
+#define BR_64   1
+#elif defined(__ia64) || defined(__itanium__) || defined(_M_IA64)
+#define BR_64   1
+#elif defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \
+	|| defined(__64BIT__) || defined(_LP64) || defined(__LP64__)
+#define BR_64   1
+#elif defined(__sparc64__)
+#define BR_64   1
+#elif defined(__x86_64__) || defined(_M_X64)
+#define BR_64   1
+#elif defined(__aarch64__) || defined(_M_ARM64)
+#define BR_64   1
+#elif defined(__mips64)
+#define BR_64   1
+#endif
+#endif
+
+/*
+ * Enable BR_LOMUL by default (unless preset) for BR_ARMEL_CORTEXM_GCC builds.
+ */
+#ifndef BR_LOMUL
+#if BR_ARMEL_CORTEXM_GCC
+#define BR_LOMUL   1
+#endif
+#endif
+
+/*
+ * Architecture detection: 32-bit x86 (BR_i386) and 64-bit x86 (BR_amd64).
+ */
+#ifndef BR_i386
+#if __i386__ || _M_IX86
+#define BR_i386   1
+#endif
+#endif
+
+#ifndef BR_amd64
+#if __x86_64__ || _M_X64
+#define BR_amd64   1
+#endif
+#endif
+
+/*
+ * Compiler brand and version.
+ *
+ * Implementations that use intrinsics need to detect the compiler type
+ * and version because some specific actions may be needed to activate
+ * the corresponding opcodes, both for header inclusion, and when using
+ * them in a function.
+ *
+ * BR_GCC, BR_CLANG and BR_MSC will be set to 1 for, respectively, GCC,
+ * Clang and MS Visual C. For each of them, sub-macros will be defined
+ * for versions; each sub-macro is set whenever the compiler version is
+ * at least as recent as the one corresponding to the macro.
+ */
+
+/*
+ * GCC thresholds are on versions 4.4 to 4.9 and 5.0 (Clang is excluded).
+ */
+#ifndef BR_GCC
+#if __GNUC__ && !__clang__
+#define BR_GCC   1
+/* First, set only the highest threshold reached by this GCC version. */
+#if __GNUC__ > 4
+#define BR_GCC_5_0   1
+#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9
+#define BR_GCC_4_9   1
+#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 8
+#define BR_GCC_4_8   1
+#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 7
+#define BR_GCC_4_7   1
+#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 6
+#define BR_GCC_4_6   1
+#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 5
+#define BR_GCC_4_5   1
+#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 4
+#define BR_GCC_4_4   1
+#endif
+/* Then cascade downward: each threshold implies all the older ones. */
+#if BR_GCC_5_0
+#define BR_GCC_4_9   1
+#endif
+#if BR_GCC_4_9
+#define BR_GCC_4_8   1
+#endif
+#if BR_GCC_4_8
+#define BR_GCC_4_7   1
+#endif
+#if BR_GCC_4_7
+#define BR_GCC_4_6   1
+#endif
+#if BR_GCC_4_6
+#define BR_GCC_4_5   1
+#endif
+#if BR_GCC_4_5
+#define BR_GCC_4_4   1
+#endif
+
+#endif
+#endif
+
+/*
+ * Clang thresholds are on versions 3.7.0 and 3.8.0.
+ */
+#ifndef BR_CLANG
+#if __clang__
+#define BR_CLANG   1
+/* Set only the highest threshold reached by this Clang version. */
+#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 8)
+#define BR_CLANG_3_8   1
+#elif __clang_major__ == 3 && __clang_minor__ >= 7
+#define BR_CLANG_3_7   1
+#endif
+/* The 3.8 threshold implies the 3.7 one. */
+#if BR_CLANG_3_8
+#define BR_CLANG_3_7   1
+#endif
+
+#endif
+#endif
+
+/*
+ * MS Visual C thresholds are on Visual Studio 2005 to 2015.
+ */
+#ifndef BR_MSC
+#if _MSC_VER
+#define BR_MSC   1
+/* Map _MSC_VER to the most recent matching Visual Studio release. */
+#if _MSC_VER >= 1900
+#define BR_MSC_2015   1
+#elif _MSC_VER >= 1800
+#define BR_MSC_2013   1
+#elif _MSC_VER >= 1700
+#define BR_MSC_2012   1
+#elif _MSC_VER >= 1600
+#define BR_MSC_2010   1
+#elif _MSC_VER >= 1500
+#define BR_MSC_2008   1
+#elif _MSC_VER >= 1400
+#define BR_MSC_2005   1
+#endif
+/* Then cascade downward: each release implies all the older ones. */
+#if BR_MSC_2015
+#define BR_MSC_2013   1
+#endif
+#if BR_MSC_2013
+#define BR_MSC_2012   1
+#endif
+#if BR_MSC_2012
+#define BR_MSC_2010   1
+#endif
+#if BR_MSC_2010
+#define BR_MSC_2008   1
+#endif
+#if BR_MSC_2008
+#define BR_MSC_2005   1
+#endif
+
+#endif
+#endif
+
+/*
+ * GCC 4.4+ and Clang 3.7+ allow tagging specific functions with a 'target'
+ * attribute that activates specific opcodes; elsewhere BR_TARGET() is empty.
+ */
+#if BR_GCC_4_4 || BR_CLANG_3_7
+#define BR_TARGET(x)   __attribute__((target(x)))
+#else
+#define BR_TARGET(x)
+#endif
+
+/*
+ * AES-NI intrinsics are available on x86 (32-bit and 64-bit) with GCC 4.8+,
+ * Clang 3.7+ and MSC 2012+. A preset BR_AES_X86NI disables this detection.
+ */
+#ifndef BR_AES_X86NI
+#if (BR_i386 || BR_amd64) && (BR_GCC_4_8 || BR_CLANG_3_7 || BR_MSC_2012)
+#define BR_AES_X86NI   1
+#endif
+#endif
+
+/*
+ * SSE2 intrinsics are available on x86 (32-bit and 64-bit) with GCC 4.4+,
+ * Clang 3.7+ and MSC 2005+. A preset BR_SSE2 disables this detection.
+ */
+#ifndef BR_SSE2
+#if (BR_i386 || BR_amd64) && (BR_GCC_4_4 || BR_CLANG_3_7 || BR_MSC_2005)
+#define BR_SSE2   1
+#endif
+#endif
+
+/*
+ * RDRAND intrinsics are available on x86 (32-bit and 64-bit) with GCC 4.6+,
+ * Clang 3.7+ and MSC 2012+. A preset BR_RDRAND disables this detection.
+ */
+#ifndef BR_RDRAND
+#if (BR_i386 || BR_amd64) && (BR_GCC_4_6 || BR_CLANG_3_7 || BR_MSC_2012)
+#define BR_RDRAND   1
+#endif
+#endif
+
+/*
+ * Determine type of OS for random number generation. Macro names and
+ * values are documented on:
+ *    https://sourceforge.net/p/predef/wiki/OperatingSystems/
+ *
+ * Win32's CryptGenRandom() should be available on Windows systems.
+ *
+ * /dev/urandom should work on all Unix-like systems (including macOS).
+ *
+ * getentropy() is present on Linux (Glibc 2.25+), FreeBSD (12.0+) and
+ * OpenBSD (5.6+). For OpenBSD, there do not seem to be easy-to-use
+ * macros to test the minimum version, so we just assume that it is
+ * recent enough (last version without getentropy() has gone out of
+ * support in May 2015).
+ *
+ * Ideally we should use getentropy() on macOS (10.12+) too, but I don't
+ * know how to test the exact OS version with preprocessor macros.
+ *
+ * TODO: enrich the list of detected systems.
+ */
+/* Unix-like systems: /dev/urandom. */
+#ifndef BR_USE_URANDOM
+#if defined _AIX \
+	|| defined __ANDROID__ \
+	|| defined __FreeBSD__ \
+	|| defined __NetBSD__ \
+	|| defined __OpenBSD__ \
+	|| defined __DragonFly__ \
+	|| defined __linux__ \
+	|| (defined __sun && (defined __SVR4 || defined __svr4__)) \
+	|| (defined __APPLE__ && defined __MACH__)
+#define BR_USE_URANDOM   1
+#endif
+#endif
+/* Linux with Glibc 2.25+, FreeBSD 12+, and OpenBSD: getentropy(). */
+#ifndef BR_USE_GETENTROPY
+#if (defined __linux__ \
+	&& (__GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 25))) \
+	|| (defined __FreeBSD__ && __FreeBSD__ >= 12) \
+	|| defined __OpenBSD__
+#define BR_USE_GETENTROPY   1
+#endif
+#endif
+/* Windows: the Win32 random generator. */
+#ifndef BR_USE_WIN32_RAND
+#if defined _WIN32 || defined _WIN64
+#define BR_USE_WIN32_RAND   1
+#endif
+#endif
+
+/*
+ * POWER8 crypto support. We rely on compiler macros (__GNUC__, with
+ * _ARCH_PWR8 or _ARCH_PPC, and __CRYPTO__) since we do not have a
+ * reliable, simple way to detect the required support at runtime (we
+ * could try running an opcode, and trapping the exception or signal on
+ * illegal instruction, but this induces some non-trivial OS
+ * dependencies that we would prefer to avoid if possible).
+ */
+#ifndef BR_POWER8
+#if __GNUC__ && ((_ARCH_PWR8 || _ARCH_PPC) && __CRYPTO__)
+#define BR_POWER8   1
+#endif
+#endif
+
+/*
+ * Detect endianness on POWER8 (a preset BR_POWER8_LE/BR_POWER8_BE wins).
+ */
+#if BR_POWER8
+#if defined BR_POWER8_LE
+#undef BR_POWER8_BE
+#if BR_POWER8_LE
+#define BR_POWER8_BE   0
+#else
+#define BR_POWER8_BE   1
+#endif
+#elif defined BR_POWER8_BE
+#undef BR_POWER8_LE
+#if BR_POWER8_BE
+#define BR_POWER8_LE   0
+#else
+#define BR_POWER8_LE   1
+#endif
+#else
+#if __LITTLE_ENDIAN__
+#define BR_POWER8_LE   1
+#define BR_POWER8_BE   0
+#else
+#define BR_POWER8_LE   0
+#define BR_POWER8_BE   1
+#endif
+#endif
+#endif
+
+/*
+ * Detect support for 128-bit integers (BR_INT128), or BR_UMUL128 on _M_X64.
+ */
+#if !defined BR_INT128 && !defined BR_UMUL128
+#ifdef __SIZEOF_INT128__
+#define BR_INT128    1
+#elif _M_X64
+#define BR_UMUL128   1
+#endif
+#endif
+
+/*
+ * Detect support for unaligned accesses with known endianness, unless preset.
+ *
+ *  x86 (both 32-bit and 64-bit) is little-endian and allows unaligned
+ *  accesses.
+ *
+ *  POWER/PowerPC allows unaligned accesses when big-endian. POWER8 and
+ *  later also allow unaligned accesses when little-endian.
+ */
+#if !defined BR_LE_UNALIGNED && !defined BR_BE_UNALIGNED
+
+#if __i386 || __i386__ || __x86_64__ || _M_IX86 || _M_X64
+#define BR_LE_UNALIGNED   1
+#elif BR_POWER8_BE
+#define BR_BE_UNALIGNED   1
+#elif BR_POWER8_LE
+#define BR_LE_UNALIGNED   1
+#elif (__powerpc__ || __powerpc64__ || _M_PPC || _ARCH_PPC || _ARCH_PPC64) \
+	&& __BIG_ENDIAN__
+#define BR_BE_UNALIGNED   1
+#endif
+
+#endif
+
+/* ==================================================================== */
+/*
+ * Encoding/decoding functions.
+ *
+ * 32-bit and 64-bit decoding, both little-endian and big-endian, is
+ * implemented with the inline functions below.
+ *
+ * When allowed by some compile-time options (autodetected or provided),
+ * optimised code is used, to perform direct memory access when the
+ * underlying architecture supports it, both for endianness and
+ * alignment. This, however, may trigger strict aliasing issues; the
+ * code below uses unions to perform (supposedly) safe type punning.
+ * Since the C aliasing rules are relatively complex and were amended,
+ * or at least re-explained with different phrasing, in all successive
+ * versions of the C standard, it is always a bit risky to bet that any
+ * specific version of a C compiler got it right, for some notion of
+ * "right".
+ */
+
+typedef union {
+	uint16_t u;                         /* the value, as a 16-bit integer */
+	unsigned char b[sizeof(uint16_t)];  /* the same storage, as raw bytes */
+} br_union_u16;
+
+typedef union {
+	uint32_t u;                         /* the value, as a 32-bit integer */
+	unsigned char b[sizeof(uint32_t)];  /* the same storage, as raw bytes */
+} br_union_u32;
+
+typedef union {
+	uint64_t u;                         /* the value, as a 64-bit integer */
+	unsigned char b[sizeof(uint64_t)];  /* the same storage, as raw bytes */
+} br_union_u64;
+
+/* Write the 16 low bits of x at dst, least significant byte first. */
+static inline void
+br_enc16le(void *dst, unsigned x)
+{
+#if BR_LE_UNALIGNED
+	br_union_u16 *w = dst;
+	w->u = x;
+#else
+	unsigned char *out = dst;
+	out[1] = (unsigned char)(x >> 8);
+	out[0] = (unsigned char)x;
+#endif
+}
+
+/* Write the 16 low bits of x at dst, most significant byte first. */
+static inline void
+br_enc16be(void *dst, unsigned x)
+{
+#if BR_BE_UNALIGNED
+	br_union_u16 *w = dst;
+	w->u = x;
+#else
+	unsigned char *out = dst;
+	out[1] = (unsigned char)x;
+	out[0] = (unsigned char)(x >> 8);
+#endif
+}
+
+/* Read a 16-bit value stored at src, least significant byte first. */
+static inline unsigned
+br_dec16le(const void *src)
+{
+#if BR_LE_UNALIGNED
+	const br_union_u16 *w = src;
+	return w->u;
+#else
+	const unsigned char *in = src;
+	return ((unsigned)in[1] << 8) | (unsigned)in[0];
+#endif
+}
+
+/* Read a 16-bit value stored at src, most significant byte first. */
+static inline unsigned
+br_dec16be(const void *src)
+{
+#if BR_BE_UNALIGNED
+	const br_union_u16 *w = src;
+	return w->u;
+#else
+	const unsigned char *in = src;
+	return (unsigned)in[1] | ((unsigned)in[0] << 8);
+#endif
+}
+
+/* Write x at dst as four bytes, least significant byte first. */
+static inline void
+br_enc32le(void *dst, uint32_t x)
+{
+#if BR_LE_UNALIGNED
+	br_union_u32 *w = dst;
+	w->u = x;
+#else
+	unsigned char *out = dst;
+	int i;
+	for (i = 0; i < 4; i ++) {
+		out[i] = (unsigned char)(x >> (8 * i));
+	}
+#endif
+}
+
+/* Write x at dst as four bytes, most significant byte first. */
+static inline void
+br_enc32be(void *dst, uint32_t x)
+{
+#if BR_BE_UNALIGNED
+	br_union_u32 *w = dst;
+	w->u = x;
+#else
+	unsigned char *out = dst;
+	int i;
+	for (i = 0; i < 4; i ++) {
+		out[i] = (unsigned char)(x >> (24 - 8 * i));
+	}
+#endif
+}
+
+/* Read a 32-bit value stored at src, least significant byte first. */
+static inline uint32_t
+br_dec32le(const void *src)
+{
+#if BR_LE_UNALIGNED
+	const br_union_u32 *w = src;
+	return w->u;
+#else
+	const unsigned char *in = src;
+	uint32_t lo = (uint32_t)in[0] | ((uint32_t)in[1] << 8);
+	uint32_t hi = (uint32_t)in[2] | ((uint32_t)in[3] << 8);
+
+	return lo | (hi << 16);
+#endif
+}
+
+/* Read a 32-bit value stored at src, most significant byte first. */
+static inline uint32_t
+br_dec32be(const void *src)
+{
+#if BR_BE_UNALIGNED
+	const br_union_u32 *w = src;
+	return w->u;
+#else
+	const unsigned char *in = src;
+	uint32_t hi = ((uint32_t)in[0] << 8) | (uint32_t)in[1];
+	uint32_t lo = ((uint32_t)in[2] << 8) | (uint32_t)in[3];
+
+	return (hi << 16) | lo;
+#endif
+}
+
+/* Write x at dst as eight bytes, least significant byte first. */
+static inline void
+br_enc64le(void *dst, uint64_t x)
+{
+#if BR_LE_UNALIGNED
+	br_union_u64 *w = dst;
+	w->u = x;
+#else
+	unsigned char *out = dst;
+	br_enc32le(out + 4, (uint32_t)(x >> 32));
+	br_enc32le(out, (uint32_t)x);
+#endif
+}
+
+/* Write x at dst as eight bytes, most significant byte first. */
+static inline void
+br_enc64be(void *dst, uint64_t x)
+{
+#if BR_BE_UNALIGNED
+	br_union_u64 *w = dst;
+	w->u = x;
+#else
+	unsigned char *out = dst;
+	br_enc32be(out + 4, (uint32_t)x);
+	br_enc32be(out, (uint32_t)(x >> 32));
+#endif
+}
+
+/* Read a 64-bit value stored at src, least significant byte first. */
+static inline uint64_t
+br_dec64le(const void *src)
+{
+#if BR_LE_UNALIGNED
+	const br_union_u64 *w = src;
+	return w->u;
+#else
+	const unsigned char *in = src;
+	uint64_t hi = br_dec32le(in + 4);
+	return (hi << 32) | (uint64_t)br_dec32le(in);
+#endif
+}
+
+/* Read a 64-bit value stored at src, most significant byte first. */
+static inline uint64_t
+br_dec64be(const void *src)
+{
+#if BR_BE_UNALIGNED
+	const br_union_u64 *w = src;
+	return w->u;
+#else
+	const unsigned char *in = src;
+	uint64_t hi = br_dec32be(in);
+	return (hi << 32) | (uint64_t)br_dec32be(in + 4);
+#endif
+}
+
+/*
+ * Range decoding and encoding (for several successive values), 16-bit words.
+ */
+void br_range_dec16le(uint16_t *v, size_t num, const void *src);
+void br_range_dec16be(uint16_t *v, size_t num, const void *src);
+void br_range_enc16le(void *dst, const uint16_t *v, size_t num);
+void br_range_enc16be(void *dst, const uint16_t *v, size_t num);
+/* Same functions, for 32-bit words. */
+void br_range_dec32le(uint32_t *v, size_t num, const void *src);
+void br_range_dec32be(uint32_t *v, size_t num, const void *src);
+void br_range_enc32le(void *dst, const uint32_t *v, size_t num);
+void br_range_enc32be(void *dst, const uint32_t *v, size_t num);
+/* Same functions, for 64-bit words. */
+void br_range_dec64le(uint64_t *v, size_t num, const void *src);
+void br_range_dec64be(uint64_t *v, size_t num, const void *src);
+void br_range_enc64le(void *dst, const uint64_t *v, size_t num);
+void br_range_enc64be(void *dst, const uint64_t *v, size_t num);
+
+/*
+ * Reverse the order of the four bytes of a 32-bit integer.
+ */
+static inline uint32_t
+br_swap32(uint32_t x)
+{
+	uint32_t y = (x >> 16) | (x << 16);    /* swap the two 16-bit halves */
+
+	return ((y >> 8) & (uint32_t)0x00FF00FF) | ((y & (uint32_t)0x00FF00FF) << 8);
+}
+
+/* ==================================================================== */
+/*
+ * Support code for hash functions.
+ */
+
+/*
+ * Initial values (IV) for MD5, SHA-1, SHA-224 and SHA-256.
+ */
+extern const uint32_t br_md5_IV[];
+extern const uint32_t br_sha1_IV[];
+extern const uint32_t br_sha224_IV[];
+extern const uint32_t br_sha256_IV[];
+
+/*
+ * Round functions for MD5, SHA-1, SHA-224 and SHA-256 (SHA-224 and
+ * SHA-256 use the same round function, br_sha2small_round()).
+ */
+void br_md5_round(const unsigned char *buf, uint32_t *val);
+void br_sha1_round(const unsigned char *buf, uint32_t *val);
+void br_sha2small_round(const unsigned char *buf, uint32_t *val);
+
+/*
+ * The core function for the TLS PRF. It computes P_hash(secret, label + seed)
+ * (presumably with hash 'dig' -- to confirm), and XORs the result into dst.
+ */
+void br_tls_phash(void *dst, size_t len,
+	const br_hash_class *dig,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed);
+
+/*
+ * Duplicate the configured hash implementations (the 'impl' field) of
+ * the multihash context src into the multihash context dst.
+ */
+static inline void
+br_multihash_copyimpl(br_multihash_context *dst,
+	const br_multihash_context *src)
+{
+	memcpy((void *)dst->impl, (const void *)src->impl, sizeof(src->impl));
+}
+
+/* ==================================================================== */
+/*
+ * Constant-time primitives. These functions manipulate 32-bit values in
+ * order to provide constant-time comparisons and multiplexers.
+ *
+ * Boolean values (the "ctl" bits) MUST have value 0 or 1.
+ *
+ * Implementation notes:
+ * =====================
+ *
+ * The uintN_t types are unsigned and with width exactly N bits; the C
+ * standard guarantees that computations are performed modulo 2^N, and
+ * there can be no overflow. Negation (unary '-') works on unsigned types
+ * as well.
+ *
+ * The intN_t types are guaranteed to have width exactly N bits, with no
+ * padding bit, and using two's complement representation. Casting
+ * intN_t to uintN_t really is conversion modulo 2^N. Beware that intN_t
+ * types, being signed, trigger undefined behaviour on arithmetic
+ * overflow (possibly raising some signal): with GCC, while modular
+ * arithmetics are usually applied, the optimizer may assume that
+ * overflows don't occur (unless the -fwrapv command-line option is
+ * added); Clang has the additional -ftrapv option to explicitly trap on
+ * integer overflow or underflow.
+ */
+
+/*
+ * Boolean negation: maps 1 to 0, and 0 to 1.
+ */
+static inline uint32_t
+NOT(uint32_t ctl)
+{
+	return 1 ^ ctl;
+}
+
+/* Multiplexer: the result is x when ctl == 1, and y when ctl == 0. */
+static inline uint32_t
+MUX(uint32_t ctl, uint32_t x, uint32_t y)
+{
+	uint32_t mask = -ctl;    /* all-ones when ctl == 1, zero when ctl == 0 */
+
+	return (x & mask) | (y & ~mask);
+}
+
+/*
+ * Equality test: the result is 1 when x == y, and 0 when they differ.
+ */
+static inline uint32_t
+EQ(uint32_t x, uint32_t y)
+{
+	/* d is zero exactly when x == y; only then is the top bit clear. */
+	uint32_t d = x ^ y;
+
+	return NOT((d | -d) >> 31);
+}
+
+/*
+ * Inequality test: the result is 1 when x != y, and 0 when x == y.
+ */
+static inline uint32_t
+NEQ(uint32_t x, uint32_t y)
+{
+	/* d is non-zero exactly when x != y; then d | -d has its top bit set. */
+	uint32_t d = x ^ y;
+
+	return (d | -d) >> 31;
+}
+
+/*
+ * Comparison: the result is 1 when x > y, and 0 otherwise.
+ */
+static inline uint32_t
+GT(uint32_t x, uint32_t y)
+{
+	/*
+	 * When both x < 2^31 and y < 2^31, the high bit of y-x is set
+	 * if x > y, and cleared otherwise.
+	 *
+	 * When exactly one of x and y is >= 2^31, the result is the
+	 * high bit of x.
+	 *
+	 * When both x >= 2^31 and y >= 2^31, we can virtually subtract
+	 * 2^31 from both, which brings us back to the first case; and
+	 * since (y-2^31)-(x-2^31) = y-x, the plain subtraction already
+	 * yields the right value.
+	 */
+	uint32_t diff = y - x;
+	uint32_t pick_x = (x ^ y) & (x ^ diff);
+
+	return (diff ^ pick_x) >> 31;
+}
+
+/*
+ * Derived comparisons built on GT(): GE (x >= y), LT (x < y), LE (x <= y).
+ */
+#define GE(x, y)   NOT(GT(y, x))
+#define LT(x, y)   GT(y, x)
+#define LE(x, y)   NOT(GT(x, y))
+
+/*
+ * Three-way comparison: the result is -1 when x < y, 0 when x == y,
+ * and 1 when x > y.
+ */
+static inline int32_t
+CMP(uint32_t x, uint32_t y)
+{
+	return -(int32_t)GT(y, x) | (int32_t)GT(x, y);
+}
+
+/*
+ * Zero test on a signed operand: 1 when x == 0, 0 for any other value.
+ */
+static inline uint32_t
+EQ0(int32_t x)
+{
+	uint32_t ux = (uint32_t)x;
+
+	/* The top bit of ux | -ux is set for every value except zero. */
+	return (~(ux | -ux)) >> 31;
+}
+
+/*
+ * Strict positivity test on a signed operand: 1 when x > 0, else 0.
+ */
+static inline uint32_t
+GT0(int32_t x)
+{
+	uint32_t ux = (uint32_t)x;
+
+	/*
+	 * Top bit of -x: set when x > 0, clear when x == 0. Masking with
+	 * ~ux rejects negative x (whose own top bit is set).
+	 */
+	return (-ux & ~ux) >> 31;
+}
+
+/*
+ * Non-negativity test on a signed operand: 1 when x >= 0, else 0.
+ */
+static inline uint32_t
+GE0(int32_t x)
+{
+	return ((uint32_t)x >> 31) ^ 1;
+}
+
+/* Sign test on a signed operand: 1 when x < 0, else 0. */
+static inline uint32_t
+LT0(int32_t x)
+{
+	uint32_t ux = (uint32_t)x;
+
+	return ux >> 31;
+}
+
+/*
+ * Non-positivity test on a signed operand: the result is 1 when
+ * x <= 0, and 0 otherwise.
+ */
+static inline uint32_t
+LE0(int32_t x)
+{
+	uint32_t ux = (uint32_t)x;
+
+	/*
+	 * The top bit of ~(-x) is set exactly when -x is nonnegative as a
+	 * signed value, i.e. when x lies between -(2^31-1) and 0. ORing in
+	 * x itself covers the remaining case x = -2^31.
+	 */
+	return (~-ux | ux) >> 31;
+}
+
+/*
+ * Conditional copy: src[] is copied into dst[] if and only if ctl is 1.
+ * dst[] and src[] may overlap completely (but not partially).
+ */
+void br_ccopy(uint32_t ctl, void *dst, const void *src, size_t len);
+/* CCOPY is a short alias for br_ccopy(). */
+#define CCOPY   br_ccopy
+
+/*
+ * Bit length of a 32-bit integer, between 0 and 32 (inclusive). The top
+ * bit is located with five halving steps (shift counts 16, 8, 4, 2, 1).
+ */
+static inline uint32_t
+BIT_LENGTH(uint32_t x)
+{
+	uint32_t len = NEQ(x, 0);
+	int s;
+
+	for (s = 4; s >= 0; s --) {
+		uint32_t big = GT(x, ((uint32_t)1 << (1u << s)) - 1);
+		x = MUX(big, x >> (1u << s), x);
+		len += big << s;
+	}
+	return len;
+}
+
+/* Minimum of x and y, selected with MUX() on the outcome of GT(). */
+static inline uint32_t
+MIN(uint32_t x, uint32_t y)
+{
+	uint32_t x_above = GT(x, y);
+
+	return MUX(x_above, y, x);
+}
+
+/* Maximum of x and y, selected with MUX() on the outcome of GT(). */
+static inline uint32_t
+MAX(uint32_t x, uint32_t y)
+{
+	uint32_t x_above = GT(x, y);
+
+	return MUX(x_above, x, y);
+}
+
+/*
+ * Multiply two 32-bit integers, with a 64-bit result (both operands are
+ * widened to uint64_t first). This default implementation assumes that
+ * the basic multiplication operator yields constant-time code.
+ */
+#define MUL(x, y)   ((uint64_t)(x) * (uint64_t)(y))
+
+#if BR_CT_MUL31
+
+/*
+ * Alternate implementation of MUL31, that will be constant-time on some
+ * (old) platforms where the default MUL31 is not. Unfortunately, it is
+ * also substantially slower, and yields larger code, on more modern
+ * platforms, which is why it is deactivated by default. For operands
+ * below 2^31, it computes (x + 2^31)*(y + 2^31) and then subtracts the
+ * extra terms x*2^31, y*2^31 and 2^62.
+ *
+ * MUL31_lo() must do some extra work because on some platforms, the
+ * _signed_ multiplication may return early if the top bits are 1, and
+ * the compiler may switch to a signed multiplication opcode if it
+ * notices that only the low word of MUL31() is kept (e.g. by a cast).
+ */
+#define MUL31(x, y)   ((uint64_t)((x) | (uint32_t)0x80000000) \
+                       * (uint64_t)((y) | (uint32_t)0x80000000) \
+                       - ((uint64_t)(x) << 31) - ((uint64_t)(y) << 31) \
+                       - ((uint64_t)1 << 62))
+static inline uint32_t
+MUL31_lo(uint32_t x, uint32_t y)
+{
+	uint32_t xl, xh;
+	uint32_t yl, yh;
+	/* Split both operands in 16-bit halves, each with its top bit forced. */
+	xl = (x & 0xFFFF) | (uint32_t)0x80000000;
+	xh = (x >> 16) | (uint32_t)0x80000000;
+	yl = (y & 0xFFFF) | (uint32_t)0x80000000;
+	yh = (y >> 16) | (uint32_t)0x80000000;
+	return (xl * yl + ((xl * yh + xh * yl) << 16)) & (uint32_t)0x7FFFFFFF;
+}
+
+#else
+
+/*
+ * Multiply two 31-bit integers, with a 62-bit result. This default
+ * implementation assumes that the basic multiplication operator
+ * yields constant-time code.
+ * The MUL31_lo() macro returns only the low 31 bits of the product.
+ */
+#define MUL31(x, y)     ((uint64_t)(x) * (uint64_t)(y))
+#define MUL31_lo(x, y)  (((uint32_t)(x) * (uint32_t)(y)) & (uint32_t)0x7FFFFFFF)
+
+#endif
+
+/*
+ * Multiply two words together; the sum of the lengths of the two
+ * operands must not exceed 31 (for instance, one operand may use 16
+ * bits if the other fits on 15). If BR_CT_MUL15 is non-zero, then the
+ * macro sets the top bit of both operands and clears it in the result;
+ * this helps in making the operation constant-time on some platforms,
+ * where the basic 32-bit multiplication is not constant-time.
+ */
+#if BR_CT_MUL15
+#define MUL15(x, y)   (((uint32_t)(x) | (uint32_t)0x80000000) \
+                       * ((uint32_t)(y) | (uint32_t)0x80000000) \
+		       & (uint32_t)0x7FFFFFFF)
+#else
+#define MUL15(x, y)   ((uint32_t)(x) * (uint32_t)(y))
+#endif
+
+/*
+ * Arithmetic right shift of the 32-bit value x by n bits, 0 <= n <= 31
+ * (the sign bit is copied). In the default version, x must be an lvalue.
+ * Right-shifting a negative value is _implementation-defined_: it does
+ * not trigger undefined behaviour, but each compiler defines (and
+ * documents) what it does. Most/all compilers do an arithmetic shift,
+ * which is a native operation on the underlying CPU; GCC explicitly
+ * documents that it follows that convention.
+ *
+ * Still, if BR_NO_ARITH_SHIFT is defined (and non-zero), then an
+ * alternate version is used, that does not rely on such
+ * implementation-defined behaviour; it is slower and yields bigger
+ * code, hence deactivated by default. Its sign fill is shifted in two
+ * steps (31 - n, then 1) so that n == 0 never shifts by 32 bits.
+ */
+#if BR_NO_ARITH_SHIFT
+#define ARSH(x, n)   (((uint32_t)(x) >> (n)) \
+                      | (((-((uint32_t)(x) >> 31)) << (31 - (n))) << 1))
+#else
+#define ARSH(x, n)   ((*(int32_t *)&(x)) >> (n))
+#endif
+
+/*
+ * Constant-time division. The dividend hi:lo is divided by the
+ * divisor d; the quotient is returned and the remainder is written
+ * in *r. If hi == d, then the quotient does not fit in 32 bits; the
+ * returned value is thus truncated. If hi > d, returned values are
+ * indeterminate.
+ */
+uint32_t br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r);
+
+/*
+ * Remainder-only wrapper for br_divrem(): the quotient is computed but
+ * dropped. The constraints on hi, lo and d are those of br_divrem().
+ */
+static inline uint32_t
+br_rem(uint32_t hi, uint32_t lo, uint32_t d)
+{
+	uint32_t rem;
+
+	(void)br_divrem(hi, lo, d, &rem);
+	return rem;
+}
+
+/*
+ * Quotient-only wrapper for br_divrem(): the remainder is dropped.
+ */
+static inline uint32_t
+br_div(uint32_t hi, uint32_t lo, uint32_t d)
+{
+	uint32_t ignored, q;
+
+	q = br_divrem(hi, lo, d, &ignored);
+	return q;
+}
+
+/* ==================================================================== */
+
+/*
+ * Integers 'i32'
+ * --------------
+ *
+ * The 'i32' functions implement computations on big integers using
+ * an internal representation as an array of 32-bit integers. For
+ * an array x[]:
+ *  -- x[0] contains the "announced bit length" of the integer
+ *  -- x[1], x[2]... contain the value in little-endian order (x[1]
+ *     contains the least significant 32 bits)
+ *
+ * Multiplications rely on the elementary 32x32->64 multiplication.
+ *
+ * The announced bit length specifies the number of bits that are
+ * significant in the subsequent 32-bit words. Unused bits in the
+ * last (most significant) word are set to 0; subsequent words are
+ * uninitialized and need not exist at all.
+ *
+ * The execution time and memory access patterns of all computations
+ * depend on the announced bit length, but not on the actual word
+ * values. For modular integers, the announced bit length of any integer
+ * modulo n is equal to the actual bit length of n; thus, computations
+ * on modular integers are "constant-time" (only the modulus length may
+ * leak).
+ */
+
+/*
+ * Compute the actual bit length of an integer. The argument x should
+ * point to the first (least significant) value word of the integer.
+ * The 'xlen' parameter is the number of 32-bit words to access.
+ *
+ * CT: value or length of x does not leak.
+ */
+uint32_t br_i32_bit_length(uint32_t *x, size_t xlen);
+
+/*
+ * Decode an integer from its big-endian unsigned representation ('len'
+ * bytes at 'src'). The "true" bit length of the integer is computed, but
+ * all words of x[] corresponding to the full 'len' bytes are set.
+ *
+ * CT: value or length of x does not leak.
+ */
+void br_i32_decode(uint32_t *x, const void *src, size_t len);
+
+/*
+ * Decode an integer from its big-endian unsigned representation. The
+ * integer MUST be lower than m[]; the announced bit length written in
+ * x[] will be equal to that of m[]. All 'len' bytes from the source are
+ * read.
+ *
+ * Returned value is 1 if the decoded value fits within the modulus, 0
+ * otherwise. In the latter case, the x[] buffer will be set to 0 (but
+ * still with the announced bit length of m[]).
+ *
+ * CT: value or length of x does not leak. Memory access pattern depends
+ * only on 'len' and the announced bit length of m. Whether x fits or
+ * not does not leak either.
+ */
+uint32_t br_i32_decode_mod(uint32_t *x,
+	const void *src, size_t len, const uint32_t *m);
+
+/*
+ * Reduce an integer (a[]) modulo another (m[]). The result is written
+ * in x[], whose announced bit length is set to that of m[].
+ *
+ * x[] MUST be distinct from a[] and m[].
+ *
+ * CT: only announced bit lengths leak, not values of x, a or m.
+ */
+void br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m);
+
+/*
+ * Decode an integer from its big-endian unsigned representation ('len'
+ * bytes at 'src'), and reduce it modulo the provided modulus m[]. The
+ * announced bit length of the result x[] is set to that of the modulus.
+ *
+ * x[] MUST be distinct from m[].
+ */
+void br_i32_decode_reduce(uint32_t *x,
+	const void *src, size_t len, const uint32_t *m);
+
+/*
+ * Encode an integer into its big-endian unsigned representation. The
+ * output length in bytes is provided (parameter 'len'); if that length
+ * is too short then the integer is appropriately truncated; if it is
+ * too long then the extra bytes are set to 0.
+ */
+void br_i32_encode(void *dst, size_t len, const uint32_t *x);
+
+/*
+ * Multiply x[] by 2^32 and then add integer z, modulo m[]; the result
+ * replaces x[]. This function assumes that x[] and m[] have the same
+ * announced bit length, and that the announced bit length of m[]
+ * matches its true bit length.
+ *
+ * x[] and m[] MUST be distinct arrays.
+ *
+ * CT: only the common announced bit length of x and m leaks, not
+ * the values of x, z or m.
+ */
+void br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m);
+
+/*
+ * Extract one word from an integer. The offset is counted in bits.
+ * The word MUST entirely fit within the word elements corresponding
+ * to the announced bit length of a[].
+ */
+static inline uint32_t
+br_i32_word(const uint32_t *a, uint32_t off)
+{
+	/* Value words start at a[1]; step over whole 32-bit words first. */
+	const uint32_t *w = a + 1 + (size_t)(off >> 5);
+	unsigned sh = (unsigned)off & 31;
+
+	/* Aligned offset: a single word is read, and no shift by 32 occurs. */
+	if (sh == 0) {
+		return w[0];
+	}
+	/* Unaligned offset: join the top of w[0] with the bottom of w[1]. */
+	return (w[0] >> sh) | (w[1] << (32 - sh));
+}
+
+/*
+ * Test whether an integer is zero.
+ */
+uint32_t br_i32_iszero(const uint32_t *x);
+
+/*
+ * Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[]
+ * is unmodified, but the carry is still computed and returned. The
+ * arrays a[] and b[] MUST have the same announced bit length.
+ *
+ * a[] and b[] MAY be the same array, but partial overlap is not allowed.
+ */
+uint32_t br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl);
+
+/*
+ * Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0,
+ * then a[] is unmodified, but the carry is still computed and returned.
+ * The arrays a[] and b[] MUST have the same announced bit length.
+ *
+ * a[] and b[] MAY be the same array, but partial overlap is not allowed.
+ */
+uint32_t br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl);
+
+/*
+ * Compute d+a*b, result in d. The initial announced bit length of d[]
+ * MUST match that of a[]. The d[] array MUST be large enough to
+ * accommodate the full result, plus (possibly) an extra word. The
+ * resulting announced bit length of d[] will be the sum of the announced
+ * bit lengths of a[] and b[] (therefore, it may be larger than the actual
+ * bit length of the numerical result).
+ *
+ * a[] and b[] may be the same array. d[] must be disjoint from both a[]
+ * and b[].
+ */
+void br_i32_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b);
+
+/*
+ * Zeroize an integer. The announced bit length is set to the provided
+ * value, and the corresponding words are set to 0.
+ */
+static inline void
+br_i32_zero(uint32_t *x, uint32_t bit_len)
+{
+	x[0] = bit_len;	/* header word receives the announced bit length */
+	memset(&x[1], 0, sizeof x[1] * ((bit_len + 31) >> 5));
+}
+
+/*
+ * Compute -(1/x) mod 2^32. If x is even, then this function returns 0.
+ */
+uint32_t br_i32_ninv32(uint32_t x);
+
+/*
+ * Convert a modular integer to Montgomery representation. The integer x[]
+ * MUST be lower than m[], but with the same announced bit length.
+ */
+void br_i32_to_monty(uint32_t *x, const uint32_t *m);
+
+/*
+ * Convert a modular integer back from Montgomery representation. The
+ * integer x[] MUST be lower than m[], but with the same announced bit
+ * length. The "m0i" parameter is equal to -(1/m0) mod 2^32, where m0 is
+ * the least significant value word of m[] (this works only if m[] is
+ * an odd integer).
+ */
+void br_i32_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i);
+
+/*
+ * Compute a modular Montgomery multiplication. d[] is filled with the
+ * value of x*y/R modulo m[] (where R is the Montgomery factor). The
+ * array d[] MUST be distinct from x[], y[] and m[]. x[] and y[] MUST be
+ * numerically lower than m[]. x[] and y[] MAY be the same array. The
+ * "m0i" parameter is equal to -(1/m0) mod 2^32, where m0 is the least
+ * significant value word of m[] (this works only if m[] is an odd
+ * integer).
+ */
+void br_i32_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y,
+	const uint32_t *m, uint32_t m0i);
+
+/*
+ * Compute a modular exponentiation. x[] MUST be an integer modulo m[]
+ * (same announced bit length, lower value). m[] MUST be odd. The
+ * exponent is in big-endian unsigned notation, over 'elen' bytes. The
+ * "m0i" parameter is equal to -(1/m0) mod 2^32, where m0 is the least
+ * significant value word of m[] (this works only if m[] is an odd
+ * integer). The t1[] and t2[] parameters must be temporary arrays,
+ * each large enough to accommodate an integer with the same size as m[].
+ */
+void br_i32_modpow(uint32_t *x, const unsigned char *e, size_t elen,
+	const uint32_t *m, uint32_t m0i, uint32_t *t1, uint32_t *t2);
+
+/* ==================================================================== */
+
+/*
+ * Integers 'i31'
+ * --------------
+ *
+ * The 'i31' functions implement computations on big integers using
+ * an internal representation as an array of 32-bit integers. For
+ * an array x[]:
+ *  -- x[0] encodes the array length and the "announced bit length"
+ *     of the integer: namely, if the announced bit length is k,
+ *     then x[0] = ((k / 31) << 5) + (k % 31).
+ *  -- x[1], x[2]... contain the value in little-endian order, 31
+ *     bits per word (x[1] contains the least significant 31 bits).
+ *     The upper bit of each word is 0.
+ *
+ * Multiplications rely on the elementary 32x32->64 multiplication.
+ *
+ * The announced bit length specifies the number of bits that are
+ * significant in the subsequent 32-bit words. Unused bits in the
+ * last (most significant) word are set to 0; subsequent words are
+ * uninitialized and need not exist at all.
+ *
+ * The execution time and memory access patterns of all computations
+ * depend on the announced bit length, but not on the actual word
+ * values. For modular integers, the announced bit length of any integer
+ * modulo n is equal to the actual bit length of n; thus, computations
+ * on modular integers are "constant-time" (only the modulus length may
+ * leak).
+ */
+
+/*
+ * Test whether an integer is zero.
+ */
+uint32_t br_i31_iszero(const uint32_t *x);
+
+/*
+ * Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[]
+ * is unmodified, but the carry is still computed and returned. The
+ * arrays a[] and b[] MUST have the same announced bit length.
+ *
+ * a[] and b[] MAY be the same array, but partial overlap is not allowed.
+ */
+uint32_t br_i31_add(uint32_t *a, const uint32_t *b, uint32_t ctl);
+
+/*
+ * Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0,
+ * then a[] is unmodified, but the carry is still computed and returned.
+ * The arrays a[] and b[] MUST have the same announced bit length.
+ *
+ * a[] and b[] MAY be the same array, but partial overlap is not allowed.
+ */
+uint32_t br_i31_sub(uint32_t *a, const uint32_t *b, uint32_t ctl);
+
+/*
+ * Compute the ENCODED actual bit length of an integer. The argument x
+ * should point to the first (least significant) value word of the
+ * integer. The len 'xlen' contains the number of 32-bit words to
+ * access. The upper bit of each value word MUST be 0.
+ * Returned value is ((k / 31) << 5) + (k % 31) if the bit length is k.
+ *
+ * CT: value or length of x does not leak.
+ */
+uint32_t br_i31_bit_length(uint32_t *x, size_t xlen);
+
+/*
+ * Decode an integer from its big-endian unsigned representation. The
+ * "true" bit length of the integer is computed and set in the encoded
+ * announced bit length (x[0]), but all words of x[] corresponding to
+ * the full 'len' bytes of the source are set.
+ *
+ * CT: value or length of x does not leak.
+ */
+void br_i31_decode(uint32_t *x, const void *src, size_t len);
+
+/*
+ * Decode an integer from its big-endian unsigned representation. The
+ * integer MUST be lower than m[]; the (encoded) announced bit length
+ * written in x[] will be equal to that of m[]. All 'len' bytes from the
+ * source are read.
+ *
+ * Returned value is 1 if the decoded value fits within the modulus, 0
+ * otherwise. In the latter case, the x[] buffer will be set to 0 (but
+ * still with the announced bit length of m[]).
+ *
+ * CT: value or length of x does not leak. Memory access pattern depends
+ * only on 'len' and the announced bit length of m. Whether x fits or
+ * not does not leak either.
+ */
+uint32_t br_i31_decode_mod(uint32_t *x,
+	const void *src, size_t len, const uint32_t *m);
+
+/*
+ * Zeroize an integer. The announced bit length is set to the provided
+ * value, and the corresponding words are set to 0. The ENCODED bit length
+ * is expected here.
+ */
+static inline void
+br_i31_zero(uint32_t *x, uint32_t bit_len)
+{
+	*x = bit_len;	/* header keeps the ENCODED bit length exactly as given */
+	memset(x + 1, 0, ((bit_len + 31) >> 5) * sizeof(uint32_t));
+}
+
+/*
+ * Right-shift an integer. The shift amount must be lower than 31
+ * bits.
+ */
+void br_i31_rshift(uint32_t *x, int count);
+
+/*
+ * Reduce an integer (a[]) modulo another (m[]). The result is written
+ * in x[] and its announced bit length is set to be equal to that of m[].
+ *
+ * x[] MUST be distinct from a[] and m[].
+ *
+ * CT: only announced bit lengths leak, not values of x, a or m.
+ */
+void br_i31_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m);
+
+/*
+ * Decode an integer from its big-endian unsigned representation, and
+ * reduce it modulo the provided modulus m[]. The announced bit length
+ * of the result is set to be equal to that of the modulus.
+ *
+ * x[] MUST be distinct from m[].
+ */
+void br_i31_decode_reduce(uint32_t *x,
+	const void *src, size_t len, const uint32_t *m);
+
+/*
+ * Multiply x[] by 2^31 and then add integer z, modulo m[]. This
+ * function assumes that x[] and m[] have the same announced bit
+ * length, and the announced bit length of m[] matches its true
+ * bit length.
+ *
+ * x[] and m[] MUST be distinct arrays. z MUST fit in 31 bits (upper
+ * bit set to 0).
+ *
+ * CT: only the common announced bit length of x and m leaks, not
+ * the values of x, z or m.
+ */
+void br_i31_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m);
+
+/*
+ * Encode an integer into its big-endian unsigned representation. The
+ * output length in bytes is provided (parameter 'len'); if the length
+ * is too short then the integer is appropriately truncated; if it is
+ * too long then the extra bytes are set to 0.
+ */
+void br_i31_encode(void *dst, size_t len, const uint32_t *x);
+
+/*
+ * Compute -(1/x) mod 2^31. If x is even, then this function returns 0.
+ */
+uint32_t br_i31_ninv31(uint32_t x);
+
+/*
+ * Compute a modular Montgomery multiplication. d[] is filled with the
+ * value of x*y/R modulo m[] (where R is the Montgomery factor). The
+ * array d[] MUST be distinct from x[], y[] and m[]. x[] and y[] MUST be
+ * numerically lower than m[]. x[] and y[] MAY be the same array. The
+ * "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is the least
+ * significant value word of m[] (this works only if m[] is an odd
+ * integer).
+ */
+void br_i31_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y,
+	const uint32_t *m, uint32_t m0i);
+
+/*
+ * Convert a modular integer to Montgomery representation. The integer x[]
+ * MUST be lower than m[], but with the same announced bit length.
+ */
+void br_i31_to_monty(uint32_t *x, const uint32_t *m);
+
+/*
+ * Convert a modular integer back from Montgomery representation. The
+ * integer x[] MUST be lower than m[], but with the same announced bit
+ * length. The "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is
+ * the least significant value word of m[] (this works only if m[] is
+ * an odd integer).
+ */
+void br_i31_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i);
+
+/*
+ * Compute a modular exponentiation. x[] MUST be an integer modulo m[]
+ * (same announced bit length, lower value). m[] MUST be odd. The
+ * exponent is in big-endian unsigned notation, over 'elen' bytes. The
+ * "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is the least
+ * significant value word of m[] (this works only if m[] is an odd
+ * integer). The t1[] and t2[] parameters must be temporary arrays,
+ * each large enough to accommodate an integer with the same size as m[].
+ */
+void br_i31_modpow(uint32_t *x, const unsigned char *e, size_t elen,
+	const uint32_t *m, uint32_t m0i, uint32_t *t1, uint32_t *t2);
+
+/*
+ * Compute a modular exponentiation. x[] MUST be an integer modulo m[]
+ * (same announced bit length, lower value). m[] MUST be odd. The
+ * exponent is in big-endian unsigned notation, over 'elen' bytes. The
+ * "m0i" parameter is equal to -(1/m0) mod 2^31, where m0 is the least
+ * significant value word of m[] (this works only if m[] is an odd
+ * integer). The tmp[] array is used for temporaries, and has size
+ * 'twlen' words; it must be large enough to accommodate at least two
+ * temporary values with the same size as m[] (including the leading
+ * "bit length" word). If there is room for more temporaries, then this
+ * function may use the extra room for window-based optimisation,
+ * resulting in faster computations.
+ *
+ * Returned value is 1 on success, 0 on error. An error is reported if
+ * the provided tmp[] array is too short.
+ */
+uint32_t br_i31_modpow_opt(uint32_t *x, const unsigned char *e, size_t elen,
+	const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen);
+
+/*
+ * Compute d+a*b, result in d. The initial announced bit length of d[]
+ * MUST match that of a[]. The d[] array MUST be large enough to
+ * accommodate the full result, plus (possibly) an extra word. The
+ * resulting announced bit length of d[] will be the sum of the announced
+ * bit lengths of a[] and b[] (therefore, it may be larger than the actual
+ * bit length of the numerical result).
+ *
+ * a[] and b[] may be the same array. d[] must be disjoint from both a[]
+ * and b[].
+ */
+void br_i31_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b);
+
+/*
+ * Compute x/y mod m, result in x. Values x and y must be between 0 and
+ * m-1, and have the same announced bit length as m. Modulus m must be
+ * odd. The "m0i" parameter is equal to -1/m mod 2^31. The array 't'
+ * must point to a temporary area that can hold at least three integers
+ * of the size of m.
+ *
+ * m may not overlap x and y. x and y may overlap each other (this can
+ * be useful to test whether a value is invertible modulo m). t must be
+ * disjoint from all other arrays.
+ *
+ * Returned value is 1 on success, 0 otherwise. Success is attained if
+ * y is invertible modulo m.
+ */
+uint32_t br_i31_moddiv(uint32_t *x, const uint32_t *y,
+	const uint32_t *m, uint32_t m0i, uint32_t *t);
+
+/* ==================================================================== */
+
+/*
+ * FIXME: document "i15" functions.
+ */
+
+static inline void
+br_i15_zero(uint16_t *x, uint16_t bit_len)
+{
+	x[0] = bit_len;	/* header word */
+	memset(x + 1, 0, sizeof x[0] * (size_t)((bit_len + 15) >> 4));
+}
+
+uint32_t br_i15_iszero(const uint16_t *x);
+
+uint16_t br_i15_ninv15(uint16_t x);
+
+uint32_t br_i15_add(uint16_t *a, const uint16_t *b, uint32_t ctl);
+
+uint32_t br_i15_sub(uint16_t *a, const uint16_t *b, uint32_t ctl);
+
+void br_i15_muladd_small(uint16_t *x, uint16_t z, const uint16_t *m);
+
+void br_i15_montymul(uint16_t *d, const uint16_t *x, const uint16_t *y,
+	const uint16_t *m, uint16_t m0i);
+
+void br_i15_to_monty(uint16_t *x, const uint16_t *m);
+
+void br_i15_modpow(uint16_t *x, const unsigned char *e, size_t elen,
+	const uint16_t *m, uint16_t m0i, uint16_t *t1, uint16_t *t2);
+
+uint32_t br_i15_modpow_opt(uint16_t *x, const unsigned char *e, size_t elen,
+	const uint16_t *m, uint16_t m0i, uint16_t *tmp, size_t twlen);
+
+void br_i15_encode(void *dst, size_t len, const uint16_t *x);
+
+uint32_t br_i15_decode_mod(uint16_t *x,
+	const void *src, size_t len, const uint16_t *m);
+
+void br_i15_rshift(uint16_t *x, int count);
+
+uint32_t br_i15_bit_length(uint16_t *x, size_t xlen);
+
+void br_i15_decode(uint16_t *x, const void *src, size_t len);
+
+void br_i15_from_monty(uint16_t *x, const uint16_t *m, uint16_t m0i);
+
+void br_i15_decode_reduce(uint16_t *x,
+	const void *src, size_t len, const uint16_t *m);
+
+void br_i15_reduce(uint16_t *x, const uint16_t *a, const uint16_t *m);
+
+void br_i15_mulacc(uint16_t *d, const uint16_t *a, const uint16_t *b);
+
+uint32_t br_i15_moddiv(uint16_t *x, const uint16_t *y,
+	const uint16_t *m, uint16_t m0i, uint16_t *t);
+
+/*
+ * Variant of br_i31_modpow_opt() that internally uses 64x64->128
+ * multiplications. It expects the same parameters as br_i31_modpow_opt(),
+ * except that the temporaries should be 64-bit integers, not 32-bit
+ * integers.
+ */
+uint32_t br_i62_modpow_opt(uint32_t *x31, const unsigned char *e, size_t elen,
+	const uint32_t *m31, uint32_t m0i31, uint64_t *tmp, size_t twlen);
+
+/*
+ * Type for a function with the same API as br_i31_modpow_opt() (some
+ * implementations of this type may have stricter alignment requirements
+ * on the temporaries).
+ */
+typedef uint32_t (*br_i31_modpow_opt_type)(uint32_t *x,
+	const unsigned char *e, size_t elen,
+	const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen);
+
+/*
+ * Wrapper for br_i62_modpow_opt() that uses the same type as
+ * br_i31_modpow_opt(); however, it requires its 'tmp' argument to be
+ * 64-bit aligned.
+ */
+uint32_t br_i62_modpow_opt_as_i31(uint32_t *x,
+	const unsigned char *e, size_t elen,
+	const uint32_t *m, uint32_t m0i, uint32_t *tmp, size_t twlen);
+
+/* ==================================================================== */
+
+static inline size_t
+br_digest_size(const br_hash_class *digest_class)
+{
+	/* Shift the OUT field of the class descriptor down, then mask it. */
+	size_t shifted = (size_t)(digest_class->desc >> BR_HASHDESC_OUT_OFF);
+	return shifted & BR_HASHDESC_OUT_MASK;
+}
+
+/*
+ * Get the output size (in bytes) of a hash function.
+ */
+size_t br_digest_size_by_ID(int digest_id);
+
+/*
+ * Get the OID (encoded OBJECT IDENTIFIER value, without tag and length)
+ * for a hash function. If digest_id is not a supported digest identifier
+ * (in particular if it is equal to 0, i.e. br_md5sha1_ID), then NULL is
+ * returned and *len is set to 0.
+ */
+const unsigned char *br_digest_OID(int digest_id, size_t *len);
+
+/* ==================================================================== */
+/*
+ * DES support functions.
+ */
+
+/*
+ * Apply DES Initial Permutation.
+ */
+void br_des_do_IP(uint32_t *xl, uint32_t *xr);
+
+/*
+ * Apply DES Final Permutation (inverse of IP).
+ */
+void br_des_do_invIP(uint32_t *xl, uint32_t *xr);
+
+/*
+ * Key schedule unit: for a DES key (8 bytes), compute 16 subkeys. Each
+ * subkey is two 28-bit words represented as two 32-bit words; the PC-2
+ * bit extraction is NOT applied.
+ */
+void br_des_keysched_unit(uint32_t *skey, const void *key);
+
+/*
+ * Reversal of 16 DES sub-keys (for decryption).
+ */
+void br_des_rev_skey(uint32_t *skey);
+
+/*
+ * DES/3DES key schedule for 'des_tab' (encryption direction). Returned
+ * value is the number of rounds.
+ */
+unsigned br_des_tab_keysched(uint32_t *skey, const void *key, size_t key_len);
+
+/*
+ * DES/3DES key schedule for 'des_ct' (encryption direction). Returned
+ * value is the number of rounds.
+ */
+unsigned br_des_ct_keysched(uint32_t *skey, const void *key, size_t key_len);
+
+/*
+ * DES/3DES subkey decompression (from the compressed bitsliced subkeys).
+ */
+void br_des_ct_skey_expand(uint32_t *sk_exp,
+	unsigned num_rounds, const uint32_t *skey);
+
+/*
+ * DES/3DES block encryption/decryption ('des_tab').
+ */
+void br_des_tab_process_block(unsigned num_rounds,
+	const uint32_t *skey, void *block);
+
+/*
+ * DES/3DES block encryption/decryption ('des_ct').
+ */
+void br_des_ct_process_block(unsigned num_rounds,
+	const uint32_t *skey, void *block);
+
+/* ==================================================================== */
+/*
+ * AES support functions.
+ */
+
+/*
+ * The AES S-box (256-byte table).
+ */
+extern const unsigned char br_aes_S[];
+
+/*
+ * AES key schedule. skey[] is filled with n+1 128-bit subkeys, where n
+ * is the number of rounds (10 to 14, depending on key size). The number
+ * of rounds is returned. If the key size is invalid (not 16, 24 or 32),
+ * then 0 is returned.
+ *
+ * This implementation uses a 256-byte table and is NOT constant-time.
+ */
+unsigned br_aes_keysched(uint32_t *skey, const void *key, size_t key_len);
+
+/*
+ * AES key schedule for decryption ('aes_big' implementation).
+ */
+unsigned br_aes_big_keysched_inv(uint32_t *skey,
+	const void *key, size_t key_len);
+
+/*
+ * AES block encryption with the 'aes_big' implementation (fast, but
+ * not constant-time). This function encrypts a single block "in place".
+ */
+void br_aes_big_encrypt(unsigned num_rounds, const uint32_t *skey, void *data);
+
+/*
+ * AES block decryption with the 'aes_big' implementation (fast, but
+ * not constant-time). This function decrypts a single block "in place".
+ */
+void br_aes_big_decrypt(unsigned num_rounds, const uint32_t *skey, void *data);
+
+/*
+ * AES block encryption with the 'aes_small' implementation (small, but
+ * slow and not constant-time). This function encrypts a single block
+ * "in place".
+ */
+void br_aes_small_encrypt(unsigned num_rounds,
+	const uint32_t *skey, void *data);
+
+/*
+ * AES block decryption with the 'aes_small' implementation (small, but
+ * slow and not constant-time). This function decrypts a single block
+ * "in place".
+ */
+void br_aes_small_decrypt(unsigned num_rounds,
+	const uint32_t *skey, void *data);
+
+/*
+ * The constant-time implementation is "bitsliced": the 128-bit state is
+ * split over eight 32-bit words q* in the following way:
+ *
+ * -- Input block consists in 16 bytes:
+ *    a00 a10 a20 a30 a01 a11 a21 a31 a02 a12 a22 a32 a03 a13 a23 a33
+ * In the terminology of FIPS 197, this is a 4x4 matrix which is read
+ * column by column.
+ *
+ * -- Each byte is split into eight bits which are distributed over the
+ * eight words, at the same rank. Thus, for a byte x at rank k, bit 0
+ * (least significant) of x will be at rank k in q0 (if that bit is b,
+ * then it contributes "b << k" to the value of q0), bit 1 of x will be
+ * at rank k in q1, and so on.
+ *
+ * -- Ranks given to bits are in "row order" and are either all even, or
+ * all odd. Two independent AES states are thus interleaved, one using
+ * the even ranks, the other the odd ranks. Row order means:
+ *    a00 a01 a02 a03 a10 a11 a12 a13 a20 a21 a22 a23 a30 a31 a32 a33
+ *
+ * Converting input bytes from two AES blocks to bitslice representation
+ * is done in the following way:
+ * -- Decode first block into the four words q0 q2 q4 q6, in that order,
+ * using little-endian convention.
+ * -- Decode second block into the four words q1 q3 q5 q7, in that order,
+ * using little-endian convention.
+ * -- Call br_aes_ct_ortho().
+ *
+ * Converting back to bytes is done by using the reverse operations. Note
+ * that br_aes_ct_ortho() is its own inverse.
+ */
+
+/*
+ * Perform bytewise orthogonalization of eight 32-bit words. Bytes
+ * of q0..q7 are spread over all words: for a byte x that occurs
+ * at rank i in q[j] (byte x uses bits 8*i to 8*i+7 in q[j]), the bit
+ * of rank k in x (0 <= k <= 7) goes to q[k] at rank 8*i+j.
+ *
+ * This operation is an involution.
+ */
+void br_aes_ct_ortho(uint32_t *q);
+
+/*
+ * The AES S-box, as a bitsliced constant-time version. The input array
+ * consists in eight 32-bit words; 32 S-box instances are computed in
+ * parallel. Bits 0 to 7 of each S-box input (bit 0 is least significant)
+ * are spread over the words 0 to 7, at the same rank.
+ */
+void br_aes_ct_bitslice_Sbox(uint32_t *q);
+
+/*
+ * Like br_aes_ct_bitslice_Sbox(), but for the inverse S-box.
+ */
+void br_aes_ct_bitslice_invSbox(uint32_t *q);
+
+/*
+ * Compute AES encryption on bitsliced data. Since input is stored on
+ * eight 32-bit words, two block encryptions are actually performed
+ * in parallel.
+ */
+void br_aes_ct_bitslice_encrypt(unsigned num_rounds,
+	const uint32_t *skey, uint32_t *q);
+
+/*
+ * Compute AES decryption on bitsliced data. Since input is stored on
+ * eight 32-bit words, two block decryptions are actually performed
+ * in parallel.
+ */
+void br_aes_ct_bitslice_decrypt(unsigned num_rounds,
+	const uint32_t *skey, uint32_t *q);
+
+/*
+ * AES key schedule, constant-time version. skey[] is filled with n+1
+ * 128-bit subkeys, where n is the number of rounds (10 to 14, depending
+ * on key size). The number of rounds is returned. If the key size is
+ * invalid (not 16, 24 or 32), then 0 is returned.
+ */
+unsigned br_aes_ct_keysched(uint32_t *comp_skey,
+	const void *key, size_t key_len);
+
+/*
+ * Expand AES subkeys as produced by br_aes_ct_keysched(), into
+ * a larger array suitable for br_aes_ct_bitslice_encrypt() and
+ * br_aes_ct_bitslice_decrypt().
+ */
+void br_aes_ct_skey_expand(uint32_t *skey,
+	unsigned num_rounds, const uint32_t *comp_skey);
+
+/*
+ * For the ct64 implementation, the same bitslicing technique is used,
+ * but four instances are interleaved. First instance uses bits 0, 4,
+ * 8, 12,... of each word; second instance uses bits 1, 5, 9, 13,...
+ * and so on.
+ */
+
+/*
+ * Perform bytewise orthogonalization of eight 64-bit words. Bytes
+ * of q0..q7 are spread over all words: for a byte x that occurs
+ * at rank i in q[j] (byte x uses bits 8*i to 8*i+7 in q[j]), the bit
+ * of rank k in x (0 <= k <= 7) goes to q[k] at rank 8*i+j.
+ *
+ * This operation is an involution.
+ */
+void br_aes_ct64_ortho(uint64_t *q);
+
+/*
+ * Interleave bytes for an AES input block. If input bytes are
+ * denoted 0123456789ABCDEF, and have been decoded with little-endian
+ * convention (w[0] contains 0123, with '3' being most significant;
+ * w[1] contains 4567, and so on), then output word q0 will be
+ * set to 08192A3B (again little-endian convention) and q1 will
+ * be set to 4C5D6E7F.
+ */
+void br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t *w);
+
+/*
+ * Perform the opposite of br_aes_ct64_interleave_in().
+ */
+void br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1);
+
+/*
+ * The AES S-box, as a bitsliced constant-time version. The input array
+ * consists in eight 64-bit words; 64 S-box instances are computed in
+ * parallel. Bits 0 to 7 of each S-box input (bit 0 is least significant)
+ * are spread over the words 0 to 7, at the same rank.
+ */
+void br_aes_ct64_bitslice_Sbox(uint64_t *q);
+
+/*
+ * Like br_aes_ct64_bitslice_Sbox(), but for the inverse S-box.
+ */
+void br_aes_ct64_bitslice_invSbox(uint64_t *q);
+
+/*
+ * Compute AES encryption on bitsliced data. Since input is stored on
+ * eight 64-bit words, four block encryptions are actually performed
+ * in parallel.
+ */
+void br_aes_ct64_bitslice_encrypt(unsigned num_rounds,
+	const uint64_t *skey, uint64_t *q);
+
+/*
+ * Compute AES decryption on bitsliced data. Since input is stored on
+ * eight 64-bit words, four block decryptions are actually performed
+ * in parallel.
+ */
+void br_aes_ct64_bitslice_decrypt(unsigned num_rounds,
+	const uint64_t *skey, uint64_t *q);
+
+/*
+ * AES key schedule, constant-time version. skey[] is filled with n+1
+ * 128-bit subkeys, where n is the number of rounds (10 to 14, depending
+ * on key size). The number of rounds is returned. If the key size is
+ * invalid (not 16, 24 or 32), then 0 is returned.
+ */
+unsigned br_aes_ct64_keysched(uint64_t *comp_skey,
+	const void *key, size_t key_len);
+
+/*
+ * Expand AES subkeys as produced by br_aes_ct64_keysched(), into
+ * a larger array suitable for br_aes_ct64_bitslice_encrypt() and
+ * br_aes_ct64_bitslice_decrypt().
+ */
+void br_aes_ct64_skey_expand(uint64_t *skey,
+	unsigned num_rounds, const uint64_t *comp_skey);
+
+/*
+ * Test support for AES-NI opcodes.
+ */
+int br_aes_x86ni_supported(void);
+
+/*
+ * AES key schedule, using x86 AES-NI instructions. This yields the
+ * subkeys in the encryption direction. Number of rounds is returned.
+ * Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned.
+ */
+unsigned br_aes_x86ni_keysched_enc(unsigned char *skni,
+	const void *key, size_t len);
+
+/*
+ * AES key schedule, using x86 AES-NI instructions. This yields the
+ * subkeys in the decryption direction. Number of rounds is returned.
+ * Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned.
+ */
+unsigned br_aes_x86ni_keysched_dec(unsigned char *skni,
+	const void *key, size_t len);
+
+/*
+ * Test support for AES POWER8 opcodes.
+ */
+int br_aes_pwr8_supported(void);
+
+/*
+ * AES key schedule, using POWER8 instructions. This yields the
+ * subkeys in the encryption direction. Number of rounds is returned.
+ * Key size MUST be 16, 24 or 32 bytes; otherwise, 0 is returned.
+ */
+unsigned br_aes_pwr8_keysched(unsigned char *skni,
+	const void *key, size_t len);
+
+/* ==================================================================== */
+/*
+ * RSA.
+ */
+
+/*
+ * Apply proper PKCS#1 v1.5 padding (for signatures). 'hash_oid' is
+ * the encoded hash function OID, or NULL.
+ */
+uint32_t br_rsa_pkcs1_sig_pad(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	uint32_t n_bitlen, unsigned char *x);
+
+/*
+ * Check PKCS#1 v1.5 padding (for signatures). 'hash_oid' is the encoded
+ * hash function OID, or NULL. The provided 'sig' value is _after_ the
+ * modular exponentiation, i.e. it should be the padded hash. On
+ * success, the hashed message is extracted.
+ */
+uint32_t br_rsa_pkcs1_sig_unpad(const unsigned char *sig, size_t sig_len,
+	const unsigned char *hash_oid, size_t hash_len,
+	unsigned char *hash_out);
+
+/*
+ * Apply proper PSS padding. The 'x' buffer is output only: it
+ * receives the value that is to be exponentiated.
+ */
+uint32_t br_rsa_pss_sig_pad(const br_prng_class **rng,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash, size_t salt_len,
+	uint32_t n_bitlen, unsigned char *x);
+
+/*
+ * Check PSS padding. The provided value is the one _after_
+ * the modular exponentiation; it is modified by this function.
+ * This function infers the signature length from the public key
+ * size, i.e. it assumes that this has already been verified (as
+ * part of the exponentiation).
+ */
+uint32_t br_rsa_pss_sig_unpad(
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash, size_t salt_len,
+	const br_rsa_public_key *pk, unsigned char *x);
+
+/*
+ * Apply OAEP padding (presumably from src[] into dst[], whose capacity is
+ * dst_max_len). Returns the padded string length, or zero on error.
+ */
+size_t br_rsa_oaep_pad(const br_prng_class **rnd, const br_hash_class *dig,
+	const void *label, size_t label_len, const br_rsa_public_key *pk,
+	void *dst, size_t dst_max_len, const void *src, size_t src_len);
+
+/*
+ * Unravel and check OAEP padding. If the padding is correct, then 1 is
+ * returned, '*len' is adjusted to the length of the message, and the
+ * data is moved to the start of the 'data' buffer. If the padding is
+ * incorrect, then 0 is returned and '*len' is untouched. Either way,
+ * the complete buffer contents are altered.
+ */
+uint32_t br_rsa_oaep_unpad(const br_hash_class *dig,
+	const void *label, size_t label_len, void *data, size_t *len);
+
+/*
+ * Compute MGF1 for a given seed, and XOR the output into the provided
+ * buffer.
+ */
+void br_mgf1_xor(void *data, size_t len,
+	const br_hash_class *dig, const void *seed, size_t seed_len);
+
+/*
+ * Inner function for RSA key generation; used by the "i31" and "i62"
+ * implementations.
+ */
+uint32_t br_rsa_i31_keygen_inner(const br_prng_class **rng,
+	br_rsa_private_key *sk, void *kbuf_priv,
+	br_rsa_public_key *pk, void *kbuf_pub,
+	unsigned size, uint32_t pubexp, br_i31_modpow_opt_type mp31);
+
+/* ==================================================================== */
+/*
+ * Elliptic curves.
+ */
+
+/*
+ * Type for generic EC parameters: curve order (unsigned big-endian
+ * encoding) and encoded conventional generator.
+ */
+typedef struct {
+	int curve;	/* NOTE(review): presumably the curve identifier; confirm */
+	const unsigned char *order;	/* curve order, unsigned big-endian */
+	size_t order_len;	/* NOTE(review): presumably length of order[] in bytes */
+	const unsigned char *generator;	/* encoded conventional generator */
+	size_t generator_len;	/* NOTE(review): presumably length of generator[] */
+} br_ec_curve_def;
+
+extern const br_ec_curve_def br_secp256r1;
+extern const br_ec_curve_def br_secp384r1;
+extern const br_ec_curve_def br_secp521r1;
+
+/*
+ * For Curve25519, the advertised "order" really is 2^255-1, since the
+ * point multiplication function really works over arbitrary 255-bit
+ * scalars. This value is only meant as a hint for ECDH key generation;
+ * only ECDSA uses the exact curve order, and ECDSA is not used with
+ * that specific curve.
+ */
+extern const br_ec_curve_def br_curve25519;
+
+/*
+ * Decode some bytes as an i31 integer, with truncation (corresponding
+ * to the 'bits2int' operation in RFC 6979). The target ENCODED bit
+ * length is provided as last parameter. The resulting value will have
+ * this declared bit length, and consists of the big-endian unsigned decoding
+ * of exactly that many bits in the source (capped at the source length).
+ */
+void br_ecdsa_i31_bits2int(uint32_t *x,
+	const void *src, size_t len, uint32_t ebitlen);
+
+/*
+ * Decode some bytes as an i15 integer, with truncation (corresponding
+ * to the 'bits2int' operation in RFC 6979). The target ENCODED bit
+ * length is provided as last parameter. The resulting value will have
+ * this declared bit length, and consists of the big-endian unsigned decoding
+ * of exactly that many bits in the source (capped at the source length).
+ */
+void br_ecdsa_i15_bits2int(uint16_t *x,
+	const void *src, size_t len, uint32_t ebitlen);
+
+/* ==================================================================== */
+/*
+ * ASN.1 support functions.
+ */
+
+/*
+ * A br_asn1_uint structure contains encoding information about an
+ * INTEGER nonnegative value: pointer to the integer contents (unsigned
+ * big-endian representation), length of the integer contents,
+ * and length of the encoded value. The data shall have minimal length:
+ *  - If the integer value is zero, then 'len' must be zero.
+ *  - If the integer value is not zero, then data[0] must be non-zero.
+ *
+ * Under these conditions, 'asn1len' is necessarily equal to either len
+ * or len+1.
+ */
+typedef struct {
+	const unsigned char *data;   /* integer contents, unsigned big-endian, minimal length */
+	size_t len;                  /* length of the integer contents (0 for the value zero) */
+	size_t asn1len;              /* length of the encoded value: either len or len+1 */
+} br_asn1_uint;
+
+/*
+ * Given an encoded integer (unsigned big-endian, with possible leading
+ * bytes of value 0), return the "prepared INTEGER" structure.
+ */
+br_asn1_uint br_asn1_uint_prepare(const void *xdata, size_t xlen);
+
+/*
+ * Encode an ASN.1 length. The length of the encoded length is returned.
+ * If 'dest' is NULL, then no encoding is performed, but the length of
+ * the encoded length is still computed and returned.
+ */
+size_t br_asn1_encode_length(void *dest, size_t len);
+
+/*
+ * Convenient macro for computing lengths of lengths.
+ */
+#define len_of_len(len)   br_asn1_encode_length(NULL, len)
+
+/*
+ * Encode a (prepared) ASN.1 INTEGER. The encoded length is returned.
+ * If 'dest' is NULL, then no encoding is performed, but the length of
+ * the encoded integer is still computed and returned.
+ */
+size_t br_asn1_encode_uint(void *dest, br_asn1_uint pp);
+
+/*
+ * Get the OID that identifies an elliptic curve. Returned value is
+ * the DER-encoded OID, with the length (always one byte) but without
+ * the tag. Thus, the first byte of the returned buffer contains the
+ * number of subsequent bytes in the value. If the curve is not
+ * recognised, NULL is returned.
+ */
+const unsigned char *br_get_curve_OID(int curve);
+
+/*
+ * Inner function for EC private key encoding. This is equivalent to
+ * the API function br_encode_ec_raw_der(), except for an extra
+ * parameter: if 'include_curve_oid' is zero, then the curve OID is
+ * _not_ included in the output blob (this is for PKCS#8 support).
+ */
+size_t br_encode_ec_raw_der_inner(void *dest,
+	const br_ec_private_key *sk, const br_ec_public_key *pk,
+	int include_curve_oid);
+
+/* ==================================================================== */
+/*
+ * SSL/TLS support functions.
+ */
+
+/*
+ * Record types.
+ */
+#define BR_SSL_CHANGE_CIPHER_SPEC    20
+#define BR_SSL_ALERT                 21
+#define BR_SSL_HANDSHAKE             22
+#define BR_SSL_APPLICATION_DATA      23
+
+/*
+ * Handshake message types.
+ */
+#define BR_SSL_HELLO_REQUEST          0
+#define BR_SSL_CLIENT_HELLO           1
+#define BR_SSL_SERVER_HELLO           2
+#define BR_SSL_CERTIFICATE           11
+#define BR_SSL_SERVER_KEY_EXCHANGE   12
+#define BR_SSL_CERTIFICATE_REQUEST   13
+#define BR_SSL_SERVER_HELLO_DONE     14
+#define BR_SSL_CERTIFICATE_VERIFY    15
+#define BR_SSL_CLIENT_KEY_EXCHANGE   16
+#define BR_SSL_FINISHED              20
+
+/*
+ * Alert levels.
+ */
+#define BR_LEVEL_WARNING   1
+#define BR_LEVEL_FATAL     2
+
+/*
+ * Low-level I/O state.
+ */
+#define BR_IO_FAILED   0
+#define BR_IO_IN       1
+#define BR_IO_OUT      2
+#define BR_IO_INOUT    3
+
+/*
+ * Mark a SSL engine as failed. The provided error code is recorded if
+ * the engine was not already marked as failed. If 'err' is 0, then the
+ * engine is marked as closed (without error).
+ */
+void br_ssl_engine_fail(br_ssl_engine_context *cc, int err);
+
+/*
+ * Return 1 if the engine is closed (normally or as a failure), else 0.
+ */
+static inline int
+br_ssl_engine_closed(const br_ssl_engine_context *cc)
+{
+	return cc->iomode == BR_IO_FAILED; /* closure is detected as iomode == BR_IO_FAILED */
+}
+
+/*
+ * Configure a new maximum fragment length. If possible, the maximum
+ * length for outgoing records is immediately adjusted (if there are
+ * not already too many buffered bytes for that).
+ */
+void br_ssl_engine_new_max_frag_len(
+	br_ssl_engine_context *rc, unsigned max_frag_len);
+
+/*
+ * Test whether the current incoming record has been fully received
+ * or not. This function returns 0 only if a complete record header
+ * has been received, but some of the (possibly encrypted) payload
+ * has not yet been obtained.
+ */
+int br_ssl_engine_recvrec_finished(const br_ssl_engine_context *rc);
+
+/*
+ * Flush the current record (if not empty). This is meant to be called
+ * from the handshake processor only.
+ */
+void br_ssl_engine_flush_record(br_ssl_engine_context *cc);
+
+/*
+ * Test whether there is some accumulated payload to send (returns 0 or 1).
+ */
+static inline int
+br_ssl_engine_has_pld_to_send(const br_ssl_engine_context *rc)
+{
+	return rc->oxa != rc->oxb && rc->oxa != rc->oxc; /* NOTE(review): oxa/oxb/oxc look like output buffer offsets -- confirm in bearssl_ssl.h */
+}
+
+/*
+ * Initialize RNG in engine. Returned value is 1 on success, 0 on error.
+ * This function will try to use the OS-provided RNG, if available. If
+ * there is no OS-provided RNG, or if it failed, and no entropy was
+ * injected by the caller, then a failure will be reported. On error,
+ * the context error code is set.
+ */
+int br_ssl_engine_init_rand(br_ssl_engine_context *cc);
+
+/*
+ * Reset the handshake-related parts of the engine.
+ */
+void br_ssl_engine_hs_reset(br_ssl_engine_context *cc,
+	void (*hsinit)(void *), void (*hsrun)(void *));
+
+/*
+ * Get the PRF to use for this context, for the provided PRF hash
+ * function ID.
+ */
+br_tls_prf_impl br_ssl_engine_get_PRF(br_ssl_engine_context *cc, int prf_id);
+
+/*
+ * Consume the provided pre-master secret and compute the corresponding
+ * master secret. The 'prf_id' is the ID of the hash function to use
+ * with the TLS 1.2 PRF (ignored if the version is TLS 1.0 or 1.1).
+ */
+void br_ssl_engine_compute_master(br_ssl_engine_context *cc,
+	int prf_id, const void *pms, size_t len);
+
+/*
+ * Switch to CBC decryption for incoming records.
+ *    cc               the engine context
+ *    is_client        non-zero for a client, zero for a server
+ *    prf_id           id of hash function for PRF (ignored if not TLS 1.2+)
+ *    mac_id           id of hash function for HMAC
+ *    bc_impl          block cipher implementation (CBC decryption)
+ *    cipher_key_len   block cipher key length (in bytes)
+ */
+void br_ssl_engine_switch_cbc_in(br_ssl_engine_context *cc,
+	int is_client, int prf_id, int mac_id,
+	const br_block_cbcdec_class *bc_impl, size_t cipher_key_len);
+
+/*
+ * Switch to CBC encryption for outgoing records.
+ *    cc               the engine context
+ *    is_client        non-zero for a client, zero for a server
+ *    prf_id           id of hash function for PRF (ignored if not TLS 1.2+)
+ *    mac_id           id of hash function for HMAC
+ *    bc_impl          block cipher implementation (CBC encryption)
+ *    cipher_key_len   block cipher key length (in bytes)
+ */
+void br_ssl_engine_switch_cbc_out(br_ssl_engine_context *cc,
+	int is_client, int prf_id, int mac_id,
+	const br_block_cbcenc_class *bc_impl, size_t cipher_key_len);
+
+/*
+ * Switch to GCM decryption for incoming records.
+ *    cc               the engine context
+ *    is_client        non-zero for a client, zero for a server
+ *    prf_id           id of hash function for PRF
+ *    bc_impl          block cipher implementation (CTR)
+ *    cipher_key_len   block cipher key length (in bytes)
+ */
+void br_ssl_engine_switch_gcm_in(br_ssl_engine_context *cc,
+	int is_client, int prf_id,
+	const br_block_ctr_class *bc_impl, size_t cipher_key_len);
+
+/*
+ * Switch to GCM encryption for outgoing records.
+ *    cc               the engine context
+ *    is_client        non-zero for a client, zero for a server
+ *    prf_id           id of hash function for PRF
+ *    bc_impl          block cipher implementation (CTR)
+ *    cipher_key_len   block cipher key length (in bytes)
+ */
+void br_ssl_engine_switch_gcm_out(br_ssl_engine_context *cc,
+	int is_client, int prf_id,
+	const br_block_ctr_class *bc_impl, size_t cipher_key_len);
+
+/*
+ * Switch to ChaCha20+Poly1305 decryption for incoming records.
+ *    cc               the engine context
+ *    is_client        non-zero for a client, zero for a server
+ *    prf_id           id of hash function for PRF
+ */
+void br_ssl_engine_switch_chapol_in(br_ssl_engine_context *cc,
+	int is_client, int prf_id);
+
+/*
+ * Switch to ChaCha20+Poly1305 encryption for outgoing records.
+ *    cc               the engine context
+ *    is_client        non-zero for a client, zero for a server
+ *    prf_id           id of hash function for PRF
+ */
+void br_ssl_engine_switch_chapol_out(br_ssl_engine_context *cc,
+	int is_client, int prf_id);
+
+/*
+ * Switch to CCM decryption for incoming records.
+ *    cc               the engine context
+ *    is_client        non-zero for a client, zero for a server
+ *    prf_id           id of hash function for PRF
+ *    bc_impl          block cipher implementation (CTR+CBC)
+ *    cipher_key_len   block cipher key length (in bytes)
+ *    tag_len          tag length (in bytes)
+ */
+void br_ssl_engine_switch_ccm_in(br_ssl_engine_context *cc,
+	int is_client, int prf_id,
+	const br_block_ctrcbc_class *bc_impl,
+	size_t cipher_key_len, size_t tag_len);
+
+/*
+ * Switch to CCM encryption for outgoing records.
+ *    cc               the engine context
+ *    is_client        non-zero for a client, zero for a server
+ *    prf_id           id of hash function for PRF
+ *    bc_impl          block cipher implementation (CTR+CBC)
+ *    cipher_key_len   block cipher key length (in bytes)
+ *    tag_len          tag length (in bytes)
+ */
+void br_ssl_engine_switch_ccm_out(br_ssl_engine_context *cc,
+	int is_client, int prf_id,
+	const br_block_ctrcbc_class *bc_impl,
+	size_t cipher_key_len, size_t tag_len);
+
+/*
+ * Calls to T0-generated code.
+ */
+void br_ssl_hs_client_init_main(void *ctx);
+void br_ssl_hs_client_run(void *ctx);
+void br_ssl_hs_server_init_main(void *ctx);
+void br_ssl_hs_server_run(void *ctx);
+
+/*
+ * Get the hash function to use for signatures, given a bit mask of
+ * supported hash functions. This implements a strict choice order
+ * (namely SHA-256, SHA-384, SHA-512, SHA-224, SHA-1). If the mask
+ * does not document support of any of these hash functions, then this
+ * function returns 0.
+ */
+int br_ssl_choose_hash(unsigned bf);
+
+/* ==================================================================== */
+
+/*
+ * PowerPC / POWER assembly stuff. The special BR_POWER_ASM_MACROS macro
+ * must be defined before including this file; this is done by source
+ * files that use some inline assembly for PowerPC / POWER machines.
+ */
+
+#if BR_POWER_ASM_MACROS
+
+#define lxvw4x(xt, ra, rb)        lxvw4x_(xt, ra, rb)
+#define stxvw4x(xt, ra, rb)       stxvw4x_(xt, ra, rb)
+
+#define bdnz(foo)                 bdnz_(foo)
+#define bdz(foo)                  bdz_(foo)
+#define beq(foo)                  beq_(foo)
+
+#define li(rx, value)             li_(rx, value)
+#define addi(rx, ra, imm)         addi_(rx, ra, imm)
+#define cmpldi(rx, imm)           cmpldi_(rx, imm)
+#define mtctr(rx)                 mtctr_(rx)
+#define vspltb(vrt, vrb, uim)     vspltb_(vrt, vrb, uim)
+#define vspltw(vrt, vrb, uim)     vspltw_(vrt, vrb, uim)
+#define vspltisb(vrt, imm)        vspltisb_(vrt, imm)
+#define vspltisw(vrt, imm)        vspltisw_(vrt, imm)
+#define vrlw(vrt, vra, vrb)       vrlw_(vrt, vra, vrb)
+#define vsbox(vrt, vra)           vsbox_(vrt, vra)
+#define vxor(vrt, vra, vrb)       vxor_(vrt, vra, vrb)
+#define vand(vrt, vra, vrb)       vand_(vrt, vra, vrb)
+#define vsro(vrt, vra, vrb)       vsro_(vrt, vra, vrb)
+#define vsl(vrt, vra, vrb)        vsl_(vrt, vra, vrb)
+#define vsldoi(vt, va, vb, sh)    vsldoi_(vt, va, vb, sh)
+#define vsr(vrt, vra, vrb)        vsr_(vrt, vra, vrb)
+#define vaddcuw(vrt, vra, vrb)    vaddcuw_(vrt, vra, vrb)
+#define vadduwm(vrt, vra, vrb)    vadduwm_(vrt, vra, vrb)
+#define vsububm(vrt, vra, vrb)    vsububm_(vrt, vra, vrb)
+#define vsubuwm(vrt, vra, vrb)    vsubuwm_(vrt, vra, vrb)
+#define vsrw(vrt, vra, vrb)       vsrw_(vrt, vra, vrb)
+#define vcipher(vt, va, vb)       vcipher_(vt, va, vb)
+#define vcipherlast(vt, va, vb)   vcipherlast_(vt, va, vb)
+#define vncipher(vt, va, vb)      vncipher_(vt, va, vb)
+#define vncipherlast(vt, va, vb)  vncipherlast_(vt, va, vb)
+#define vperm(vt, va, vb, vc)     vperm_(vt, va, vb, vc)
+#define vpmsumd(vt, va, vb)       vpmsumd_(vt, va, vb)
+#define xxpermdi(vt, va, vb, d)   xxpermdi_(vt, va, vb, d)
+
+#define lxvw4x_(xt, ra, rb)       "\tlxvw4x\t" #xt "," #ra "," #rb "\n"
+#define stxvw4x_(xt, ra, rb)      "\tstxvw4x\t" #xt "," #ra "," #rb "\n"
+
+#define label(foo)                #foo "%=:\n"
+#define bdnz_(foo)                "\tbdnz\t" #foo "%=\n"
+#define bdz_(foo)                 "\tbdz\t" #foo "%=\n"
+#define beq_(foo)                 "\tbeq\t" #foo "%=\n"
+
+#define li_(rx, value)            "\tli\t" #rx "," #value "\n"
+#define addi_(rx, ra, imm)        "\taddi\t" #rx "," #ra "," #imm "\n"
+#define cmpldi_(rx, imm)          "\tcmpldi\t" #rx "," #imm "\n"
+#define mtctr_(rx)                "\tmtctr\t" #rx "\n"
+#define vspltb_(vrt, vrb, uim)    "\tvspltb\t" #vrt "," #vrb "," #uim "\n"
+#define vspltw_(vrt, vrb, uim)    "\tvspltw\t" #vrt "," #vrb "," #uim "\n"
+#define vspltisb_(vrt, imm)       "\tvspltisb\t" #vrt "," #imm "\n"
+#define vspltisw_(vrt, imm)       "\tvspltisw\t" #vrt "," #imm "\n"
+#define vrlw_(vrt, vra, vrb)      "\tvrlw\t" #vrt "," #vra "," #vrb "\n"
+#define vsbox_(vrt, vra)          "\tvsbox\t" #vrt "," #vra "\n"
+#define vxor_(vrt, vra, vrb)      "\tvxor\t" #vrt "," #vra "," #vrb "\n"
+#define vand_(vrt, vra, vrb)      "\tvand\t" #vrt "," #vra "," #vrb "\n"
+#define vsro_(vrt, vra, vrb)      "\tvsro\t" #vrt "," #vra "," #vrb "\n"
+#define vsl_(vrt, vra, vrb)       "\tvsl\t" #vrt "," #vra "," #vrb "\n"
+#define vsldoi_(vt, va, vb, sh)   "\tvsldoi\t" #vt "," #va "," #vb "," #sh "\n"
+#define vsr_(vrt, vra, vrb)       "\tvsr\t" #vrt "," #vra "," #vrb "\n"
+#define vaddcuw_(vrt, vra, vrb)   "\tvaddcuw\t" #vrt "," #vra "," #vrb "\n"
+#define vadduwm_(vrt, vra, vrb)   "\tvadduwm\t" #vrt "," #vra "," #vrb "\n"
+#define vsububm_(vrt, vra, vrb)   "\tvsububm\t" #vrt "," #vra "," #vrb "\n"
+#define vsubuwm_(vrt, vra, vrb)   "\tvsubuwm\t" #vrt "," #vra "," #vrb "\n"
+#define vsrw_(vrt, vra, vrb)      "\tvsrw\t" #vrt "," #vra "," #vrb "\n"
+#define vcipher_(vt, va, vb)      "\tvcipher\t" #vt "," #va "," #vb "\n"
+#define vcipherlast_(vt, va, vb)  "\tvcipherlast\t" #vt "," #va "," #vb "\n"
+#define vncipher_(vt, va, vb)     "\tvncipher\t" #vt "," #va "," #vb "\n"
+#define vncipherlast_(vt, va, vb) "\tvncipherlast\t" #vt "," #va "," #vb "\n"
+#define vperm_(vt, va, vb, vc)    "\tvperm\t" #vt "," #va "," #vb "," #vc "\n"
+#define vpmsumd_(vt, va, vb)      "\tvpmsumd\t" #vt "," #va "," #vb "\n"
+#define xxpermdi_(vt, va, vb, d)  "\txxpermdi\t" #vt "," #va "," #vb "," #d "\n"
+
+#endif
+
+/* ==================================================================== */
+/*
+ * Special "activate intrinsics" code, needed for some compiler versions.
+ * This is defined at the end of this file, so that it won't impact any
+ * of the inline functions defined previously; and it is controlled by
+ * a specific macro defined in the caller code.
+ *
+ * Calling code conventions:
+ *
+ *  - Caller must define BR_ENABLE_INTRINSICS before including "inner.h".
+ *  - Functions that use intrinsics must be enclosed in an "enabled"
+ *    region (between BR_TARGETS_X86_UP and BR_TARGETS_X86_DOWN).
+ *  - Functions that use intrinsics must be tagged with the appropriate
+ *    BR_TARGET().
+ */
+
+#if BR_ENABLE_INTRINSICS && (BR_GCC_4_4 || BR_CLANG_3_7 || BR_MSC_2005)
+
+/*
+ * x86 intrinsics (both 32-bit and 64-bit).
+ */
+#if BR_i386 || BR_amd64
+
+/*
+ * On GCC before version 5.0, we need to use the pragma to enable the
+ * target options globally, because the 'target' function attribute
+ * appears to be unreliable. Before 4.6 we must also avoid the
+ * push_options / pop_options mechanism, because it tends to trigger
+ * some internal compiler errors.
+ */
+#if BR_GCC && !BR_GCC_5_0
+#if BR_GCC_4_6
+#define BR_TARGETS_X86_UP \
+	_Pragma("GCC push_options") \
+	_Pragma("GCC target(\"sse2,ssse3,sse4.1,aes,pclmul,rdrnd\")")
+#define BR_TARGETS_X86_DOWN \
+	_Pragma("GCC pop_options")
+#else
+#define BR_TARGETS_X86_UP \
+	_Pragma("GCC target(\"sse2,ssse3,sse4.1,aes,pclmul\")")
+#define BR_TARGETS_X86_DOWN
+#endif
+#pragma GCC diagnostic ignored "-Wpsabi"
+#endif
+
+#if BR_CLANG && !BR_CLANG_3_8
+#undef __SSE2__
+#undef __SSE3__
+#undef __SSSE3__
+#undef __SSE4_1__
+#undef __AES__
+#undef __PCLMUL__
+#undef __RDRND__
+#define __SSE2__     1
+#define __SSE3__     1
+#define __SSSE3__    1
+#define __SSE4_1__   1
+#define __AES__      1
+#define __PCLMUL__   1
+#define __RDRND__    1
+#endif
+
+#ifndef BR_TARGETS_X86_UP
+#define BR_TARGETS_X86_UP
+#endif
+#ifndef BR_TARGETS_X86_DOWN
+#define BR_TARGETS_X86_DOWN
+#endif
+
+#if BR_GCC || BR_CLANG
+BR_TARGETS_X86_UP
+#include <x86intrin.h>
+#include <cpuid.h>
+#define br_bswap32   __builtin_bswap32
+BR_TARGETS_X86_DOWN
+#endif
+
+#if BR_MSC
+#include <stdlib.h>
+#include <intrin.h>
+#include <immintrin.h>
+#define br_bswap32   _byteswap_ulong
+#endif
+
+static inline int /* 1 if every bit set in the four masks is also set in the CPUID leaf 1 output, else 0 */
+br_cpuid(uint32_t mask_eax, uint32_t mask_ebx,
+	uint32_t mask_ecx, uint32_t mask_edx)
+{
+#if BR_GCC || BR_CLANG
+	unsigned eax, ebx, ecx, edx;
+
+	if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) { /* leaf 1; a zero result falls through to 'return 0' */
+		if ((eax & mask_eax) == mask_eax
+			&& (ebx & mask_ebx) == mask_ebx
+			&& (ecx & mask_ecx) == mask_ecx
+			&& (edx & mask_edx) == mask_edx)
+		{
+			return 1;
+		}
+	}
+#elif BR_MSC
+	int info[4]; /* receives EAX, EBX, ECX, EDX in that order (matched against the masks below) */
+
+	__cpuid(info, 1);
+	if (((uint32_t)info[0] & mask_eax) == mask_eax
+		&& ((uint32_t)info[1] & mask_ebx) == mask_ebx
+		&& ((uint32_t)info[2] & mask_ecx) == mask_ecx
+		&& ((uint32_t)info[3] & mask_edx) == mask_edx)
+	{
+		return 1;
+	}
+#endif
+	return 0; /* a requested bit is missing, the query failed, or no compiler branch was compiled in */
+}
+
+#endif
+
+#endif
+
+/* ==================================================================== */
+
+#endif
diff --git a/third_party/bearssl/src/md5.c b/third_party/bearssl/src/md5.c
new file mode 100644
index 0000000..0df7abe
--- /dev/null
+++ b/third_party/bearssl/src/md5.c
@@ -0,0 +1,208 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define F(B, C, D)     ((((C) ^ (D)) & (B)) ^ (D))   /* bitwise select: B ? C : D */
+#define G(B, C, D)     ((((C) ^ (B)) & (D)) ^ (C))   /* bitwise select: D ? B : C */
+#define H(B, C, D)     ((B) ^ (C) ^ (D))             /* three-way XOR */
+#define I(B, C, D)     ((C) ^ ((B) | ~(D)))          /* C XOR (B OR NOT D) */
+
+#define ROTL(x, n)    (((x) << (n)) | ((x) >> (32 - (n))))   /* 32-bit left rotation; n must satisfy 0 < n < 32 */
+
+/* see inner.h -- initial values of the four MD5 state words, copied into fresh contexts by the init functions */
+const uint32_t br_md5_IV[4] = {
+	0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476
+};
+
+static const uint32_t K[64] = { /* K[i] is the additive constant of step i in br_md5_round() */
+	0xD76AA478, 0xE8C7B756, 0x242070DB, 0xC1BDCEEE,
+	0xF57C0FAF, 0x4787C62A, 0xA8304613, 0xFD469501,
+	0x698098D8, 0x8B44F7AF, 0xFFFF5BB1, 0x895CD7BE,
+	0x6B901122, 0xFD987193, 0xA679438E, 0x49B40821,
+
+	0xF61E2562, 0xC040B340, 0x265E5A51, 0xE9B6C7AA,
+	0xD62F105D, 0x02441453, 0xD8A1E681, 0xE7D3FBC8,
+	0x21E1CDE6, 0xC33707D6, 0xF4D50D87, 0x455A14ED,
+	0xA9E3E905, 0xFCEFA3F8, 0x676F02D9, 0x8D2A4C8A,
+
+	0xFFFA3942, 0x8771F681, 0x6D9D6122, 0xFDE5380C,
+	0xA4BEEA44, 0x4BDECFA9, 0xF6BB4B60, 0xBEBFBC70,
+	0x289B7EC6, 0xEAA127FA, 0xD4EF3085, 0x04881D05,
+	0xD9D4D039, 0xE6DB99E5, 0x1FA27CF8, 0xC4AC5665,
+
+	0xF4292244, 0x432AFF97, 0xAB9423A7, 0xFC93A039,
+	0x655B59C3, 0x8F0CCC92, 0xFFEFF47D, 0x85845DD1,
+	0x6FA87E4F, 0xFE2CE6E0, 0xA3014314, 0x4E0811A1,
+	0xF7537E82, 0xBD3AF235, 0x2AD7D2BB, 0xEB86D391
+};
+
+static const unsigned char MP[48] = { /* MP[i - 16] is the index into m[] used by step i (16..63) of br_md5_round() */
+	1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12,
+	5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2,
+	0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9
+};
+
+/* see inner.h -- processes one 64-byte block from buf[], updating the four state words val[] in place */
+void
+br_md5_round(const unsigned char *buf, uint32_t *val)
+{
+	uint32_t m[16];
+	uint32_t a, b, c, d;
+	int i;
+
+	a = val[0];
+	b = val[1];
+	c = val[2];
+	d = val[3];
+	/*
+	 * Load the sixteen message words of the block into m[]. The
+	 * single br_range_dec32le() call below supersedes an older
+	 * per-word loop around br_dec32le().
+	 */
+	br_range_dec32le(m, 16, buf);
+
+	for (i = 0; i < 16; i += 4) { /* steps 0..15: F, message words in natural order */
+		a = b + ROTL(a + F(b, c, d) + m[i + 0] + K[i + 0],  7);
+		d = a + ROTL(d + F(a, b, c) + m[i + 1] + K[i + 1], 12);
+		c = d + ROTL(c + F(d, a, b) + m[i + 2] + K[i + 2], 17);
+		b = c + ROTL(b + F(c, d, a) + m[i + 3] + K[i + 3], 22);
+	}
+	for (i = 16; i < 32; i += 4) { /* steps 16..31: G, message words picked through MP[] */
+		a = b + ROTL(a + G(b, c, d) + m[MP[i - 16]] + K[i + 0],  5);
+		d = a + ROTL(d + G(a, b, c) + m[MP[i - 15]] + K[i + 1],  9);
+		c = d + ROTL(c + G(d, a, b) + m[MP[i - 14]] + K[i + 2], 14);
+		b = c + ROTL(b + G(c, d, a) + m[MP[i - 13]] + K[i + 3], 20);
+	}
+	for (i = 32; i < 48; i += 4) { /* steps 32..47: H, message words picked through MP[] */
+		a = b + ROTL(a + H(b, c, d) + m[MP[i - 16]] + K[i + 0],  4);
+		d = a + ROTL(d + H(a, b, c) + m[MP[i - 15]] + K[i + 1], 11);
+		c = d + ROTL(c + H(d, a, b) + m[MP[i - 14]] + K[i + 2], 16);
+		b = c + ROTL(b + H(c, d, a) + m[MP[i - 13]] + K[i + 3], 23);
+	}
+	for (i = 48; i < 64; i += 4) { /* steps 48..63: I, message words picked through MP[] */
+		a = b + ROTL(a + I(b, c, d) + m[MP[i - 16]] + K[i + 0],  6);
+		d = a + ROTL(d + I(a, b, c) + m[MP[i - 15]] + K[i + 1], 10);
+		c = d + ROTL(c + I(d, a, b) + m[MP[i - 14]] + K[i + 2], 15);
+		b = c + ROTL(b + I(c, d, a) + m[MP[i - 13]] + K[i + 3], 21);
+	}
+
+	val[0] += a; /* feed-forward: add the working variables back into the state */
+	val[1] += b;
+	val[2] += c;
+	val[3] += d;
+}
+
+/* see bearssl.h -- sets the vtable, loads br_md5_IV into the state words and zeroes the byte count */
+void
+br_md5_init(br_md5_context *cc)
+{
+	cc->vtable = &br_md5_vtable;
+	memcpy(cc->val, br_md5_IV, sizeof cc->val);
+	cc->count = 0;
+}
+
+/* see bearssl.h -- buffers input in cc->buf and calls br_md5_round() for each completed 64-byte block */
+void
+br_md5_update(br_md5_context *cc, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	size_t ptr;
+
+	buf = data;
+	ptr = (size_t)cc->count & 63; /* bytes already pending in cc->buf */
+	while (len > 0) {
+		size_t clen;
+
+		clen = 64 - ptr; /* room left in the block buffer */
+		if (clen > len) {
+			clen = len;
+		}
+		memcpy(cc->buf + ptr, buf, clen);
+		ptr += clen;
+		buf += clen;
+		len -= clen;
+		cc->count += (uint64_t)clen;
+		if (ptr == 64) { /* block complete: compress it and start a new one */
+			br_md5_round(cc->buf, cc->val);
+			ptr = 0;
+		}
+	}
+}
+
+/* see bearssl.h -- writes the 16-byte digest of the data injected so far to dst; works on copies, cc is not modified */
+void
+br_md5_out(const br_md5_context *cc, void *dst)
+{
+	unsigned char buf[64];
+	uint32_t val[4];
+	size_t ptr;
+
+	ptr = (size_t)cc->count & 63; /* number of pending bytes in cc->buf */
+	memcpy(buf, cc->buf, ptr);
+	memcpy(val, cc->val, sizeof val);
+	buf[ptr ++] = 0x80; /* padding starts with a single 0x80 byte */
+	if (ptr > 56) { /* no room left for the 8-byte length: finish this block and use one more */
+		memset(buf + ptr, 0, 64 - ptr);
+		br_md5_round(buf, val);
+		memset(buf, 0, 56);
+	} else {
+		memset(buf + ptr, 0, 56 - ptr);
+	}
+	br_enc64le(buf + 56, cc->count << 3); /* cc->count counts bytes; the trailer holds the length in bits */
+	br_md5_round(buf, val);
+	br_range_enc32le(dst, val, 4);
+}
+
+/* see bearssl.h -- encodes the four state words into dst and returns the running byte count */
+uint64_t
+br_md5_state(const br_md5_context *cc, void *dst)
+{
+	br_range_enc32le(dst, cc->val, 4);
+	return cc->count;
+}
+
+/* see bearssl.h -- reloads the four state words from stb and sets the byte count; cc->buf and cc->vtable are not touched */
+void
+br_md5_set_state(br_md5_context *cc, const void *stb, uint64_t count)
+{
+	br_range_dec32le(cc->val, 4, stb);
+	cc->count = count;
+}
+
+/* see bearssl.h -- hash class descriptor for MD5; the br_md5_* functions are cast to the generic br_hash_class signatures */
+const br_hash_class br_md5_vtable = {
+	sizeof(br_md5_context),
+	BR_HASHDESC_ID(br_md5_ID)
+		| BR_HASHDESC_OUT(16)
+		| BR_HASHDESC_STATE(16)
+		| BR_HASHDESC_LBLEN(6)
+		| BR_HASHDESC_MD_PADDING,
+	(void (*)(const br_hash_class **))&br_md5_init,
+	(void (*)(const br_hash_class **, const void *, size_t))&br_md5_update,
+	(void (*)(const br_hash_class *const *, void *))&br_md5_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_md5_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_md5_set_state
+};
diff --git a/third_party/bearssl/src/md5sha1.c b/third_party/bearssl/src/md5sha1.c
new file mode 100644
index 0000000..f701aee
--- /dev/null
+++ b/third_party/bearssl/src/md5sha1.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl.h -- sets the vtable, loads the MD5 and SHA-1 initial values and zeroes the shared byte count */
+void
+br_md5sha1_init(br_md5sha1_context *cc)
+{
+	cc->vtable = &br_md5sha1_vtable;
+	memcpy(cc->val_md5, br_md5_IV, sizeof cc->val_md5);
+	memcpy(cc->val_sha1, br_sha1_IV, sizeof cc->val_sha1);
+	cc->count = 0;
+}
+
+/* see bearssl.h -- buffers input in cc->buf and feeds each completed 64-byte block to both MD5 and SHA-1 */
+void
+br_md5sha1_update(br_md5sha1_context *cc, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	size_t ptr;
+
+	buf = data;
+	ptr = (size_t)cc->count & 63; /* bytes already pending in cc->buf */
+	while (len > 0) {
+		size_t clen;
+
+		clen = 64 - ptr; /* room left in the block buffer */
+		if (clen > len) {
+			clen = len;
+		}
+		memcpy(cc->buf + ptr, buf, clen);
+		ptr += clen;
+		buf += clen;
+		len -= clen;
+		cc->count += (uint64_t)clen;
+		if (ptr == 64) { /* block complete: both hashes consume the same buffer */
+			br_md5_round(cc->buf, cc->val_md5);
+			br_sha1_round(cc->buf, cc->val_sha1);
+			ptr = 0;
+		}
+	}
+}
+
+/* see bearssl.h -- writes the MD5 output (4 words, at dst) then the SHA-1 output (5 words, at dst + 16); cc is not modified */
+void
+br_md5sha1_out(const br_md5sha1_context *cc, void *dst)
+{
+	unsigned char buf[64];
+	uint32_t val_md5[4];
+	uint32_t val_sha1[5];
+	size_t ptr;
+	unsigned char *out;
+	uint64_t count;
+
+	count = cc->count;
+	ptr = (size_t)count & 63; /* number of pending bytes in cc->buf */
+	memcpy(buf, cc->buf, ptr);
+	memcpy(val_md5, cc->val_md5, sizeof val_md5);
+	memcpy(val_sha1, cc->val_sha1, sizeof val_sha1);
+	buf[ptr ++] = 0x80; /* padding starts with a single 0x80 byte */
+	if (ptr > 56) { /* no room left for the 8-byte length: finish this block and use one more */
+		memset(buf + ptr, 0, 64 - ptr);
+		br_md5_round(buf, val_md5);
+		br_sha1_round(buf, val_sha1);
+		memset(buf, 0, 56);
+	} else {
+		memset(buf + ptr, 0, 56 - ptr);
+	}
+	count <<= 3; /* byte count -> bit count */
+	br_enc64le(buf + 56, count); /* MD5 gets the bit length in little-endian */
+	br_md5_round(buf, val_md5);
+	br_enc64be(buf + 56, count); /* SHA-1 gets it in big-endian: overwrite the same 8 bytes */
+	br_sha1_round(buf, val_sha1);
+	out = dst;
+	br_range_enc32le(out, val_md5, 4);
+	br_range_enc32be(out + 16, val_sha1, 5);
+}
+
+/* see bearssl.h -- encodes the MD5 state words at dst and the SHA-1 state words at dst + 16; returns the byte count */
+uint64_t
+br_md5sha1_state(const br_md5sha1_context *cc, void *dst)
+{
+	unsigned char *out;
+
+	out = dst;
+	br_range_enc32le(out, cc->val_md5, 4);
+	br_range_enc32be(out + 16, cc->val_sha1, 5);
+	return cc->count;
+}
+
+/* see bearssl.h -- reloads both states from stb (same layout as br_md5sha1_state() writes) and sets the byte count */
+void
+br_md5sha1_set_state(br_md5sha1_context *cc, const void *stb, uint64_t count)
+{
+	const unsigned char *buf;
+
+	buf = stb;
+	br_range_dec32le(cc->val_md5, 4, buf);
+	br_range_dec32be(cc->val_sha1, 5, buf + 16);
+	cc->count = count;
+}
+
+/* see bearssl.h -- hash class descriptor for the MD5+SHA-1 combination; functions are cast to the generic signatures */
+const br_hash_class br_md5sha1_vtable = {
+	sizeof(br_md5sha1_context),
+	BR_HASHDESC_ID(br_md5sha1_ID)
+		| BR_HASHDESC_OUT(36)
+		| BR_HASHDESC_STATE(36)
+		| BR_HASHDESC_LBLEN(6),
+	(void (*)(const br_hash_class **))&br_md5sha1_init,
+	(void (*)(const br_hash_class **, const void *, size_t))
+		&br_md5sha1_update,
+	(void (*)(const br_hash_class *const *, void *))
+		&br_md5sha1_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))
+		&br_md5sha1_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_md5sha1_set_state
+};
diff --git a/third_party/bearssl/src/mgf1.c b/third_party/bearssl/src/mgf1.c
new file mode 100644
index 0000000..7a23588
--- /dev/null
+++ b/third_party/bearssl/src/mgf1.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- XORs the MGF1 output for 'seed' into data[0..len-1], one digest-sized chunk per counter value */
+void
+br_mgf1_xor(void *data, size_t len,
+	const br_hash_class *dig, const void *seed, size_t seed_len)
+{
+	unsigned char *buf;
+	size_t u, hlen;
+	uint32_t c;
+
+	buf = data;
+	hlen = br_digest_size(dig);
+	for (u = 0, c = 0; u < len; u += hlen, c ++) { /* u: output offset, c: chunk counter */
+		br_hash_compat_context hc;
+		unsigned char tmp[64]; /* holds the counter bytes, then the digest: dig's output must fit in 64 bytes */
+		size_t v;
+
+		hc.vtable = dig;
+		dig->init(&hc.vtable);
+		dig->update(&hc.vtable, seed, seed_len);
+		br_enc32be(tmp, c); /* chunk = Hash(seed || 4-byte big-endian counter) */
+		dig->update(&hc.vtable, tmp, 4);
+		dig->out(&hc.vtable, tmp);
+		for (v = 0; v < hlen; v ++) {
+			if ((u + v) >= len) { /* the last chunk may be only partially used */
+				break;
+			}
+			buf[u + v] ^= tmp[v];
+		}
+	}
+}
diff --git a/third_party/bearssl/src/multihash.c b/third_party/bearssl/src/multihash.c
new file mode 100644
index 0000000..b6df2e0
--- /dev/null
+++ b/third_party/bearssl/src/multihash.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Scratch context: a union large enough to hold the context of any
+ * supported hash function; 'vtable' is what the class methods receive.
+ */
+typedef union {
+	const br_hash_class *vtable;
+	br_md5_context md5;
+	br_sha1_context sha1;
+	br_sha224_context sha224;
+	br_sha256_context sha256;
+	br_sha384_context sha384;
+	br_sha512_context sha512;
+} gen_hash_context;
+
+/*
+ * Get the offset to the state for a specific hash function within the
+ * context structure. This shall be called only for the supported hash
+ * functions (identifiers 1 to 6).
+ */
+static size_t
+get_state_offset(int id)
+{
+	if (id >= 5) {
+		/*
+		 * SHA-384 has id 5, and SHA-512 has id 6. Both use
+		 * eight 64-bit words for their state.
+		 */
+		return offsetof(br_multihash_context, val_64)
+			+ ((size_t)(id - 5) * (8 * sizeof(uint64_t)));
+	} else {
+		/*
+		 * MD5 has id 1, SHA-1 has id 2, SHA-224 has id 3 and
+		 * SHA-256 has id 4. They use 32-bit words for their
+		 * states (4 words for MD5, 5 for SHA-1, 8 for SHA-224
+		 * and 8 for SHA-256).
+		 */
+		unsigned x;
+
+		x = id - 1;
+		x = ((x + (x & (x >> 1))) << 2) + (x >> 1);	/* 0..3 -> word 0, 4, 9, 17 */
+		return offsetof(br_multihash_context, val_32)
+			+ x * sizeof(uint32_t);
+	}
+}
+
+/* see bearssl_hash.h */
+void
+br_multihash_zero(br_multihash_context *ctx)
+{
+	/*
+	 * A byte-wise clear is not strictly standard, but it compiles to
+	 * very short, efficient code and works "everywhere" in practice.
+	 */
+	memset(ctx, 0, sizeof(br_multihash_context));
+}
+
+/* see bearssl_hash.h: reset the byte count and each configured hash state */
+void
+br_multihash_init(br_multihash_context *ctx)
+{
+	int id;
+
+	ctx->count = 0;
+	for (id = 1; id <= 6; id ++) {
+		const br_hash_class *impl;
+		gen_hash_context tmp;
+
+		impl = ctx->impl[id - 1];
+		if (impl == NULL) {
+			continue;
+		}
+		impl->init(&tmp.vtable);
+		impl->state(&tmp.vtable,
+			(unsigned char *)ctx + get_state_offset(id));
+	}
+}
+
+/* see bearssl_hash.h */
+void
+br_multihash_update(br_multihash_context *ctx, const void *data, size_t len)
+{
+	const unsigned char *src;
+	size_t fill;
+
+	src = data;
+	fill = (size_t)ctx->count & 127;
+	while (len > 0) {
+		size_t chunk;
+		int id;
+
+		chunk = (len < 128 - fill) ? len : 128 - fill;
+		memcpy(ctx->buf + fill, src, chunk);
+		fill += chunk;
+		src += chunk;
+		len -= chunk;
+		ctx->count += (uint64_t)chunk;
+		if (fill != 128) {
+			continue;
+		}
+
+		/*
+		 * Buffer full: run each configured function over it,
+		 * restoring its saved state first and saving it again after.
+		 */
+		for (id = 1; id <= 6; id ++) {
+			const br_hash_class *impl;
+			gen_hash_context tmp;
+			unsigned char *state;
+
+			impl = ctx->impl[id - 1];
+			if (impl == NULL) {
+				continue;
+			}
+			state = (unsigned char *)ctx + get_state_offset(id);
+			impl->set_state(&tmp.vtable, state, ctx->count - 128);
+			impl->update(&tmp.vtable, ctx->buf, 128);
+			impl->state(&tmp.vtable, state);
+		}
+		fill = 0;
+	}
+}
+
+/* see bearssl_hash.h: write the digest for 'id' into dst; returns its size, or 0 */
+size_t
+br_multihash_out(const br_multihash_context *ctx, int id, void *dst)
+{
+	const br_hash_class *hc;
+	gen_hash_context g;
+	const unsigned char *state;
+
+	hc = (id >= 1 && id <= 6) ? ctx->impl[id - 1] : NULL;	/* bad id: 0 */
+	if (hc == NULL) {
+		return 0;
+	}
+	state = (const unsigned char *)ctx + get_state_offset(id);
+	hc->set_state(&g.vtable, state, ctx->count & ~(uint64_t)127);
+	hc->update(&g.vtable, ctx->buf, ctx->count & (uint64_t)127);
+	hc->out(&g.vtable, dst);
+	return (hc->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK;
+}
diff --git a/third_party/bearssl/src/poly1305_ctmul.c b/third_party/bearssl/src/poly1305_ctmul.c
new file mode 100644
index 0000000..150e610
--- /dev/null
+++ b/third_party/bearssl/src/poly1305_ctmul.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Perform the inner processing of blocks for Poly1305. The accumulator
+ * and the r key are provided as arrays of 26-bit words (these words
+ * are allowed to have an extra bit, i.e. use 27 bits).
+ *
+ * On output, all accumulator words fit on 26 bits, except acc[1], which
+ * may be slightly larger (but by a very small amount only).
+ */
+static void
+poly1305_inner(uint32_t *acc, const uint32_t *r, const void *data, size_t len)
+{
+	/*
+	 * Implementation notes: we split the 130-bit values into five
+	 * 26-bit words. This gives us some space for carries.
+	 *
+	 * This code is inspired by the public-domain code available
+	 * on:
+	 *      https://github.com/floodyberry/poly1305-donna
+	 *
+	 * Since we compute modulo 2^130-5, the "upper words" become
+	 * low words with a factor of 5; that is, x*2^130 = x*5 mod p.
+	 */
+	const unsigned char *buf;
+	uint32_t a0, a1, a2, a3, a4;
+	uint32_t r0, r1, r2, r3, r4;
+	uint32_t u1, u2, u3, u4;
+
+	r0 = r[0];
+	r1 = r[1];
+	r2 = r[2];
+	r3 = r[3];
+	r4 = r[4];
+
+	u1 = r1 * 5;	/* u[i] = 5*r[i], used for the wrapped-around terms */
+	u2 = r2 * 5;
+	u3 = r3 * 5;
+	u4 = r4 * 5;
+
+	a0 = acc[0];
+	a1 = acc[1];
+	a2 = acc[2];
+	a3 = acc[3];
+	a4 = acc[4];
+
+	buf = data;
+	while (len > 0) {
+		uint64_t w0, w1, w2, w3, w4;
+		uint64_t c;
+		unsigned char tmp[16];
+
+		/*
+		 * If there is a partial block, right-pad it with zeros.
+		 */
+		if (len < 16) {
+			memset(tmp, 0, sizeof tmp);
+			memcpy(tmp, buf, len);
+			buf = tmp;
+			len = 16;	/* the padded copy is consumed as a full block */
+		}
+
+		/*
+		 * Decode next block and apply the "high bit" (2^128, bit
+		 * 24 of the top word); the value is added to the accumulator.
+		 */
+		a0 += br_dec32le(buf) & 0x03FFFFFF;
+		a1 += (br_dec32le(buf +  3) >> 2) & 0x03FFFFFF;
+		a2 += (br_dec32le(buf +  6) >> 4) & 0x03FFFFFF;
+		a3 += (br_dec32le(buf +  9) >> 6) & 0x03FFFFFF;
+		a4 += (br_dec32le(buf + 12) >> 8) | 0x01000000;
+
+		/*
+		 * Compute multiplication.
+		 */
+#define M(x, y)   ((uint64_t)(x) * (uint64_t)(y))
+
+		w0 = M(a0, r0) + M(a1, u4) + M(a2, u3) + M(a3, u2) + M(a4, u1);
+		w1 = M(a0, r1) + M(a1, r0) + M(a2, u4) + M(a3, u3) + M(a4, u2);
+		w2 = M(a0, r2) + M(a1, r1) + M(a2, r0) + M(a3, u4) + M(a4, u3);
+		w3 = M(a0, r3) + M(a1, r2) + M(a2, r1) + M(a3, r0) + M(a4, u4);
+		w4 = M(a0, r4) + M(a1, r3) + M(a2, r2) + M(a3, r1) + M(a4, r0);
+
+#undef M
+		/*
+		 * Perform some (partial) modular reduction. This step is
+		 * enough to keep values in ranges such that there won't
+		 * be carry overflows. Most of the reduction was done in
+		 * the multiplication step (by using the 'u*' values, and
+		 * using the fact that 2^130 = 5 mod p); here we perform
+		 * some carry propagation.
+		 */
+		c = w0 >> 26;
+		a0 = (uint32_t)w0 & 0x3FFFFFF;
+		w1 += c;
+		c = w1 >> 26;
+		a1 = (uint32_t)w1 & 0x3FFFFFF;
+		w2 += c;
+		c = w2 >> 26;
+		a2 = (uint32_t)w2 & 0x3FFFFFF;
+		w3 += c;
+		c = w3 >> 26;
+		a3 = (uint32_t)w3 & 0x3FFFFFF;
+		w4 += c;
+		c = w4 >> 26;
+		a4 = (uint32_t)w4 & 0x3FFFFFF;
+		a0 += (uint32_t)c * 5;	/* carry out of the top word wraps with x5 */
+		a1 += a0 >> 26;	/* a1 is left unmasked: it may exceed 26 bits */
+		a0 &= 0x3FFFFFF;
+
+		buf += 16;
+		len -= 16;
+	}
+
+	acc[0] = a0;
+	acc[1] = a1;
+	acc[2] = a2;
+	acc[3] = a3;
+	acc[4] = a4;
+}
+
+/* see bearssl_block.h */
+void
+br_poly1305_ctmul_run(const void *key, const void *iv,
+	void *data, size_t len, const void *aad, size_t aad_len,
+	void *tag, br_chacha20_run ichacha, int encrypt)
+{
+	unsigned char pkey[32], foot[16];
+	uint32_t r[5], acc[5], cc, ctl, hi;
+	uint64_t w;
+	int i;
+
+	/*
+	 * Compute the MAC key. The 'r' value is the first 16 bytes of
+	 * pkey[].
+	 */
+	memset(pkey, 0, sizeof pkey);
+	ichacha(key, iv, 0, pkey, sizeof pkey);
+
+	/*
+	 * If encrypting, ChaCha20 must run first, followed by Poly1305.
+	 * When decrypting, the operations are reversed.
+	 */
+	if (encrypt) {
+		ichacha(key, iv, 1, data, len);
+	}
+
+	/*
+	 * Run Poly1305. We must process the AAD, then ciphertext, then
+	 * the footer (with the lengths). Note that the AAD and ciphertext
+	 * are meant to be padded with zeros up to the next multiple of 16,
+	 * and the length of the footer is 16 bytes as well.
+	 */
+
+	/*
+	 * Decode the 'r' value into 26-bit words, with the "clamping"
+	 * operation applied.
+	 */
+	r[0] = br_dec32le(pkey) & 0x03FFFFFF;
+	r[1] = (br_dec32le(pkey +  3) >> 2) & 0x03FFFF03;
+	r[2] = (br_dec32le(pkey +  6) >> 4) & 0x03FFC0FF;
+	r[3] = (br_dec32le(pkey +  9) >> 6) & 0x03F03FFF;
+	r[4] = (br_dec32le(pkey + 12) >> 8) & 0x000FFFFF;
+
+	/*
+	 * Accumulator is 0.
+	 */
+	memset(acc, 0, sizeof acc);
+
+	/*
+	 * Process the additional authenticated data, ciphertext, and
+	 * footer in due order.
+	 */
+	br_enc64le(foot, (uint64_t)aad_len);
+	br_enc64le(foot + 8, (uint64_t)len);
+	poly1305_inner(acc, r, aad, aad_len);
+	poly1305_inner(acc, r, data, len);
+	poly1305_inner(acc, r, foot, sizeof foot);
+
+	/*
+	 * Finalise modular reduction. This is done with carry propagation
+	 * and applying the '2^130 = 5 mod p' rule. Note that the output
+	 * of poly1305_inner() is already mostly reduced, since only
+	 * acc[1] may be (very slightly) above 2^26. A single loop back
+	 * to acc[1] will be enough to make the value fit in 130 bits.
+	 */
+	cc = 0;
+	for (i = 1; i <= 6; i ++) {
+		int j;
+
+		j = (i >= 5) ? i - 5 : i;
+		acc[j] += (j == 0) ? 5 * cc : cc;	/* carry out of acc[4] is 2^130 */
+		cc = acc[j] >> 26;
+		acc[j] &= 0x03FFFFFF;
+	}
+
+	/*
+	 * We may still have a value in the 2^130-5..2^130-1 range, in
+	 * which case we must reduce it again. The code below selects,
+	 * in constant-time, between 'acc' and 'acc-p'.
+	 */
+	ctl = GT(acc[0], 0x03FFFFFA);
+	for (i = 1; i < 5; i ++) {
+		ctl &= EQ(acc[i], 0x03FFFFFF);
+	}
+	cc = 5;
+	for (i = 0; i < 5; i ++) {
+		uint32_t t;
+
+		t = (acc[i] + cc);
+		cc = t >> 26;
+		t &= 0x03FFFFFF;
+		acc[i] = MUX(ctl, t, acc[i]);
+	}
+
+	/*
+	 * Convert back the accumulator to 32-bit words, and add the
+	 * 's' value (second half of pkey[]). That addition is done
+	 * modulo 2^128.
+	 */
+	w = (uint64_t)acc[0] + ((uint64_t)acc[1] << 26) + br_dec32le(pkey + 16);
+	br_enc32le((unsigned char *)tag, (uint32_t)w);
+	w = (w >> 32) + ((uint64_t)acc[2] << 20) + br_dec32le(pkey + 20);
+	br_enc32le((unsigned char *)tag + 4, (uint32_t)w);
+	w = (w >> 32) + ((uint64_t)acc[3] << 14) + br_dec32le(pkey + 24);
+	br_enc32le((unsigned char *)tag + 8, (uint32_t)w);
+	hi = (uint32_t)(w >> 32) + (acc[4] << 8) + br_dec32le(pkey + 28);
+	br_enc32le((unsigned char *)tag + 12, hi);
+
+	/*
+	 * If decrypting, then ChaCha20 runs _after_ Poly1305.
+	 */
+	if (!encrypt) {
+		ichacha(key, iv, 1, data, len);
+	}
+}
diff --git a/third_party/bearssl/src/poly1305_ctmul32.c b/third_party/bearssl/src/poly1305_ctmul32.c
new file mode 100644
index 0000000..15d9635
--- /dev/null
+++ b/third_party/bearssl/src/poly1305_ctmul32.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Perform the inner processing of blocks for Poly1305; a[] is updated in place.
+ */
+static void
+poly1305_inner(uint32_t *a, const uint32_t *r, const void *data, size_t len)
+{
+	/*
+	 * Implementation notes: we split the 130-bit values into ten
+	 * 13-bit words. This gives us some space for carries and allows
+	 * using only 32x32->32 multiplications, which are way faster than
+	 * 32x32->64 multiplications on the ARM Cortex-M0/M0+, and also
+	 * help in making constant-time code on the Cortex-M3.
+	 *
+	 * Since we compute modulo 2^130-5, the "upper words" become
+	 * low words with a factor of 5; that is, x*2^130 = x*5 mod p.
+	 * This has already been integrated in the r[] array, which
+	 * is extended to the 0..18 range.
+	 *
+	 * In each loop iteration, a[] and r[] words are 13-bit each,
+	 * except a[1] which may use 14 bits.
+	 */
+	const unsigned char *buf;
+
+	buf = data;
+	while (len > 0) {
+		unsigned char tmp[16];
+		uint32_t b[10];
+		unsigned u, v;
+		uint32_t z, cc1, cc2;
+
+		/*
+		 * If there is a partial block, right-pad it with zeros.
+		 */
+		if (len < 16) {
+			memset(tmp, 0, sizeof tmp);
+			memcpy(tmp, buf, len);
+			buf = tmp;
+			len = 16;	/* the padded copy is consumed as a full block */
+		}
+
+		/*
+		 * Decode next block and apply the "high bit" (2^128, bit
+		 * 11 of a[9]); the value is added to the accumulator.
+		 */
+		v = br_dec16le(buf);
+		a[0] += v & 0x01FFF;
+		v >>= 13;
+		v |= buf[2] << 3;
+		v |= buf[3] << 11;
+		a[1] += v & 0x01FFF;
+		v >>= 13;
+		v |= buf[4] << 6;
+		a[2] += v & 0x01FFF;
+		v >>= 13;
+		v |= buf[5] << 1;
+		v |= buf[6] << 9;
+		a[3] += v & 0x01FFF;
+		v >>= 13;
+		v |= buf[7] << 4;
+		v |= buf[8] << 12;
+		a[4] += v & 0x01FFF;
+		v >>= 13;
+		v |= buf[9] << 7;
+		a[5] += v & 0x01FFF;
+		v >>= 13;
+		v |= buf[10] << 2;
+		v |= buf[11] << 10;
+		a[6] += v & 0x01FFF;
+		v >>= 13;
+		v |= buf[12] << 5;
+		a[7] += v & 0x01FFF;
+		v = br_dec16le(buf + 13);
+		a[8] += v & 0x01FFF;
+		v >>= 13;
+		v |= buf[15] << 3;
+		a[9] += v | 0x00800;
+
+		/*
+		 * At that point, all a[] values fit on 14 bits, while
+		 * all r[] values fit on 13 bits. Thus products fit on
+		 * 27 bits, and we can accumulate up to 31 of them in
+		 * a 32-bit word and still have some room for carries.
+		 */
+
+		/*
+		 * Now a[] contains words with values up to 14 bits each.
+		 * We perform the multiplication with r[].
+		 *
+		 * The extended words of r[] may be larger than 13 bits
+		 * (they are 5 times a 13-bit word) so the full summation
+		 * may yield values up to 46 times a 27-bit word, which
+		 * does not fit on a 32-bit word. To avoid that issue, we
+		 * must split the loop below in two, with a carry
+		 * propagation operation in the middle.
+		 */
+		cc1 = 0;
+		for (u = 0; u < 10; u ++) {
+			uint32_t s;
+
+			s = cc1
+				+ MUL15(a[0], r[u + 9 - 0])
+				+ MUL15(a[1], r[u + 9 - 1])
+				+ MUL15(a[2], r[u + 9 - 2])
+				+ MUL15(a[3], r[u + 9 - 3])
+				+ MUL15(a[4], r[u + 9 - 4]);
+			b[u] = s & 0x1FFF;
+			cc1 = s >> 13;
+		}
+		cc2 = 0;
+		for (u = 0; u < 10; u ++) {
+			uint32_t s;
+
+			s = b[u] + cc2
+				+ MUL15(a[5], r[u + 9 - 5])
+				+ MUL15(a[6], r[u + 9 - 6])
+				+ MUL15(a[7], r[u + 9 - 7])
+				+ MUL15(a[8], r[u + 9 - 8])
+				+ MUL15(a[9], r[u + 9 - 9]);
+			b[u] = s & 0x1FFF;
+			cc2 = s >> 13;
+		}
+		memcpy(a, b, sizeof b);
+
+		/*
+		 * The two carries "loop back" with a factor of 5. We
+		 * propagate them into a[0] and a[1].
+		 */
+		z = cc1 + cc2;
+		z += (z << 2) + a[0];	/* z = 5*(cc1 + cc2) + a[0] */
+		a[0] = z & 0x1FFF;
+		a[1] += z >> 13;
+
+		buf += 16;
+		len -= 16;
+	}
+}
+
+/* see bearssl_block.h */
+void
+br_poly1305_ctmul32_run(const void *key, const void *iv,
+	void *data, size_t len, const void *aad, size_t aad_len,
+	void *tag, br_chacha20_run ichacha, int encrypt)
+{
+	unsigned char pkey[32], foot[16];
+	uint32_t z, r[19], acc[10], cc, ctl;
+	int i;
+
+	/*
+	 * Compute the MAC key. The 'r' value is the first 16 bytes of
+	 * pkey[].
+	 */
+	memset(pkey, 0, sizeof pkey);
+	ichacha(key, iv, 0, pkey, sizeof pkey);
+
+	/*
+	 * If encrypting, ChaCha20 must run first, followed by Poly1305.
+	 * When decrypting, the operations are reversed.
+	 */
+	if (encrypt) {
+		ichacha(key, iv, 1, data, len);
+	}
+
+	/*
+	 * Run Poly1305. We must process the AAD, then ciphertext, then
+	 * the footer (with the lengths). Note that the AAD and ciphertext
+	 * are meant to be padded with zeros up to the next multiple of 16,
+	 * and the length of the footer is 16 bytes as well.
+	 */
+
+	/*
+	 * Decode the 'r' value into 13-bit words (stored in r[9..18]),
+	 * with the "clamping" operation applied.
+	 */
+	z = br_dec32le(pkey) & 0x03FFFFFF;
+	r[9] = z & 0x1FFF;
+	r[10] = z >> 13;
+	z = (br_dec32le(pkey +  3) >> 2) & 0x03FFFF03;
+	r[11] = z & 0x1FFF;
+	r[12] = z >> 13;
+	z = (br_dec32le(pkey +  6) >> 4) & 0x03FFC0FF;
+	r[13] = z & 0x1FFF;
+	r[14] = z >> 13;
+	z = (br_dec32le(pkey +  9) >> 6) & 0x03F03FFF;
+	r[15] = z & 0x1FFF;
+	r[16] = z >> 13;
+	z = (br_dec32le(pkey + 12) >> 8) & 0x000FFFFF;
+	r[17] = z & 0x1FFF;
+	r[18] = z >> 13;
+
+	/*
+	 * Extend r[] with the 5x factor pre-applied.
+	 */
+	for (i = 0; i < 9; i ++) {
+		r[i] = MUL15(5, r[i + 10]);
+	}
+
+	/*
+	 * Accumulator is 0.
+	 */
+	memset(acc, 0, sizeof acc);
+
+	/*
+	 * Process the additional authenticated data, ciphertext, and
+	 * footer in due order.
+	 */
+	br_enc64le(foot, (uint64_t)aad_len);
+	br_enc64le(foot + 8, (uint64_t)len);
+	poly1305_inner(acc, r, aad, aad_len);
+	poly1305_inner(acc, r, data, len);
+	poly1305_inner(acc, r, foot, sizeof foot);
+
+	/*
+	 * Finalise modular reduction. This is done with carry propagation
+	 * and applying the '2^130 = 5 mod p' rule. Note that the output
+	 * of poly1305_inner() is already mostly reduced, since only
+	 * acc[1] may be (very slightly) above 2^13. A single loop back
+	 * to acc[1] will be enough to make the value fit in 130 bits.
+	 */
+	cc = 0;
+	for (i = 1; i < 10; i ++) {
+		z = acc[i] + cc;
+		acc[i] = z & 0x1FFF;
+		cc = z >> 13;
+	}
+	z = acc[0] + cc + (cc << 2);	/* carry out of acc[9] wraps with x5 */
+	acc[0] = z & 0x1FFF;
+	acc[1] += z >> 13;
+
+	/*
+	 * We may still have a value in the 2^130-5..2^130-1 range, in
+	 * which case we must reduce it again. The code below selects,
+	 * in constant-time, between 'acc' and 'acc-p'.
+	 */
+	ctl = GT(acc[0], 0x1FFA);
+	for (i = 1; i < 10; i ++) {
+		ctl &= EQ(acc[i], 0x1FFF);
+	}
+	acc[0] = MUX(ctl, acc[0] - 0x1FFB, acc[0]);
+	for (i = 1; i < 10; i ++) {
+		acc[i] &= ~(-ctl);	/* cleared when ctl is 1, kept when 0 */
+	}
+
+	/*
+	 * Convert back the accumulator to 32-bit words, and add the
+	 * 's' value (second half of pkey[]). That addition is done
+	 * modulo 2^128.
+	 */
+	z = acc[0] + (acc[1] << 13) + br_dec16le(pkey + 16);
+	br_enc16le((unsigned char *)tag, z & 0xFFFF);
+	z = (z >> 16) + (acc[2] << 10) + br_dec16le(pkey + 18);
+	br_enc16le((unsigned char *)tag + 2, z & 0xFFFF);
+	z = (z >> 16) + (acc[3] << 7) + br_dec16le(pkey + 20);
+	br_enc16le((unsigned char *)tag + 4, z & 0xFFFF);
+	z = (z >> 16) + (acc[4] << 4) + br_dec16le(pkey + 22);
+	br_enc16le((unsigned char *)tag + 6, z & 0xFFFF);
+	z = (z >> 16) + (acc[5] << 1) + (acc[6] << 14) + br_dec16le(pkey + 24);
+	br_enc16le((unsigned char *)tag + 8, z & 0xFFFF);
+	z = (z >> 16) + (acc[7] << 11) + br_dec16le(pkey + 26);
+	br_enc16le((unsigned char *)tag + 10, z & 0xFFFF);
+	z = (z >> 16) + (acc[8] << 8) + br_dec16le(pkey + 28);
+	br_enc16le((unsigned char *)tag + 12, z & 0xFFFF);
+	z = (z >> 16) + (acc[9] << 5) + br_dec16le(pkey + 30);
+	br_enc16le((unsigned char *)tag + 14, z & 0xFFFF);
+
+	/*
+	 * If decrypting, then ChaCha20 runs _after_ Poly1305.
+	 */
+	if (!encrypt) {
+		ichacha(key, iv, 1, data, len);
+	}
+}
diff --git a/third_party/bearssl/src/poly1305_ctmulq.c b/third_party/bearssl/src/poly1305_ctmulq.c
new file mode 100644
index 0000000..b00683a
--- /dev/null
+++ b/third_party/bearssl/src/poly1305_ctmulq.c
@@ -0,0 +1,475 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+#if BR_INT128
+/* MUL128(hi, lo, x, y): set hi:lo to the 128-bit product of x and y. */
+#define MUL128(hi, lo, x, y)   do { \
+		unsigned __int128 mul128tmp; \
+		mul128tmp = (unsigned __int128)(x) * (unsigned __int128)(y); \
+		(hi) = (uint64_t)(mul128tmp >> 64); \
+		(lo) = (uint64_t)mul128tmp; \
+	} while (0)
+
+#elif BR_UMUL128
+/* Same macro, built on the _umul128() intrinsic from <intrin.h>. */
+#include <intrin.h>
+
+#define MUL128(hi, lo, x, y)   do { \
+		(lo) = _umul128((x), (y), &(hi)); \
+	} while (0)
+
+#endif
+
+#define MASK42   ((uint64_t)0x000003FFFFFFFFFF)	/* low 42 bits set */
+#define MASK44   ((uint64_t)0x00000FFFFFFFFFFF)	/* low 44 bits set */
+
+/*
+ * The "accumulator" word is nominally a 130-bit value. We split it into
+ * words of 44 bits, each held in a 64-bit variable.
+ *
+ * If the current accumulator is a = a0 + a1*W + a2*W^2 (where W = 2^44)
+ * and r = r0 + r1*W + r2*W^2, then:
+ *
+ *   a*r = (a0*r0)
+ *       + (a0*r1 + a1*r0) * W
+ *       + (a0*r2 + a1*r1 + a2*r0) * W^2
+ *       + (a1*r2 + a2*r1) * W^3
+ *       + (a2*r2) * W^4
+ *
+ * We want to reduce that value modulo p = 2^130-5, so W^3 = 20 mod p,
+ * and W^4 = 20*W mod p. Thus, if we define u1 = 20*r1 and u2 = 20*r2,
+ * then the equations above become:
+ *
+ *  b0 = a0*r0 + a1*u2 + a2*u1
+ *  b1 = a0*r1 + a1*r0 + a2*u2
+ *  b2 = a0*r2 + a1*r1 + a2*r0
+ *
+ * In order to make u1 fit in 44 bits, we can change these equations
+ * into:
+ *
+ *  b0 = a0*r0 + a1*u2 + a2*t1
+ *  b1 = a0*r1 + a1*r0 + a2*t2
+ *  b2 = a0*r2 + a1*r1 + a2*r0
+ *
+ * Where t1 is u1 truncated to 44 bits, and t2 is u2 added to the extra
+ * bits of u1. Note that since r is clamped down to a 124-bit value, the
+ * values u2 and t2 fit on 44 bits too.
+ *
+ * The bx values are larger than 44 bits, so we may split them into a
+ * lower half (cx, 44 bits) and an upper half (dx). The new values for
+ * the accumulator are then:
+ *
+ *  e0 = c0 + 20*d2
+ *  e1 = c1 + d0
+ *  e2 = c2 + d1
+ *
+ * The equations allow for some room, i.e. the ax values may be larger
+ * than 44 bits. Similarly, the ex values will usually be larger than
+ * the ax. Thus, some sort of carry propagation must be done regularly,
+ * though not necessarily at each iteration. In particular, we do not
+ * need to compute the additions (for the bx values) over 128-bit
+ * quantities; we can stick to 64-bit computations.
+ *
+ *
+ * Since the 128-bit result of a 64x64 multiplication is actually
+ * represented over two 64-bit registers, it is cheaper to arrange for
+ * any split that happens between the "high" and "low" halves to be on
+ * that 64-bit boundary. This is done by left shifting the rx, ux and tx
+ * by 20 bits (since they all fit on 44 bits each, this shift is
+ * always possible).
+ */
+
+static void
+poly1305_inner_big(uint64_t *acc, uint64_t *r, const void *data, size_t len)
+{
+	/* Four blocks per iteration: len must be a multiple of 64. */
+#define MX(hi, lo, m0, m1, m2)   do { \
+		uint64_t mxhi, mxlo; \
+		MUL128(mxhi, mxlo, a0, m0); \
+		(hi) = mxhi; \
+		(lo) = mxlo >> 20; \
+		MUL128(mxhi, mxlo, a1, m1); \
+		(hi) += mxhi; \
+		(lo) += mxlo >> 20; \
+		MUL128(mxhi, mxlo, a2, m2); \
+		(hi) += mxhi; \
+		(lo) += mxlo >> 20; \
+	} while (0)
+
+	const unsigned char *buf;
+	uint64_t a0, a1, a2;
+	uint64_t r0, r1, r2, t1, t2, u2;
+
+	r0 = r[0];
+	r1 = r[1];
+	r2 = r[2];
+	t1 = r[3];
+	t2 = r[4];
+	u2 = r[5];
+	a0 = acc[0];
+	a1 = acc[1];
+	a2 = acc[2];
+	buf = data;
+
+	while (len > 0) {
+		uint64_t v0, v1, v2;
+		uint64_t c0, c1, c2, d0, d1, d2;
+
+		v0 = br_dec64le(buf + 0);
+		v1 = br_dec64le(buf + 8);
+		v2 = v1 >> 24;
+		v1 = ((v0 >> 44) | (v1 << 20)) & MASK44;
+		v0 &= MASK44;
+		a0 += v0;
+		a1 += v1;
+		a2 += v2 + ((uint64_t)1 << 40);	/* adds the 2^128 "high bit" */
+		MX(d0, c0, r0, u2, t1);
+		MX(d1, c1, r1, r0, t2);
+		MX(d2, c2, r2, r1, r0);
+		a0 = c0 + 20 * d2;
+		a1 = c1 + d0;
+		a2 = c2 + d1;
+
+		v0 = br_dec64le(buf + 16);
+		v1 = br_dec64le(buf + 24);
+		v2 = v1 >> 24;
+		v1 = ((v0 >> 44) | (v1 << 20)) & MASK44;
+		v0 &= MASK44;
+		a0 += v0;
+		a1 += v1;
+		a2 += v2 + ((uint64_t)1 << 40);
+		MX(d0, c0, r0, u2, t1);
+		MX(d1, c1, r1, r0, t2);
+		MX(d2, c2, r2, r1, r0);
+		a0 = c0 + 20 * d2;
+		a1 = c1 + d0;
+		a2 = c2 + d1;
+
+		v0 = br_dec64le(buf + 32);
+		v1 = br_dec64le(buf + 40);
+		v2 = v1 >> 24;
+		v1 = ((v0 >> 44) | (v1 << 20)) & MASK44;
+		v0 &= MASK44;
+		a0 += v0;
+		a1 += v1;
+		a2 += v2 + ((uint64_t)1 << 40);
+		MX(d0, c0, r0, u2, t1);
+		MX(d1, c1, r1, r0, t2);
+		MX(d2, c2, r2, r1, r0);
+		a0 = c0 + 20 * d2;
+		a1 = c1 + d0;
+		a2 = c2 + d1;
+
+		v0 = br_dec64le(buf + 48);
+		v1 = br_dec64le(buf + 56);
+		v2 = v1 >> 24;
+		v1 = ((v0 >> 44) | (v1 << 20)) & MASK44;
+		v0 &= MASK44;
+		a0 += v0;
+		a1 += v1;
+		a2 += v2 + ((uint64_t)1 << 40);
+		MX(d0, c0, r0, u2, t1);
+		MX(d1, c1, r1, r0, t2);
+		MX(d2, c2, r2, r1, r0);
+		a0 = c0 + 20 * d2;
+		a1 = c1 + d0;
+		a2 = c2 + d1;
+		/* Carry propagation, done once per group of four blocks. */
+		a1 += a0 >> 44;
+		a0 &= MASK44;
+		a2 += a1 >> 44;
+		a1 &= MASK44;
+		a0 += 20 * (a2 >> 44);
+		a2 &= MASK44;
+
+		buf += 64;
+		len -= 64;
+	}
+	acc[0] = a0;
+	acc[1] = a1;
+	acc[2] = a2;
+
+#undef MX
+}
+
+static void
+poly1305_inner_small(uint64_t *acc, uint64_t *r, const void *data, size_t len)
+{
+	const unsigned char *buf;
+	uint64_t a0, a1, a2;
+	uint64_t r0, r1, r2, t1, t2, u2;
+
+	r0 = r[0];
+	r1 = r[1];
+	r2 = r[2];
+	t1 = r[3];
+	t2 = r[4];
+	u2 = r[5];
+	a0 = acc[0];
+	a1 = acc[1];
+	a2 = acc[2];
+	buf = data;
+
+	while (len > 0) {
+		uint64_t v0, v1, v2;
+		uint64_t c0, c1, c2, d0, d1, d2;
+		unsigned char tmp[16];
+		/* Zero-pad a trailing partial block to 16 bytes. */
+		if (len < 16) {
+			memcpy(tmp, buf, len);
+			memset(tmp + len, 0, (sizeof tmp) - len);
+			buf = tmp;
+			len = 16;
+		}
+		v0 = br_dec64le(buf + 0);
+		v1 = br_dec64le(buf + 8);
+
+		v2 = v1 >> 24;
+		v1 = ((v0 >> 44) | (v1 << 20)) & MASK44;
+		v0 &= MASK44;
+
+		a0 += v0;
+		a1 += v1;
+		a2 += v2 + ((uint64_t)1 << 40);	/* adds the 2^128 "high bit" */
+
+#define MX(hi, lo, m0, m1, m2)   do { \
+		uint64_t mxhi, mxlo; \
+		MUL128(mxhi, mxlo, a0, m0); \
+		(hi) = mxhi; \
+		(lo) = mxlo >> 20; \
+		MUL128(mxhi, mxlo, a1, m1); \
+		(hi) += mxhi; \
+		(lo) += mxlo >> 20; \
+		MUL128(mxhi, mxlo, a2, m2); \
+		(hi) += mxhi; \
+		(lo) += mxlo >> 20; \
+	} while (0)
+
+		MX(d0, c0, r0, u2, t1);
+		MX(d1, c1, r1, r0, t2);
+		MX(d2, c2, r2, r1, r0);
+
+#undef MX
+
+		a0 = c0 + 20 * d2;
+		a1 = c1 + d0;
+		a2 = c2 + d1;
+		/* Carry propagation after every block. */
+		a1 += a0 >> 44;
+		a0 &= MASK44;
+		a2 += a1 >> 44;
+		a1 &= MASK44;
+		a0 += 20 * (a2 >> 44);
+		a2 &= MASK44;
+
+		buf += 16;
+		len -= 16;
+	}
+	acc[0] = a0;
+	acc[1] = a1;
+	acc[2] = a2;
+}
+
+static inline void
+poly1305_inner(uint64_t *acc, uint64_t *r, const void *data, size_t len)
+{
+	size_t bulk;
+
+	/* Whole 64-byte groups go to the unrolled code, the tail after. */
+	bulk = len & ~(size_t)63;
+	if (bulk != 0) {
+		poly1305_inner_big(acc, r, data, bulk);
+	}
+	if (len != bulk) {
+		poly1305_inner_small(acc, r,
+			(const unsigned char *)data + bulk, len - bulk);
+	}
+}
+
+/* see bearssl_block.h */
+void
+br_poly1305_ctmulq_run(const void *key, const void *iv,
+	void *data, size_t len, const void *aad, size_t aad_len,
+	void *tag, br_chacha20_run ichacha, int encrypt)
+{
+	unsigned char pkey[32], foot[16];
+	uint64_t r[6], acc[3], r0, r1;
+	uint32_t v0, v1, v2, v3, v4;
+	uint64_t w0, w1, w2, w3;
+	uint32_t ctl;
+
+	/*
+	 * Compute the MAC key. The 'r' value is the first 16 bytes of
+	 * pkey[].
+	 */
+	memset(pkey, 0, sizeof pkey);
+	ichacha(key, iv, 0, pkey, sizeof pkey);
+
+	/*
+	 * If encrypting, ChaCha20 must run first, followed by Poly1305.
+	 * When decrypting, the operations are reversed.
+	 */
+	if (encrypt) {
+		ichacha(key, iv, 1, data, len);
+	}
+
+	/*
+	 * Run Poly1305. We must process the AAD, then ciphertext, then
+	 * the footer (with the lengths). Note that the AAD and ciphertext
+	 * are meant to be padded with zeros up to the next multiple of 16,
+	 * and the length of the footer is 16 bytes as well.
+	 */
+
+	/*
+	 * Apply the "clamping" on r.
+	 */
+	pkey[ 3] &= 0x0F;
+	pkey[ 4] &= 0xFC;
+	pkey[ 7] &= 0x0F;
+	pkey[ 8] &= 0xFC;
+	pkey[11] &= 0x0F;
+	pkey[12] &= 0xFC;
+	pkey[15] &= 0x0F;
+
+	/*
+	 * Decode the 'r' value into 44-bit words, left-shifted by 20 bits.
+	 * Also compute the t1, t2 and u2 values (r[3], r[4] and r[5]).
+	 */
+	r0 = br_dec64le(pkey +  0);
+	r1 = br_dec64le(pkey +  8);
+	r[0] = r0 << 20;
+	r[1] = ((r0 >> 24) | (r1 << 40)) & ~(uint64_t)0xFFFFF;
+	r[2] = (r1 >> 4) & ~(uint64_t)0xFFFFF;
+	r1 = 20 * (r[1] >> 20);	/* r1 is reused here to hold u1 = 20*r1 */
+	r[3] = r1 << 20;
+	r[5] = 20 * r[2];
+	r[4] = (r[5] + (r1 >> 24)) & ~(uint64_t)0xFFFFF;
+
+	/*
+	 * Accumulator is 0.
+	 */
+	acc[0] = 0;
+	acc[1] = 0;
+	acc[2] = 0;
+
+	/*
+	 * Process the additional authenticated data, ciphertext, and
+	 * footer in due order.
+	 */
+	br_enc64le(foot, (uint64_t)aad_len);
+	br_enc64le(foot + 8, (uint64_t)len);
+	poly1305_inner(acc, r, aad, aad_len);
+	poly1305_inner(acc, r, data, len);
+	poly1305_inner_small(acc, r, foot, sizeof foot);
+
+	/*
+	 * Finalise modular reduction. At that point, the value consists
+	 * in three 44-bit values (the lowest one might be slightly above
+	 * 2^44). Two loops shall be sufficient.
+	 */
+	acc[1] += (acc[0] >> 44);
+	acc[0] &= MASK44;
+	acc[2] += (acc[1] >> 44);
+	acc[1] &= MASK44;
+	acc[0] += 5 * (acc[2] >> 42);	/* bits from 2^130 up wrap with x5 */
+	acc[2] &= MASK42;
+	acc[1] += (acc[0] >> 44);
+	acc[0] &= MASK44;
+	acc[2] += (acc[1] >> 44);
+	acc[1] &= MASK44;
+	acc[0] += 5 * (acc[2] >> 42);
+	acc[2] &= MASK42;
+
+	/*
+	 * The value may still fall in the 2^130-5..2^130-1 range, in
+	 * which case we must reduce it again. The code below selects,
+	 * in constant-time, between 'acc' and 'acc-p'. We encode the
+	 * value over four 32-bit integers to finish the operation.
+	 */
+	v0 = (uint32_t)acc[0];
+	v1 = (uint32_t)(acc[0] >> 32) | ((uint32_t)acc[1] << 12);
+	v2 = (uint32_t)(acc[1] >> 20) | ((uint32_t)acc[2] << 24);
+	v3 = (uint32_t)(acc[2] >> 8);
+	v4 = (uint32_t)(acc[2] >> 40);	/* bits 128 and up; only tested below */
+
+	ctl = GT(v0, 0xFFFFFFFA);
+	ctl &= EQ(v1, 0xFFFFFFFF);
+	ctl &= EQ(v2, 0xFFFFFFFF);
+	ctl &= EQ(v3, 0xFFFFFFFF);
+	ctl &= EQ(v4, 0x00000003);
+	v0 = MUX(ctl, v0 + 5, v0);
+	v1 = MUX(ctl, 0, v1);
+	v2 = MUX(ctl, 0, v2);
+	v3 = MUX(ctl, 0, v3);
+
+	/*
+	 * Add the "s" value. This is done modulo 2^128. Don't forget
+	 * carry propagation...
+	 */
+	w0 = (uint64_t)v0 + (uint64_t)br_dec32le(pkey + 16);
+	w1 = (uint64_t)v1 + (uint64_t)br_dec32le(pkey + 20) + (w0 >> 32);
+	w2 = (uint64_t)v2 + (uint64_t)br_dec32le(pkey + 24) + (w1 >> 32);
+	w3 = (uint64_t)v3 + (uint64_t)br_dec32le(pkey + 28) + (w2 >> 32);
+	v0 = (uint32_t)w0;
+	v1 = (uint32_t)w1;
+	v2 = (uint32_t)w2;
+	v3 = (uint32_t)w3;
+
+	/*
+	 * Encode the tag.
+	 */
+	br_enc32le((unsigned char *)tag +  0, v0);
+	br_enc32le((unsigned char *)tag +  4, v1);
+	br_enc32le((unsigned char *)tag +  8, v2);
+	br_enc32le((unsigned char *)tag + 12, v3);
+
+	/*
+	 * If decrypting, then ChaCha20 runs _after_ Poly1305.
+	 */
+	if (!encrypt) {
+		ichacha(key, iv, 1, data, len);
+	}
+}
+
+/* see bearssl_block.h: 128-bit products are available, expose the code */
+br_poly1305_run
+br_poly1305_ctmulq_get(void)
+{
+	return br_poly1305_ctmulq_run;
+}
+
+#else
+
+/* see bearssl_block.h: no 128-bit product support, nothing to offer */
+br_poly1305_run
+br_poly1305_ctmulq_get(void)
+{
+	return NULL;
+}
+
+#endif
diff --git a/third_party/bearssl/src/poly1305_i15.c b/third_party/bearssl/src/poly1305_i15.c
new file mode 100644
index 0000000..6f89212
--- /dev/null
+++ b/third_party/bearssl/src/poly1305_i15.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * This is a "reference" implementation of Poly1305 that uses the
+ * generic "i15" code for big integers. It is slow, but it handles all
+ * big-integer operations with generic code, thereby avoiding most
+ * tricky situations with carry propagation and modular reduction.
+ */
+
+/*
+ * Modulus p = 2^130-5: leading word 0x008A, then nine 15-bit words, least significant first.
+ */
+static const uint16_t P1305[] = {
+	0x008A,
+	0x7FFB, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x03FF
+};
+
+/*
+ * -1/p mod 2^15 (5 * 0x4CCD = 1 mod 2^15, and -p = 5 mod 2^15); passed to br_i15_montymul().
+ */
+#define P0I   0x4CCD
+
+/*
+ * R^2 mod p, for conversion to Montgomery representation (R = 2^135,
+ * since 9 words of 15 bits give 15*9 = 135): 2^270 = 25 * 2^10 = 0x6400 mod p.
+ */
+static const uint16_t R2[] = {
+	0x008A,
+	0x6400, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+};
+
+/*
+ * Absorb 'len' bytes of 'data' into accumulator 'a': for each 16-byte block,
+ * a = (a + block + 2^128) * r mod p. 'r' is in Montgomery form, 'a' is not.
+ */
+static void
+poly1305_inner(uint16_t *a, const uint16_t *r, const void *data, size_t len)
+{
+	const unsigned char *buf;
+
+	buf = data;
+	while (len > 0) {
+		unsigned char tmp[16], rev[16];
+		uint16_t b[10];
+		uint32_t ctl;
+		int i;
+
+		/*
+		 * A short final block is copied to tmp[] and right-padded with zeros; len becomes 16 so the loop ends after it.
+		 */
+		if (len < 16) {
+			memset(tmp, 0, sizeof tmp);
+			memcpy(tmp, buf, len);
+			buf = tmp;
+			len = 16;
+		}
+
+		/*
+		 * Decode next block and apply the "high bit" (0x0100 in b[9] is 2^128).
+		 * The block is little-endian, so it is byte-reversed into rev[] first.
+		 */
+		for (i = 0; i < 16; i ++) {
+			rev[i] = buf[15 - i];
+		}
+		br_i15_decode_mod(b, rev, sizeof rev, P1305);
+		b[9] |= 0x0100;
+
+		/*
+		 * Add the accumulator to the decoded block (modular addition): p is
+		 * subtracted when ctl is set (presumably: add carried, or b >= p).
+		 */
+		ctl = br_i15_add(b, a, 1);
+		ctl |= NOT(br_i15_sub(b, P1305, 0));
+		br_i15_sub(b, P1305, ctl);
+
+		/*
+		 * Multiply by r (Montgomery product with P0I), result is the new accumulator value.
+		 */
+		br_i15_montymul(a, b, r, P1305, P0I);
+
+		buf += 16;
+		len -= 16;
+	}
+}
+
+/*
+ * Reverse, in place, the order of the 16 bytes at buf[0..15].
+ */
+static void
+byteswap16(unsigned char *buf)
+{
+	unsigned char *lo, *hi;
+
+	for (lo = buf, hi = buf + 15; lo < hi; lo ++, hi --) {
+		unsigned char held;
+
+		held = *lo;
+		*lo = *hi;
+		*hi = held;
+	}
+}
+
+/* see bearssl_block.h: writes the Poly1305 tag over aad/data to tag[] and runs ichacha over data[] */
+void
+br_poly1305_i15_run(const void *key, const void *iv,
+	void *data, size_t len, const void *aad, size_t aad_len,
+	void *tag, br_chacha20_run ichacha, int encrypt)
+{
+	unsigned char pkey[32], foot[16];
+	uint16_t t[10], r[10], acc[10];
+
+	/*
+	 * Compute the MAC key by running ichacha (third argument 0) over 32
+	 * zero bytes: 'r' is pkey[0..15], 's' is pkey[16..31].
+	 */
+	memset(pkey, 0, sizeof pkey);
+	ichacha(key, iv, 0, pkey, sizeof pkey);
+
+	/*
+	 * If encrypting, ChaCha20 must run first, followed by Poly1305.
+	 * When decrypting, the operations are reversed.
+	 */
+	if (encrypt) {
+		ichacha(key, iv, 1, data, len);
+	}
+
+	/*
+	 * Run Poly1305. We must process the AAD, then ciphertext, then
+	 * the footer (with the lengths). Note that the AAD and ciphertext
+	 * are meant to be padded with zeros up to the next multiple of 16,
+	 * and the length of the footer is 16 bytes as well.
+	 */
+
+	/*
+	 * Clamp 'r': clear the top 4 bits of bytes 3/7/11/15, low 2 bits of 4/8/12.
+	 */
+	pkey[ 3] &= 0x0F;
+	pkey[ 7] &= 0x0F;
+	pkey[11] &= 0x0F;
+	pkey[15] &= 0x0F;
+	pkey[ 4] &= 0xFC;
+	pkey[ 8] &= 0xFC;
+	pkey[12] &= 0xFC;
+
+	/*
+	 * Decode the clamped 'r' value into t[]. 'r' is little-endian, so
+	 * pkey[0..15] is byteswapped in place before br_i15_decode_mod().
+	 */
+	byteswap16(pkey);
+	br_i15_decode_mod(t, pkey, 16, P1305);
+
+	/*
+	 * Convert 'r' to Montgomery representation (Montgomery product of t with R2).
+	 */
+	br_i15_montymul(r, t, R2, P1305, P0I);
+
+	/*
+	 * Accumulator is 0 (0x8A is the same leading word as P1305[0]).
+	 */
+	br_i15_zero(acc, 0x8A);
+
+	/*
+	 * Process the additional authenticated data, ciphertext, and
+	 * footer (aad_len then len, each encoded over 64 bits by br_enc64le()).
+	 */
+	br_enc64le(foot, (uint64_t)aad_len);
+	br_enc64le(foot + 8, (uint64_t)len);
+	poly1305_inner(acc, r, aad, aad_len);
+	poly1305_inner(acc, r, data, len);
+	poly1305_inner(acc, r, foot, sizeof foot);
+
+	/*
+	 * Decode the value 's' (pkey[16..31]) into t[]. Again, a byteswap is needed.
+	 */
+	byteswap16(pkey + 16);
+	br_i15_decode_mod(t, pkey + 16, 16, P1305);
+
+	/*
+	 * Add the value 's' to the accumulator. That addition is done
+	 * modulo 2^128: only 16 bytes get encoded, so the carry is ignored.
+	 */
+	br_i15_add(acc, t, 1);
+
+	/*
+	 * Encode the result (128 low bits) to the tag. Encoding should
+	 * be little-endian, hence the byteswap after br_i15_encode().
+	 */
+	br_i15_encode(tag, 16, acc);
+	byteswap16(tag);
+
+	/*
+	 * If decrypting, then ChaCha20 runs _after_ Poly1305.
+	 */
+	if (!encrypt) {
+		ichacha(key, iv, 1, data, len);
+	}
+}
diff --git a/third_party/bearssl/src/prf.c b/third_party/bearssl/src/prf.c
new file mode 100644
index 0000000..f04a5fb
--- /dev/null
+++ b/third_party/bearssl/src/prf.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h: XORs successive HMAC(chain || label || seeds) blocks into dst[0..len-1] */
+void
+br_tls_phash(void *dst, size_t len,
+	const br_hash_class *dig,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed)
+{
+	br_hmac_key_context key_ctx;
+	br_hmac_context mac;
+	unsigned char chain[64], block[64];
+	unsigned char *out;
+	size_t label_len, hash_len, k;
+
+	if (len == 0) {
+		return;
+	}
+	out = dst;
+	hash_len = br_digest_size(dig);
+	for (label_len = 0; label[label_len] != 0; label_len ++);
+	br_hmac_key_init(&key_ctx, dig, secret, secret_len);
+	/* First chain value: HMAC over label || seeds. */
+	br_hmac_init(&mac, &key_ctx, 0);
+	br_hmac_update(&mac, label, label_len);
+	for (k = 0; k < seed_num; k ++) {
+		br_hmac_update(&mac, seed[k].data, seed[k].len);
+	}
+	br_hmac_out(&mac, chain);
+	for (;;) {
+		br_hmac_init(&mac, &key_ctx, 0);
+		br_hmac_update(&mac, chain, hash_len);
+		br_hmac_update(&mac, label, label_len);
+		for (k = 0; k < seed_num; k ++) {
+			br_hmac_update(&mac, seed[k].data, seed[k].len);
+		}
+		br_hmac_out(&mac, block);
+		for (k = 0; k < hash_len && len > 0; k ++, len --) {
+			*out ++ ^= block[k];
+		}
+		if (len == 0) {
+			break;
+		}
+		/* Next chain value: HMAC over the current one. */
+		br_hmac_init(&mac, &key_ctx, 0);
+		br_hmac_update(&mac, chain, hash_len);
+		br_hmac_out(&mac, chain);
+	}
+}
diff --git a/third_party/bearssl/src/prf_md5sha1.c b/third_party/bearssl/src/prf_md5sha1.c
new file mode 100644
index 0000000..3212833
--- /dev/null
+++ b/third_party/bearssl/src/prf_md5sha1.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl.h: MD5 P_hash keyed by the first half of secret, XORed with SHA-1 P_hash keyed by the last half */
+void
+br_tls10_prf(void *dst, size_t len,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed)
+{
+	const unsigned char *tail;
+	size_t half;
+
+	memset(dst, 0, len);
+	half = (secret_len + 1) >> 1;
+	tail = (const unsigned char *)secret + (secret_len - half);
+	br_tls_phash(dst, len, &br_md5_vtable,
+		secret, half, label, seed_num, seed);
+	br_tls_phash(dst, len, &br_sha1_vtable,
+		tail, half, label, seed_num, seed);
+}
diff --git a/third_party/bearssl/src/prf_sha256.c b/third_party/bearssl/src/prf_sha256.c
new file mode 100644
index 0000000..76041de
--- /dev/null
+++ b/third_party/bearssl/src/prf_sha256.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl.h: fills dst[0..len-1] with br_tls_phash() output computed with the SHA-256 vtable */
+void
+br_tls12_sha256_prf(void *dst, size_t len,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed)
+{
+	memset(dst, 0, len);	/* br_tls_phash() XORs into dst, so start from zeros */
+	br_tls_phash(dst, len, &br_sha256_vtable,
+		secret, secret_len, label, seed_num, seed);
+}
diff --git a/third_party/bearssl/src/prf_sha384.c b/third_party/bearssl/src/prf_sha384.c
new file mode 100644
index 0000000..c20c4e6
--- /dev/null
+++ b/third_party/bearssl/src/prf_sha384.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl.h: fills dst[0..len-1] with br_tls_phash() output computed with the SHA-384 vtable */
+void
+br_tls12_sha384_prf(void *dst, size_t len,
+	const void *secret, size_t secret_len, const char *label,
+	size_t seed_num, const br_tls_prf_seed_chunk *seed)
+{
+	memset(dst, 0, len);	/* br_tls_phash() XORs into dst, so start from zeros */
+	br_tls_phash(dst, len, &br_sha384_vtable,
+		secret, secret_len, label, seed_num, seed);
+}
diff --git a/third_party/bearssl/src/rsa_default_keygen.c b/third_party/bearssl/src/rsa_default_keygen.c
new file mode 100644
index 0000000..f2e83c8
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_keygen.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; compile-time choice of keygen: i62 if BR_INT128 or BR_UMUL128, else i15 if BR_LOMUL, else i31 */
+br_rsa_keygen
+br_rsa_keygen_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128
+	return &br_rsa_i62_keygen;
+#elif BR_LOMUL
+	return &br_rsa_i15_keygen;
+#else
+	return &br_rsa_i31_keygen;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_modulus.c b/third_party/bearssl/src/rsa_default_modulus.c
new file mode 100644
index 0000000..57d4be5
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_modulus.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; compile-time choice of compute_modulus: i15 if BR_LOMUL, else i31 (this getter has no i62 branch) */
+br_rsa_compute_modulus
+br_rsa_compute_modulus_get_default(void)
+{
+#if BR_LOMUL
+	return &br_rsa_i15_compute_modulus;
+#else
+	return &br_rsa_i31_compute_modulus;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_oaep_decrypt.c b/third_party/bearssl/src/rsa_default_oaep_decrypt.c
new file mode 100644
index 0000000..7345d64
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_oaep_decrypt.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; compile-time choice of oaep_decrypt: i62 if BR_INT128 or BR_UMUL128, else i15 if BR_LOMUL, else i31 */
+br_rsa_oaep_decrypt
+br_rsa_oaep_decrypt_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128
+	return &br_rsa_i62_oaep_decrypt;
+#elif BR_LOMUL
+	return &br_rsa_i15_oaep_decrypt;
+#else
+	return &br_rsa_i31_oaep_decrypt;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_oaep_encrypt.c b/third_party/bearssl/src/rsa_default_oaep_encrypt.c
new file mode 100644
index 0000000..ae33fcc
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_oaep_encrypt.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; compile-time choice of oaep_encrypt: i62 if BR_INT128 or BR_UMUL128, else i15 if BR_LOMUL, else i31 */
+br_rsa_oaep_encrypt
+br_rsa_oaep_encrypt_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128
+	return &br_rsa_i62_oaep_encrypt;
+#elif BR_LOMUL
+	return &br_rsa_i15_oaep_encrypt;
+#else
+	return &br_rsa_i31_oaep_encrypt;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_pkcs1_sign.c b/third_party/bearssl/src/rsa_default_pkcs1_sign.c
new file mode 100644
index 0000000..e926704
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_pkcs1_sign.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; compile-time choice of pkcs1_sign: i62 if BR_INT128 or BR_UMUL128, else i15 if BR_LOMUL, else i31 */
+br_rsa_pkcs1_sign
+br_rsa_pkcs1_sign_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128
+	return &br_rsa_i62_pkcs1_sign;
+#elif BR_LOMUL
+	return &br_rsa_i15_pkcs1_sign;
+#else
+	return &br_rsa_i31_pkcs1_sign;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_pkcs1_vrfy.c b/third_party/bearssl/src/rsa_default_pkcs1_vrfy.c
new file mode 100644
index 0000000..b3dbeb7
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_pkcs1_vrfy.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; compile-time choice of pkcs1_vrfy: i62 if BR_INT128 or BR_UMUL128, else i15 if BR_LOMUL, else i31 */
+br_rsa_pkcs1_vrfy
+br_rsa_pkcs1_vrfy_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128
+	return &br_rsa_i62_pkcs1_vrfy;
+#elif BR_LOMUL
+	return &br_rsa_i15_pkcs1_vrfy;
+#else
+	return &br_rsa_i31_pkcs1_vrfy;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_priv.c b/third_party/bearssl/src/rsa_default_priv.c
new file mode 100644
index 0000000..bb0b2c0
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_priv.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; compile-time choice of the private-key function: i62 if BR_INT128 or BR_UMUL128, else i15 if BR_LOMUL, else i31 */
+br_rsa_private
+br_rsa_private_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128
+	return &br_rsa_i62_private;
+#elif BR_LOMUL
+	return &br_rsa_i15_private;
+#else
+	return &br_rsa_i31_private;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_privexp.c b/third_party/bearssl/src/rsa_default_privexp.c
new file mode 100644
index 0000000..cda4555
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_privexp.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; compile-time choice of compute_privexp: i15 if BR_LOMUL, else i31 (this getter has no i62 branch) */
+br_rsa_compute_privexp
+br_rsa_compute_privexp_get_default(void)
+{
+#if BR_LOMUL
+	return &br_rsa_i15_compute_privexp;
+#else
+	return &br_rsa_i31_compute_privexp;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_pss_sign.c b/third_party/bearssl/src/rsa_default_pss_sign.c
new file mode 100644
index 0000000..ce4f3e0
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_pss_sign.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; compile-time choice of pss_sign: i62 if BR_INT128 or BR_UMUL128, else i15 if BR_LOMUL, else i31 */
+br_rsa_pss_sign
+br_rsa_pss_sign_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128
+	return &br_rsa_i62_pss_sign;
+#elif BR_LOMUL
+	return &br_rsa_i15_pss_sign;
+#else
+	return &br_rsa_i31_pss_sign;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_pss_vrfy.c b/third_party/bearssl/src/rsa_default_pss_vrfy.c
new file mode 100644
index 0000000..e3a9ad9
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_pss_vrfy.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; compile-time choice of pss_vrfy: i62 if BR_INT128 or BR_UMUL128, else i15 if BR_LOMUL, else i31 */
+br_rsa_pss_vrfy
+br_rsa_pss_vrfy_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128
+	return &br_rsa_i62_pss_vrfy;
+#elif BR_LOMUL
+	return &br_rsa_i15_pss_vrfy;
+#else
+	return &br_rsa_i31_pss_vrfy;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_pub.c b/third_party/bearssl/src/rsa_default_pub.c
new file mode 100644
index 0000000..a1f03ef
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_pub.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; compile-time choice of the public-key function: i62 if BR_INT128 or BR_UMUL128, else i15 if BR_LOMUL, else i31 */
+br_rsa_public
+br_rsa_public_get_default(void)
+{
+#if BR_INT128 || BR_UMUL128
+	return &br_rsa_i62_public;
+#elif BR_LOMUL
+	return &br_rsa_i15_public;
+#else
+	return &br_rsa_i31_public;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_default_pubexp.c b/third_party/bearssl/src/rsa_default_pubexp.c
new file mode 100644
index 0000000..47bc000
--- /dev/null
+++ b/third_party/bearssl/src/rsa_default_pubexp.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; engine (i15 or i31) is chosen at build time */
+br_rsa_compute_pubexp
+br_rsa_compute_pubexp_get_default(void)
+{
+#if BR_LOMUL
+	return &br_rsa_i15_compute_pubexp;
+#else
+	return &br_rsa_i31_compute_pubexp;
+#endif
+}
diff --git a/third_party/bearssl/src/rsa_i15_keygen.c b/third_party/bearssl/src/rsa_i15_keygen.c
new file mode 100644
index 0000000..e8da419
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_keygen.c
@@ -0,0 +1,583 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Fill x[1..len] with random 15-bit words from rng, masking the top word
+ * to the encoded bit size 'esize'. The header word x[0] is untouched.
+ */
+static void
+mkrand(const br_prng_class **rng, uint16_t *x, uint32_t esize)
+{
+	size_t u, len;
+	unsigned m;
+
+	len = (esize + 15) >> 4;
+	(*rng)->generate(rng, x + 1, len * sizeof(uint16_t));
+	for (u = 1; u < len; u ++) {
+		x[u] &= 0x7FFF;
+	}
+	m = esize & 15;
+	if (m == 0) {
+		x[len] &= 0x7FFF;
+	} else {
+		x[len] &= 0x7FFF >> (15 - m);
+	}
+}
+
+/*
+ * Big-endian unsigned encoding of the product of all small primes from
+ * 13 to 1481; trial_divisions() reduces it modulo each candidate.
+ */
+static const unsigned char SMALL_PRIMES[] = {
+	0x2E, 0xAB, 0x92, 0xD1, 0x8B, 0x12, 0x47, 0x31, 0x54, 0x0A,
+	0x99, 0x5D, 0x25, 0x5E, 0xE2, 0x14, 0x96, 0x29, 0x1E, 0xB7,
+	0x78, 0x70, 0xCC, 0x1F, 0xA5, 0xAB, 0x8D, 0x72, 0x11, 0x37,
+	0xFB, 0xD8, 0x1E, 0x3F, 0x5B, 0x34, 0x30, 0x17, 0x8B, 0xE5,
+	0x26, 0x28, 0x23, 0xA1, 0x8A, 0xA4, 0x29, 0xEA, 0xFD, 0x9E,
+	0x39, 0x60, 0x8A, 0xF3, 0xB5, 0xA6, 0xEB, 0x3F, 0x02, 0xB6,
+	0x16, 0xC3, 0x96, 0x9D, 0x38, 0xB0, 0x7D, 0x82, 0x87, 0x0C,
+	0xF7, 0xBE, 0x24, 0xE5, 0x5F, 0x41, 0x04, 0x79, 0x76, 0x40,
+	0xE7, 0x00, 0x22, 0x7E, 0xB5, 0x85, 0x7F, 0x8D, 0x01, 0x50,
+	0xE9, 0xD3, 0x29, 0x42, 0x08, 0xB3, 0x51, 0x40, 0x7B, 0xD7,
+	0x8D, 0xCC, 0x10, 0x01, 0x64, 0x59, 0x28, 0xB6, 0x53, 0xF3,
+	0x50, 0x4E, 0xB1, 0xF2, 0x58, 0xCD, 0x6E, 0xF5, 0x56, 0x3E,
+	0x66, 0x2F, 0xD7, 0x07, 0x7F, 0x52, 0x4C, 0x13, 0x24, 0xDC,
+	0x8E, 0x8D, 0xCC, 0xED, 0x77, 0xC4, 0x21, 0xD2, 0xFD, 0x08,
+	0xEA, 0xD7, 0xC0, 0x5C, 0x13, 0x82, 0x81, 0x31, 0x2F, 0x2B,
+	0x08, 0xE4, 0x80, 0x04, 0x7A, 0x0C, 0x8A, 0x3C, 0xDC, 0x22,
+	0xE4, 0x5A, 0x7A, 0xB0, 0x12, 0x5E, 0x4A, 0x76, 0x94, 0x77,
+	0xC2, 0x0E, 0x92, 0xBA, 0x8A, 0xA0, 0x1F, 0x14, 0x51, 0x1E,
+	0x66, 0x6C, 0x38, 0x03, 0x6C, 0xC7, 0x4A, 0x4B, 0x70, 0x80,
+	0xAF, 0xCA, 0x84, 0x51, 0xD8, 0xD2, 0x26, 0x49, 0xF5, 0xA8,
+	0x5E, 0x35, 0x4B, 0xAC, 0xCE, 0x29, 0x92, 0x33, 0xB7, 0xA2,
+	0x69, 0x7D, 0x0C, 0xE0, 0x9C, 0xDB, 0x04, 0xD6, 0xB4, 0xBC,
+	0x39, 0xD7, 0x7F, 0x9E, 0x9D, 0x78, 0x38, 0x7F, 0x51, 0x54,
+	0x50, 0x8B, 0x9E, 0x9C, 0x03, 0x6C, 0xF5, 0x9D, 0x2C, 0x74,
+	0x57, 0xF0, 0x27, 0x2A, 0xC3, 0x47, 0xCA, 0xB9, 0xD7, 0x5C,
+	0xFF, 0xC2, 0xAC, 0x65, 0x4E, 0xBD
+};
+
+/*
+ * We need temporary values for at least 7 integers of the same size
+ * as a factor (including header word); more space helps with performance
+ * (in modular exponentiations), but we much prefer to remain under
+ * 2 kilobytes in total, to save stack space. TEMPS is a count of 16-bit
+ * words; it exceeds 1024 (2 kB) only when BR_MAX_RSA_SIZE is greater than
+ * 4350 (default is 4096, so the limit holds unless that was modified).
+ * MAX() evaluates an argument twice: use side-effect-free operands only.
+ */
+#define MAX(x, y)   ((x) > (y) ? (x) : (y))
+#define TEMPS       MAX(1024, 7 * ((((BR_MAX_RSA_SIZE + 1) >> 1) + 29) / 15))
+
+/*
+ * Perform trial division on a candidate prime. This computes
+ * y = SMALL_PRIMES mod x, then tries to compute y/y mod x. The
+ * br_i15_moddiv() function will report an error if y is not invertible
+ * modulo x. Returned value is 1 on success (none of the small primes
+ * divides x), 0 on error (a non-trivial GCD is obtained).
+ *
+ * Assumes x is odd. t is scratch: y first, then moddiv() temporaries.
+ */
+static uint32_t
+trial_divisions(const uint16_t *x, uint16_t *t)
+{
+	uint16_t *y;
+	uint16_t x0i;
+
+	y = t;
+	t += 1 + ((x[0] + 15) >> 4);
+	x0i = br_i15_ninv15(x[1]);
+	br_i15_decode_reduce(y, SMALL_PRIMES, sizeof SMALL_PRIMES, x);
+	return br_i15_moddiv(y, y, x, x0i, t);
+}
+
+/*
+ * Perform n rounds of Miller-Rabin on the candidate prime x. This
+ * function assumes that x = 3 mod 4.
+ *
+ * Returned value is 1 on success (all rounds completed successfully),
+ * 0 otherwise. t is a scratch buffer of tlen 16-bit words.
+ */
+static uint32_t
+miller_rabin(const br_prng_class **rng, const uint16_t *x, int n,
+	uint16_t *t, size_t tlen)
+{
+	/*
+	 * Since x = 3 mod 4, the Miller-Rabin test is simple:
+	 *  - get a random base a (such that 1 < a < x-1)
+	 *  - compute z = a^((x-1)/2) mod x
+	 *  - if z != 1 and z != x-1, the number x is composite
+	 *
+	 * We generate bases 'a' randomly with a size which is
+	 * one bit less than x, which ensures that a < x-1. It
+	 * is not useful to verify that a > 1 because the probability
+	 * that we get a value a equal to 0 or 1 is much smaller
+	 * than the probability of our Miller-Rabin tests not to
+	 * detect a composite, which is already quite smaller than the
+	 * probability of the hardware misbehaving and returning a
+	 * composite integer because of some glitch (e.g. bad RAM
+	 * or ill-timed cosmic ray).
+	 */
+	unsigned char *xm1d2;
+	size_t xlen, xm1d2_len, xm1d2_len_u16, u;
+	uint32_t asize;
+	unsigned cc;
+	uint16_t x0i;
+
+	/*
+	 * Compute (x-1)/2 (encoded).
+	 */
+	xm1d2 = (unsigned char *)t;
+	xm1d2_len = ((x[0] - (x[0] >> 4)) + 7) >> 3;
+	br_i15_encode(xm1d2, xm1d2_len, x);
+	cc = 0;
+	for (u = 0; u < xm1d2_len; u ++) {
+		unsigned w;
+
+		w = xm1d2[u];
+		xm1d2[u] = (unsigned char)((w >> 1) | cc);
+		cc = w << 7;
+	}
+
+	/*
+	 * We used some words of the provided buffer for (x-1)/2.
+	 */
+	xm1d2_len_u16 = (xm1d2_len + 1) >> 1;
+	t += xm1d2_len_u16;
+	tlen -= xm1d2_len_u16;
+
+	xlen = (x[0] + 15) >> 4;
+	asize = x[0] - 1 - EQ0(x[0] & 15);
+	x0i = br_i15_ninv15(x[1]);
+	while (n -- > 0) {
+		uint16_t *a;
+		uint32_t eq1, eqm1;
+
+		/*
+		 * Generate a random base. We don't need the base to be
+		 * really uniform modulo x, so we just get a random
+		 * number which is one bit shorter than x.
+		 */
+		a = t;
+		a[0] = x[0];
+		a[xlen] = 0;
+		mkrand(rng, a, asize);
+
+		/*
+		 * Compute a^((x-1)/2) mod x. We assume here that the
+		 * function will not fail (the temporary array is large
+		 * enough).
+		 */
+		br_i15_modpow_opt(a, xm1d2, xm1d2_len,
+			x, x0i, t + 1 + xlen, tlen - 1 - xlen);
+
+		/*
+		 * We must obtain either 1 or x-1. Note that x is odd,
+		 * hence x-1 differs from x only in its low word (no
+		 * carry).
+		 */
+		eq1 = a[1] ^ 1;
+		eqm1 = a[1] ^ (x[1] - 1);
+		for (u = 2; u <= xlen; u ++) {
+			eq1 |= a[u];
+			eqm1 |= a[u] ^ x[u];
+		}
+
+		if ((EQ0(eq1) | EQ0(eqm1)) == 0) {
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/*
+ * Create a random prime of the provided size. 'esize' is the _encoded_
+ * bit length. The two top bits and the two bottom bits are set to 1.
+ */
+static void
+mkprime(const br_prng_class **rng, uint16_t *x, uint32_t esize,
+	uint32_t pubexp, uint16_t *t, size_t tlen)
+{
+	size_t len;
+
+	x[0] = esize;
+	len = (esize + 15) >> 4;
+	for (;;) {
+		size_t u;
+		uint32_t m3, m5, m7, m11;
+		int rounds;
+
+		/*
+		 * Generate random bits. We force the two top bits and the
+		 * two bottom bits to 1.
+		 */
+		mkrand(rng, x, esize);
+		if ((esize & 15) == 0) {
+			x[len] |= 0x6000;
+		} else if ((esize & 15) == 1) {
+			x[len] |= 0x0001;
+			x[len - 1] |= 0x4000;
+		} else {
+			x[len] |= 0x0003 << ((esize & 15) - 2);
+		}
+		x[1] |= 0x0003;
+
+		/*
+		 * Trial division with low primes (3, 5, 7 and 11). We
+		 * use the following properties:
+		 *
+		 *   2^2 = 1 mod 3
+		 *   2^4 = 1 mod 5
+		 *   2^3 = 1 mod 7
+		 *   2^10 = 1 mod 11
+		 */
+		m3 = 0;
+		m5 = 0;
+		m7 = 0;
+		m11 = 0;
+		for (u = 0; u < len; u ++) {
+			uint32_t w;
+
+			w = x[1 + u];
+			m3 += w << (u & 1);
+			m3 = (m3 & 0xFF) + (m3 >> 8);
+			m5 += w << ((4 - u) & 3);
+			m5 = (m5 & 0xFF) + (m5 >> 8);
+			m7 += w;
+			m7 = (m7 & 0x1FF) + (m7 >> 9);
+			m11 += w << (5 & -(u & 1));
+			m11 = (m11 & 0x3FF) + (m11 >> 10);
+		}
+
+		/*
+		 * Maximum values of m* at this point:
+		 *  m3:   511
+		 *  m5:   2310
+		 *  m7:   510
+		 *  m11:  2047
+		 * We use the same properties to make further reductions.
+		 */
+
+		m3 = (m3 & 0x0F) + (m3 >> 4);      /* max: 46 */
+		m3 = (m3 & 0x0F) + (m3 >> 4);      /* max: 16 */
+		m3 = ((m3 * 43) >> 5) & 3;
+
+		m5 = (m5 & 0xFF) + (m5 >> 8);      /* max: 263 */
+		m5 = (m5 & 0x0F) + (m5 >> 4);      /* max: 30 */
+		m5 = (m5 & 0x0F) + (m5 >> 4);      /* max: 15 */
+		m5 -= 10 & -GT(m5, 9);
+		m5 -= 5 & -GT(m5, 4);
+
+		m7 = (m7 & 0x3F) + (m7 >> 6);      /* max: 69 */
+		m7 = (m7 & 7) + (m7 >> 3);         /* max: 14 */
+		m7 = ((m7 * 147) >> 7) & 7;
+
+		/*
+		 * 2^5 = 32 = -1 mod 11.
+		 */
+		m11 = (m11 & 0x1F) + 66 - (m11 >> 5);   /* max: 97 */
+		m11 -= 88 & -GT(m11, 87);
+		m11 -= 44 & -GT(m11, 43);
+		m11 -= 22 & -GT(m11, 21);
+		m11 -= 11 & -GT(m11, 10);
+
+		/*
+		 * If any of these residues is 0, then the candidate is
+		 * not prime. Also, if pubexp is 3, 5, 7 or 11, and the
+		 * corresponding residue is 1, then the candidate must
+		 * be rejected, because we need e to be invertible
+		 * modulo p-1. We can use simple comparisons here
+		 * because they won't leak information on a candidate
+		 * that we keep, only on one that we reject (and is thus
+		 * not secret).
+		 */
+		if (m3 == 0 || m5 == 0 || m7 == 0 || m11 == 0) {
+			continue;
+		}
+		if ((pubexp == 3 && m3 == 1)
+			|| (pubexp == 5 && m5 == 1)
+			|| (pubexp == 7 && m7 == 1)
+			|| (pubexp == 11 && m11 == 1))
+		{
+			continue;
+		}
+
+		/*
+		 * More trial divisions.
+		 */
+		if (!trial_divisions(x, t)) {
+			continue;
+		}
+
+		/*
+		 * Miller-Rabin algorithm. Since we selected a random
+		 * integer, not a maliciously crafted integer, we can use
+		 * relatively few rounds to lower the risk of a false
+		 * positive (i.e. declaring prime a non-prime) under
+		 * 2^(-80). It is not useful to lower the probability much
+		 * below that, since that would be substantially below
+		 * the probability of the hardware misbehaving. Sufficient
+		 * numbers of rounds are extracted from the Handbook of
+		 * Applied Cryptography, note 4.49 (page 149).
+		 *
+		 * Since we work on the encoded size (esize), we need to
+		 * compare with encoded thresholds.
+		 */
+		if (esize < 320) {
+			rounds = 12;
+		} else if (esize < 480) {
+			rounds = 9;
+		} else if (esize < 693) {
+			rounds = 6;
+		} else if (esize < 906) {
+			rounds = 4;
+		} else if (esize < 1386) {
+			rounds = 3;
+		} else {
+			rounds = 2;
+		}
+
+		if (miller_rabin(rng, x, rounds, t, tlen)) {
+			return;
+		}
+	}
+}
+
+/*
+ * Let p be a prime (p > 2^33, p = 3 mod 4). Let m = (p-1)/2, provided
+ * as parameter (with announced bit length equal to that of p). This
+ * function computes d = 1/e mod p-1 (for an odd integer e). Returned
+ * value is 1 on success, 0 on error (an error is reported if e is not
+ * invertible modulo p-1).
+ *
+ * The temporary buffer (t) must have room for at least 4 integers of
+ * the size of p.
+ */
+static uint32_t
+invert_pubexp(uint16_t *d, const uint16_t *m, uint32_t e, uint16_t *t)
+{
+	uint16_t *f;
+	uint32_t r;
+
+	f = t;
+	t += 1 + ((m[0] + 15) >> 4);
+
+	/*
+	 * Compute d = 1/e mod m. Since p = 3 mod 4, m is odd.
+	 */
+	br_i15_zero(d, m[0]);
+	d[1] = 1;
+	br_i15_zero(f, m[0]);
+	f[1] = e & 0x7FFF;
+	f[2] = (e >> 15) & 0x7FFF;
+	f[3] = e >> 30;
+	r = br_i15_moddiv(d, f, m, br_i15_ninv15(m[1]), t);
+
+	/*
+	 * We really want d = 1/e mod p-1, with p-1 = 2m. By the CRT,
+	 * the result is either the d we got, or d + m.
+	 *
+	 * Let's write e*d = 1 + k*m, for some integer k. Integers e
+	 * and m are odd. If d is odd, then e*d is odd, which implies
+	 * that k must be even; in that case, e*d = 1 + (k/2)*2m, and
+	 * thus d is already fine. Conversely, if d is even, then k
+	 * is odd, and we must add m to d in order to get the correct
+	 * result.
+	 */
+	br_i15_add(d, m, (uint32_t)(1 - (d[1] & 1)));
+
+	return r;
+}
+
+/*
+ * Swap the len-byte contents of two buffers. They must be disjoint.
+ */
+static void
+bufswap(void *b1, void *b2, size_t len)
+{
+	size_t u;
+	unsigned char *buf1, *buf2;
+
+	buf1 = b1;
+	buf2 = b2;
+	for (u = 0; u < len; u ++) {
+		unsigned w;
+
+		w = buf1[u];
+		buf1[u] = buf2[u];
+		buf2[u] = w;
+	}
+}
+
+/* see bearssl_rsa.h */
+uint32_t
+br_rsa_i15_keygen(const br_prng_class **rng,
+	br_rsa_private_key *sk, void *kbuf_priv,
+	br_rsa_public_key *pk, void *kbuf_pub,
+	unsigned size, uint32_t pubexp)
+{
+	uint32_t esize_p, esize_q;
+	size_t plen, qlen, tlen;
+	uint16_t *p, *q, *t;
+	uint16_t tmp[TEMPS];
+	uint32_t r;
+
+	if (size < BR_MIN_RSA_SIZE || size > BR_MAX_RSA_SIZE) {
+		return 0;
+	}
+	if (pubexp == 0) {
+		pubexp = 3;
+	} else if (pubexp == 1 || (pubexp & 1) == 0) {
+		return 0;
+	}
+
+	esize_p = (size + 1) >> 1;
+	esize_q = size - esize_p;
+	sk->n_bitlen = size;
+	sk->p = kbuf_priv;
+	sk->plen = (esize_p + 7) >> 3;
+	sk->q = sk->p + sk->plen;
+	sk->qlen = (esize_q + 7) >> 3;
+	sk->dp = sk->q + sk->qlen;
+	sk->dplen = sk->plen;
+	sk->dq = sk->dp + sk->dplen;
+	sk->dqlen = sk->qlen;
+	sk->iq = sk->dq + sk->dqlen;
+	sk->iqlen = sk->plen;
+
+	if (pk != NULL) {
+		pk->n = kbuf_pub;
+		pk->nlen = (size + 7) >> 3;
+		pk->e = pk->n + pk->nlen;
+		pk->elen = 4;
+		br_enc32be(pk->e, pubexp);
+		while (*pk->e == 0) {
+			pk->e ++;
+			pk->elen --;
+		}
+	}
+
+	/*
+	 * We now switch to encoded sizes.
+	 *
+	 * floor((x * 17477) / (2^18)) is equal to floor(x/15) for all
+	 * integers x from 0 to 23833.
+	 */
+	esize_p += MUL15(esize_p, 17477) >> 18;
+	esize_q += MUL15(esize_q, 17477) >> 18;
+	plen = (esize_p + 15) >> 4;
+	qlen = (esize_q + 15) >> 4;
+	p = tmp;
+	q = p + 1 + plen;
+	t = q + 1 + qlen;
+	tlen = ((sizeof tmp) / sizeof(uint16_t)) - (2 + plen + qlen);
+
+	/*
+	 * When looking for primes p and q, we temporarily divide
+	 * candidates by 2, in order to compute the inverse of the
+	 * public exponent.
+	 */
+
+	for (;;) {
+		mkprime(rng, p, esize_p, pubexp, t, tlen);
+		br_i15_rshift(p, 1);
+		if (invert_pubexp(t, p, pubexp, t + 1 + plen)) {
+			br_i15_add(p, p, 1);
+			p[1] |= 1;
+			br_i15_encode(sk->p, sk->plen, p);
+			br_i15_encode(sk->dp, sk->dplen, t);
+			break;
+		}
+	}
+
+	for (;;) {
+		mkprime(rng, q, esize_q, pubexp, t, tlen);
+		br_i15_rshift(q, 1);
+		if (invert_pubexp(t, q, pubexp, t + 1 + qlen)) {
+			br_i15_add(q, q, 1);
+			q[1] |= 1;
+			br_i15_encode(sk->q, sk->qlen, q);
+			br_i15_encode(sk->dq, sk->dqlen, t);
+			break;
+		}
+	}
+
+	/*
+	 * If p and q have the same size, then it is possible that q > p
+	 * (when the target modulus size is odd, we generate p with a
+	 * greater bit length than q). If q > p, we want to swap p and q
+	 * (and also dp and dq) for two reasons:
+	 *  - The final step below (inversion of q modulo p) is easier if
+	 *    p > q.
+	 *  - While BearSSL's RSA code is perfectly happy with RSA keys such
+	 *    that p < q, some other implementations have restrictions and
+	 *    require p > q.
+	 *
+	 * Note that we can do a simple non-constant-time swap here,
+	 * because the only information we leak here is that we insist on
+	 * returning p and q such that p > q, which is not a secret.
+	 */
+	if (esize_p == esize_q && br_i15_sub(p, q, 0) == 1) {
+		bufswap(p, q, (1 + plen) * sizeof *p);
+		bufswap(sk->p, sk->q, sk->plen);
+		bufswap(sk->dp, sk->dq, sk->dplen);
+	}
+
+	/*
+	 * We have produced p, q, dp and dq. We can now compute iq = 1/q mod p.
+	 *
+	 * We ensured that p >= q, so this is just a matter of updating the
+	 * header word for q (and possibly adding an extra word).
+	 *
+	 * Theoretically, the call below may fail, in case we were
+	 * extraordinarily unlucky, and p = q. Another failure case is if
+	 * Miller-Rabin failed us _twice_, and p and q are non-prime and
+	 * have a factor in common. We report the error mostly because it
+	 * is cheap and we can, but in practice this never happens (or, at
+	 * least, it happens way less often than hardware glitches).
+	 */
+	q[0] = p[0];
+	if (plen > qlen) {
+		q[plen] = 0;
+		t ++;
+		tlen --;
+	}
+	br_i15_zero(t, p[0]);
+	t[1] = 1;
+	r = br_i15_moddiv(t, q, p, br_i15_ninv15(p[1]), t + 1 + plen);
+	br_i15_encode(sk->iq, sk->iqlen, t);
+
+	/*
+	 * Compute the public modulus too, if required.
+	 */
+	if (pk != NULL) {
+		br_i15_zero(t, p[0]);
+		br_i15_mulacc(t, p, q);
+		br_i15_encode(pk->n, pk->nlen, t);
+	}
+
+	return r;
+}
diff --git a/third_party/bearssl/src/rsa_i15_modulus.c b/third_party/bearssl/src/rsa_i15_modulus.c
new file mode 100644
index 0000000..16458c3
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_modulus.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h */
+size_t
+br_rsa_i15_compute_modulus(void *n, const br_rsa_private_key *sk)
+{
+	uint16_t tmp[4 * (((BR_MAX_RSA_SIZE / 2) + 14) / 15) + 5];
+	uint16_t *t, *p, *q;
+	const unsigned char *pbuf, *qbuf;
+	size_t nlen, plen, qlen, tlen;
+
+	/*
+	 * Skip leading zero bytes to get the actual byte lengths of p and q.
+	 */
+	pbuf = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *pbuf == 0) {
+		pbuf ++;
+		plen --;
+	}
+	qbuf = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *qbuf == 0) {
+		qbuf ++;
+		qlen --;
+	}
+
+	t = tmp;
+	tlen = (sizeof tmp) / (sizeof tmp[0]);
+
+	/*
+	 * Decode p.
+	 */
+	if ((15 * tlen) < (plen << 3) + 15) {
+		return 0;
+	}
+	br_i15_decode(t, pbuf, plen);
+	p = t;
+	plen = (p[0] + 31) >> 4;
+	t += plen;
+	tlen -= plen;
+
+	/*
+	 * Decode q.
+	 */
+	if ((15 * tlen) < (qlen << 3) + 15) {
+		return 0;
+	}
+	br_i15_decode(t, qbuf, qlen);
+	q = t;
+	qlen = (q[0] + 31) >> 4;
+	t += qlen;
+	tlen -= qlen;
+
+	/*
+	 * Computation can proceed only if we have enough room for the
+	 * modulus.
+	 */
+	if (tlen < (plen + qlen + 1)) {
+		return 0;
+	}
+
+	/*
+	 * Private key already contains the modulus bit length, from which
+	 * we can infer the output length. Even if n is NULL, we still had
+	 * to decode p and q to make sure that the product can be computed.
+	 */
+	nlen = (sk->n_bitlen + 7) >> 3;
+	if (n != NULL) {
+		br_i15_zero(t, p[0]);
+		br_i15_mulacc(t, p, q);
+		br_i15_encode(n, nlen, t);
+	}
+	return nlen;
+}
diff --git a/third_party/bearssl/src/rsa_i15_oaep_decrypt.c b/third_party/bearssl/src/rsa_i15_oaep_decrypt.c
new file mode 100644
index 0000000..927eecd
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_oaep_decrypt.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; *len must equal the modulus length in bytes */
+uint32_t
+br_rsa_i15_oaep_decrypt(const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_private_key *sk, void *data, size_t *len)
+{
+	uint32_t r;
+
+	if (*len != ((sk->n_bitlen + 7) >> 3)) {
+		return 0;
+	}
+	r = br_rsa_i15_private(data, sk);
+	r &= br_rsa_oaep_unpad(dig, label, label_len, data, len);
+	return r;
+}
diff --git a/third_party/bearssl/src/rsa_i15_oaep_encrypt.c b/third_party/bearssl/src/rsa_i15_oaep_encrypt.c
new file mode 100644
index 0000000..b9a6cfa
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_oaep_encrypt.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; returns 0 if padding or the public operation returns 0 */
+size_t
+br_rsa_i15_oaep_encrypt(
+	const br_prng_class **rnd, const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_public_key *pk,
+	void *dst, size_t dst_max_len,
+	const void *src, size_t src_len)
+{
+	size_t dlen;
+
+	dlen = br_rsa_oaep_pad(rnd, dig, label, label_len,
+		pk, dst, dst_max_len, src, src_len);
+	if (dlen == 0) {
+		return 0;
+	}
+	return dlen & -(size_t)br_rsa_i15_public(dst, dlen, pk);
+}
diff --git a/third_party/bearssl/src/rsa_i15_pkcs1_sign.c b/third_party/bearssl/src/rsa_i15_pkcs1_sign.c
new file mode 100644
index 0000000..f519423
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_pkcs1_sign.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; pads into x, then applies the private key in place */
+uint32_t
+br_rsa_i15_pkcs1_sign(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	const br_rsa_private_key *sk, unsigned char *x)
+{
+	if (!br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)) {
+		return 0;
+	}
+	return br_rsa_i15_private(x, sk);
+}
diff --git a/third_party/bearssl/src/rsa_i15_pkcs1_vrfy.c b/third_party/bearssl/src/rsa_i15_pkcs1_vrfy.c
new file mode 100644
index 0000000..2c35184
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_pkcs1_vrfy.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; operates on a local copy of the signature x */
+uint32_t
+br_rsa_i15_pkcs1_vrfy(const unsigned char *x, size_t xlen,
+	const unsigned char *hash_oid, size_t hash_len,
+	const br_rsa_public_key *pk, unsigned char *hash_out)
+{
+	unsigned char sig[BR_MAX_RSA_SIZE >> 3];
+
+	if (xlen > (sizeof sig)) {
+		return 0;
+	}
+	memcpy(sig, x, xlen);
+	if (!br_rsa_i15_public(sig, xlen, pk)) {
+		return 0;
+	}
+	return br_rsa_pkcs1_sig_unpad(sig, xlen, hash_oid, hash_len, hash_out);
+}
diff --git a/third_party/bearssl/src/rsa_i15_priv.c b/third_party/bearssl/src/rsa_i15_priv.c
new file mode 100644
index 0000000..177cc3a
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_priv.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define U      (2 + ((BR_MAX_RSA_FACTOR + 14) / 15))
+#define TLEN   (8 * U)
+
+/* see bearssl_rsa.h */
+uint32_t
+br_rsa_i15_private(unsigned char *x, const br_rsa_private_key *sk)
+{
+	const unsigned char *p, *q;
+	size_t plen, qlen;
+	size_t fwlen;
+	uint16_t p0i, q0i;
+	size_t xlen, u;
+	uint16_t tmp[1 + TLEN];
+	long z;
+	uint16_t *mp, *mq, *s1, *s2, *t1, *t2, *t3;
+	uint32_t r;
+
+	/*
+	 * Compute the actual lengths of p and q, in bytes.
+	 * These lengths are not considered secret (we cannot really hide
+	 * them anyway in constant-time code).
+	 */
+	p = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *p == 0) {
+		p ++;
+		plen --;
+	}
+	q = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *q == 0) {
+		q ++;
+		qlen --;
+	}
+
+	/*
+	 * Compute the maximum factor length, in words.
+	 */
+	z = (long)(plen > qlen ? plen : qlen) << 3;
+	fwlen = 1;
+	while (z > 0) {
+		z -= 15;
+		fwlen ++;
+	}
+	/*
+	 * Round up the word length to an even number.
+	 */
+	fwlen += (fwlen & 1);
+
+	/*
+	 * We need to fit at least 6 values in the stack buffer.
+	 */
+	if (6 * fwlen > TLEN) {
+		return 0;
+	}
+
+	/*
+	 * Compute signature length (in bytes).
+	 */
+	xlen = (sk->n_bitlen + 7) >> 3;
+
+	/*
+	 * Ensure 32-bit alignment for value words.
+	 */
+	mq = tmp;
+	if (((uintptr_t)mq & 2) == 0) {
+		mq ++;
+	}
+
+	/*
+	 * Decode q.
+	 */
+	br_i15_decode(mq, q, qlen);
+
+	/*
+	 * Decode p.
+	 */
+	t1 = mq + fwlen;
+	br_i15_decode(t1, p, plen);
+
+	/*
+	 * Compute the modulus (product of the two factors), to compare
+	 * it with the source value. We use br_i15_mulacc(), since it's
+	 * already used later on.
+	 */
+	t2 = mq + 2 * fwlen;
+	br_i15_zero(t2, mq[0]);
+	br_i15_mulacc(t2, mq, t1);
+
+	/*
+	 * We encode the modulus into bytes, to perform the comparison
+	 * with bytes. We know that the product length, in bytes, is
+	 * exactly xlen.
+	 * The comparison actually computes the carry when subtracting
+	 * the modulus from the source value; that carry must be 1 for
+	 * a value in the correct range. We keep it in r, which is our
+	 * accumulator for the error code.
+	 */
+	t3 = mq + 4 * fwlen;
+	br_i15_encode(t3, xlen, t2);
+	u = xlen;
+	r = 0;
+	while (u > 0) {
+		uint32_t wn, wx;
+
+		u --;
+		wn = ((unsigned char *)t3)[u];
+		wx = x[u];
+		r = ((wx - (wn + r)) >> 8) & 1;
+	}
+
+	/*
+	 * Move the decoded p to another temporary buffer.
+	 */
+	mp = mq + 2 * fwlen;
+	memmove(mp, t1, fwlen * sizeof *t1);
+
+	/*
+	 * Compute s2 = x^dq mod q.
+	 */
+	q0i = br_i15_ninv15(mq[1]);
+	s2 = mq + fwlen;
+	br_i15_decode_reduce(s2, x, xlen, mq);
+	r &= br_i15_modpow_opt(s2, sk->dq, sk->dqlen, mq, q0i,
+		mq + 3 * fwlen, TLEN - 3 * fwlen);
+
+	/*
+	 * Compute s1 = x^dp mod p.
+	 */
+	p0i = br_i15_ninv15(mp[1]);
+	s1 = mq + 3 * fwlen;
+	br_i15_decode_reduce(s1, x, xlen, mp);
+	r &= br_i15_modpow_opt(s1, sk->dp, sk->dplen, mp, p0i,
+		mq + 4 * fwlen, TLEN - 4 * fwlen);
+
+	/*
+	 * Compute:
+	 *   h = (s1 - s2)*(1/q) mod p
+	 * s1 is an integer modulo p, but s2 is modulo q. PKCS#1 is
+	 * unclear about whether p may be lower than q (some existing,
+	 * widely deployed implementations of RSA don't tolerate p < q),
+	 * but we want to support that occurrence, so we need to use the
+	 * reduction function.
+	 *
+	 * Since we use br_i15_decode_reduce() for iq (purportedly, the
+	 * inverse of q modulo p), we also tolerate improperly large
+	 * values for this parameter.
+	 */
+	t1 = mq + 4 * fwlen;
+	t2 = mq + 5 * fwlen;
+	br_i15_reduce(t2, s2, mp);
+	br_i15_add(s1, mp, br_i15_sub(s1, t2, 1));
+	br_i15_to_monty(s1, mp);
+	br_i15_decode_reduce(t1, sk->iq, sk->iqlen, mp);
+	br_i15_montymul(t2, s1, t1, mp, p0i);
+
+	/*
+	 * h is now in t2. We compute the final result:
+	 *   s = s2 + q*h
+	 * All these operations are non-modular.
+	 *
+	 * We need mq, s2 and t2. We use the t3 buffer as destination.
+	 * The buffers mp, s1 and t1 are no longer needed, so we can
+	 * reuse them for t3. Moreover, the first step of the computation
+	 * is to copy s2 into t3, after which s2 is not needed. Right
+	 * now, mq is in slot 0, s2 is in slot 1, and t2 in slot 5.
+	 * Therefore, we have ample room for t3 by simply using s2.
+	 */
+	t3 = s2;
+	br_i15_mulacc(t3, mq, t2);
+
+	/*
+	 * Encode the result. Since we already checked the value of xlen,
+	 * we can just use it right away.
+	 */
+	br_i15_encode(x, xlen, t3);
+
+	/*
+	 * The only error conditions remaining at that point are invalid
+	 * values for p and q (even integers).
+	 */
+	return p0i & q0i & r;
+}
diff --git a/third_party/bearssl/src/rsa_i15_privexp.c b/third_party/bearssl/src/rsa_i15_privexp.c
new file mode 100644
index 0000000..57d6918
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_privexp.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; returns the length of d in bytes, or 0 on error */
+size_t
+br_rsa_i15_compute_privexp(void *d,
+	const br_rsa_private_key *sk, uint32_t e)
+{
+	/*
+	 * We want to invert e modulo phi = (p-1)(q-1). This first
+	 * requires computing phi, which is easy since we have the factors
+	 * p and q in the private key structure.
+	 *
+	 * Since p = 3 mod 4 and q = 3 mod 4, phi/4 is an odd integer.
+	 * We could invert e modulo phi/4 then patch the result to
+	 * modulo phi, but this would involve assembling three modulus-wide
+	 * values (phi/4, 1 and e) and calling moddiv, that requires
+	 * three more temporaries, for a total of six big integers, or
+	 * slightly more than 3 kB of stack space for RSA-4096. This
+	 * exceeds our stack requirements.
+	 *
+	 * Instead, we first use one step of the extended GCD:
+	 *
+	 *   - We compute phi = k*e + r  (Euclidean division of phi by e).
+	 *     If public exponent e is correct, then r != 0 (e must be
+	 *     invertible modulo phi). We also have k != 0 since we
+	 *     enforce non-ridiculously-small factors.
+	 *
+	 *   - We find small u, v such that u*e - v*r = 1  (using a
+	 *     binary GCD; we can arrange for u < r and v < e, i.e. all
+	 *     values fit on 32 bits).
+	 *
+	 *   - Solution is: d = u + v*k
+	 *     This last computation is exact: since u < r and v < e,
+	 *     the above implies d < r + e*((phi-r)/e) = phi
+	 */
+
+	uint16_t tmp[4 * ((BR_MAX_RSA_FACTOR + 14) / 15) + 12];
+	uint16_t *p, *q, *k, *m, *z, *phi;
+	const unsigned char *pbuf, *qbuf;
+	size_t plen, qlen, u, len, dlen;
+	uint32_t r, a, b, u0, v0, u1, v1, he, hr;
+	int i;
+
+	/*
+	 * Check that e is correct (odd, and at least 3).
+	 */
+	if (e < 3 || (e & 1) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Check lengths of p and q (sans leading zeros); both must be odd.
+	 */
+	pbuf = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *pbuf == 0) {
+		pbuf ++;
+		plen --;
+	}
+	if (plen < 5 || plen > (BR_MAX_RSA_FACTOR / 8)
+		|| (pbuf[plen - 1] & 1) != 1)
+	{
+		return 0;
+	}
+	qbuf = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *qbuf == 0) {
+		qbuf ++;
+		qlen --;
+	}
+	if (qlen < 5 || qlen > (BR_MAX_RSA_FACTOR / 8)
+		|| (qbuf[qlen - 1] & 1) != 1)
+	{
+		return 0;
+	}
+
+	/*
+	 * Output length is that of the modulus; d == NULL only queries it.
+	 */
+	dlen = (sk->n_bitlen + 7) >> 3;
+	if (d == NULL) {
+		return dlen;
+	}
+
+	p = tmp;
+	br_i15_decode(p, pbuf, plen);
+	plen = (p[0] + 15) >> 4;     /* from here on, a count of words */
+	q = p + 1 + plen;
+	br_i15_decode(q, qbuf, qlen);
+	qlen = (q[0] + 15) >> 4;     /* from here on, a count of words */
+
+	/*
+	 * Compute phi = (p-1)*(q-1), then move it over p-1 and q-1 (that
+	 * we do not need anymore). Since p and q are odd (checked above),
+	 * decrementing their low word cannot borrow. The mulacc function
+	 * sets the announced bit length of phi to be the sum of those of
+	 * p-1 and q-1, which is usually exact but may overshoot by one
+	 * bit in some cases; we readjust it to its true length.
+	 */
+	p[1] --;
+	q[1] --;
+	phi = q + 1 + qlen;
+	br_i15_zero(phi, p[0]);
+	br_i15_mulacc(phi, p, q);
+	len = (phi[0] + 15) >> 4;
+	memmove(tmp, phi, (1 + len) * sizeof *phi);
+	phi = tmp;
+	phi[0] = br_i15_bit_length(phi + 1, len);
+	len = (phi[0] + 15) >> 4;
+
+	/*
+	 * Divide phi by public exponent e. The final remainder r must be
+	 * non-zero (otherwise, the key is invalid). The quotient is k,
+	 * which we write over phi, since we don't need phi after that.
+	 */
+	r = 0;
+	for (u = len; u >= 1; u --) {
+		/*
+		 * Upon entry, r < e, and phi[u] < 2^15; hence,
+		 * hi:lo < e*2^15. Thus, the produced word k[u]
+		 * must be lower than 2^15, and the new remainder r
+		 * is lower than e.
+		 */
+		uint32_t hi, lo;
+
+		hi = r >> 17;
+		lo = (r << 15) + phi[u];
+		phi[u] = br_divrem(hi, lo, e, &r);
+	}
+	if (r == 0) {
+		return 0;
+	}
+	k = phi;
+
+	/*
+	 * Compute u and v such that u*e - v*r = GCD(e,r). We use
+	 * a binary GCD algorithm, with 6 extra integers a, b,
+	 * u0, u1, v0 and v1. Initial values are:
+	 *   a = e    u0 = 1   v0 = 0
+	 *   b = r    u1 = r   v1 = e-1
+	 * The following invariants are maintained:
+	 *   a = u0*e - v0*r
+	 *   b = u1*e - v1*r
+	 *   0 < a <= e
+	 *   0 < b <= r
+	 *   0 <= u0 <= r
+	 *   0 <= v0 <= e
+	 *   0 <= u1 <= r
+	 *   0 <= v1 <= e
+	 *
+	 * At each iteration, we reduce either a or b by one bit, and
+	 * adjust u0, u1, v0 and v1 to maintain the invariants:
+	 *  - if a is even, then a <- a/2
+	 *  - otherwise, if b is even, then b <- b/2
+	 *  - otherwise, if a > b, then a <- (a-b)/2
+	 *  - otherwise, if b > a, then b <- (b-a)/2
+	 * Algorithm stops when a = b. At that point, the common value
+	 * is the GCD of e and r; it must be 1 (otherwise, the private
+	 * key or public exponent is not valid). The (u0,v0) or (u1,v1)
+	 * pairs are the solution we are looking for.
+	 *
+	 * Since either a or b is reduced by at least 1 bit at each
+	 * iteration, 62 iterations are enough to reach the end
+	 * condition.
+	 *
+	 * To maintain the invariants, we must compute the same operations
+	 * on the u* and v* values that we do on a and b:
+	 *  - When a is divided by 2, u0 and v0 must be divided by 2.
+	 *  - When b is divided by 2, u1 and v1 must be divided by 2.
+	 *  - When b is subtracted from a, u1 and v1 are subtracted from
+	 *    u0 and v0, respectively.
+	 *  - When a is subtracted from b, u0 and v0 are subtracted from
+	 *    u1 and v1, respectively.
+	 *
+	 * However, we want to keep the u* and v* values in their proper
+	 * ranges. The following remarks apply:
+	 *
+	 *  - When a is divided by 2, then a is even. Therefore:
+	 *
+	 *     * If r is odd, then u0 and v0 must have the same parity;
+	 *       if they are both odd, then adding r to u0 and e to v0
+	 *       makes them both even, and the division by 2 brings them
+	 *       back to the proper range.
+	 *
+	 *     * If r is even, then u0 must be even; if v0 is odd, then
+	 *       adding r to u0 and e to v0 makes them both even, and the
+	 *       division by 2 brings them back to the proper range.
+	 *
+	 *    Thus, all we need to do is to look at the parity of v0,
+	 *    and add (r,e) to (u0,v0) when v0 is odd. In order to avoid
+	 *    a 32-bit overflow, we can add ((r+1)/2,(e/2)+1) after the
+	 *    division (r+1 does not overflow since r < e; and (e/2)+1
+	 *    is equal to (e+1)/2 since e is odd).
+	 *
+	 *  - When we subtract b from a, three cases may occur:
+	 *
+	 *     * u1 <= u0 and v1 <= v0: just do the subtractions
+	 *
+	 *     * u1 > u0 and v1 > v0: compute:
+	 *         (u0, v0) <- (u0 + r - u1, v0 + e - v1)
+	 *
+	 *     * u1 <= u0 and v1 > v0: compute:
+	 *         (u0, v0) <- (u0 + r - u1, v0 + e - v1)
+	 *
+	 *    The fourth case (u1 > u0 and v1 <= v0) is not possible
+	 *    because it would contradict "b < a" (which is the reason
+	 *    why we subtract b from a).
+	 *
+	 *    The tricky case is the third one: from the equations, it
+	 *    seems that u0 may go out of range. However, the invariants
+	 *    and ranges of other values imply that, in that case, the
+	 *    new u0 does not actually exceed the range.
+	 *
+	 *    We can thus handle the subtraction by adding (r,e) based
+	 *    solely on the comparison between v0 and v1.
+	 */
+	a = e;
+	b = r;
+	u0 = 1;
+	v0 = 0;
+	u1 = r;
+	v1 = e - 1;
+	hr = (r + 1) >> 1;           /* (r+1)/2, added after halving u* */
+	he = (e >> 1) + 1;           /* (e+1)/2, added after halving v* */
+	for (i = 0; i < 62; i ++) {
+		uint32_t oa, ob, agtb, bgta;
+		uint32_t sab, sba, da, db;
+		uint32_t ctl;
+
+		oa = a & 1;                  /* 1 if a is odd */
+		ob = b & 1;                  /* 1 if b is odd */
+		agtb = GT(a, b);             /* 1 if a > b */
+		bgta = GT(b, a);             /* 1 if b > a */
+
+		sab = oa & ob & agtb;        /* 1 if a <- a-b */
+		sba = oa & ob & bgta;        /* 1 if b <- b-a */
+
+		/* a <- a-b, u0 <- u0-u1, v0 <- v0-v1 */
+		ctl = GT(v1, v0);
+		a -= b & -sab;
+		u0 -= (u1 - (r & -ctl)) & -sab;
+		v0 -= (v1 - (e & -ctl)) & -sab;
+
+		/* b <- b-a, u1 <- u1-u0 mod r, v1 <- v1-v0 mod e */
+		ctl = GT(v0, v1);
+		b -= a & -sba;
+		u1 -= (u0 - (r & -ctl)) & -sba;
+		v1 -= (v0 - (e & -ctl)) & -sba;
+
+		da = NOT(oa) | sab;          /* 1 if a <- a/2 */
+		db = (oa & NOT(ob)) | sba;   /* 1 if b <- b/2 */
+
+		/* a <- a/2, u0 <- u0/2, v0 <- v0/2 */
+		ctl = v0 & 1;
+		a ^= (a ^ (a >> 1)) & -da;
+		u0 ^= (u0 ^ ((u0 >> 1) + (hr & -ctl))) & -da;
+		v0 ^= (v0 ^ ((v0 >> 1) + (he & -ctl))) & -da;
+
+		/* b <- b/2, u1 <- u1/2 mod r, v1 <- v1/2 mod e */
+		ctl = v1 & 1;
+		b ^= (b ^ (b >> 1)) & -db;
+		u1 ^= (u1 ^ ((u1 >> 1) + (hr & -ctl))) & -db;
+		v1 ^= (v1 ^ ((v1 >> 1) + (he & -ctl))) & -db;
+	}
+
+	/*
+	 * Check that the GCD is indeed 1. If not, then the key is invalid
+	 * (and there's no harm in leaking that piece of information).
+	 */
+	if (a != 1) {
+		return 0;
+	}
+
+	/*
+	 * Now we have u0*e - v0*r = 1. Let's compute the result as:
+	 *   d = u0 + v0*k
+	 * We still have k in the tmp[] array, and its announced bit
+	 * length is that of phi.
+	 */
+	m = k + 1 + len;
+	m[0] = (2 << 4) + 2;  /* bit length is 32 bits, encoded */
+	m[1] = v0 & 0x7FFF;
+	m[2] = (v0 >> 15) & 0x7FFF;
+	m[3] = v0 >> 30;
+	z = m + 4;
+	br_i15_zero(z, k[0]);
+	z[1] = u0 & 0x7FFF;
+	z[2] = (u0 >> 15) & 0x7FFF;
+	z[3] = u0 >> 30;
+	br_i15_mulacc(z, k, m);
+
+	/*
+	 * Encode the result.
+	 */
+	br_i15_encode(d, dlen, z);
+	return dlen;
+}
diff --git a/third_party/bearssl/src/rsa_i15_pss_sign.c b/third_party/bearssl/src/rsa_i15_pss_sign.c
new file mode 100644
index 0000000..dd9385b
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_pss_sign.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h */
+uint32_t
+br_rsa_i15_pss_sign(const br_prng_class **rng,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash, size_t salt_len,
+	const br_rsa_private_key *sk, unsigned char *x)
+{
+	/*
+	 * Pad into x[]; if that fails, skip the private key operation.
+	 */
+	return br_rsa_pss_sig_pad(rng, hf_data, hf_mgf1, hash,
+		salt_len, sk->n_bitlen, x)
+		? br_rsa_i15_private(x, sk) : 0;
+}
diff --git a/third_party/bearssl/src/rsa_i15_pss_vrfy.c b/third_party/bearssl/src/rsa_i15_pss_vrfy.c
new file mode 100644
index 0000000..7d9f2cb
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_pss_vrfy.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h */
+uint32_t
+br_rsa_i15_pss_vrfy(const unsigned char *x, size_t xlen,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const void *hash, size_t salt_len, const br_rsa_public_key *pk)
+{
+	unsigned char work[BR_MAX_RSA_SIZE >> 3];
+
+	/* Work on a local copy: the public key operation is in place. */
+	if (xlen > sizeof work) {
+		return 0;
+	}
+	memcpy(work, x, xlen);
+	return br_rsa_i15_public(work, xlen, pk)
+		? br_rsa_pss_sig_unpad(hf_data, hf_mgf1,
+			hash, salt_len, pk, work)
+		: 0;
+}
diff --git a/third_party/bearssl/src/rsa_i15_pub.c b/third_party/bearssl/src/rsa_i15_pub.c
new file mode 100644
index 0000000..9eab5e8
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_pub.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * As a strict minimum, we need four buffers that can hold a modular
+ * integer. TLEN is that total size, counted in 16-bit words.
+ */
+#define TLEN   (4 * (2 + ((BR_MAX_RSA_SIZE + 14) / 15)))
+
+/* see bearssl_rsa.h; x[] is processed in place, 0 is returned on error */
+uint32_t
+br_rsa_i15_public(unsigned char *x, size_t xlen,
+	const br_rsa_public_key *pk)
+{
+	const unsigned char *n;
+	size_t nlen;
+	uint16_t tmp[1 + TLEN];
+	uint16_t *m, *a, *t;
+	size_t fwlen;
+	long z;
+	uint16_t m0i;
+	uint32_t r;
+
+	/*
+	 * Get the actual length of the modulus (leading zeros skipped), and
+	 * see if it fits our stack buffer. x[] must have that exact length.
+	 */
+	n = pk->n;
+	nlen = pk->nlen;
+	while (nlen > 0 && *n == 0) {
+		n ++;
+		nlen --;
+	}
+	if (nlen == 0 || nlen > (BR_MAX_RSA_SIZE >> 3) || xlen != nlen) {
+		return 0;
+	}
+	z = (long)nlen << 3;
+	fwlen = 1;
+	while (z > 0) {
+		z -= 15;
+		fwlen ++;
+	}
+	/*
+	 * fwlen is now 1 + ceil(8*nlen/15); round it up to an even number.
+	 */
+	fwlen += (fwlen & 1);
+
+	/*
+	 * The modulus gets decoded into m[].
+	 * The value to exponentiate goes into a[].
+	 * The temporaries for modular exponentiations are in t[].
+	 *
+	 * We want the first value word of each integer (index 1) to be
+	 * aligned on a 32-bit boundary; tmp[] has one spare word for that.
+	 */
+	m = tmp;
+	if (((uintptr_t)m & 2) == 0) {
+		m ++;
+	}
+	a = m + fwlen;
+	t = m + 2 * fwlen;
+
+	/*
+	 * Decode the modulus.
+	 */
+	br_i15_decode(m, n, nlen);
+	m0i = br_i15_ninv15(m[1]);
+
+	/*
+	 * Note: if m[] is even, then m0i == 0. Otherwise, m0i must be
+	 * an odd integer.
+	 */
+	r = m0i & 1;
+
+	/*
+	 * Decode x[] into a[]; we also check that its value is proper.
+	 */
+	r &= br_i15_decode_mod(a, x, xlen, m);
+
+	/*
+	 * Modular exponentiation, in place in a[]; its status is not used.
+	 */
+	br_i15_modpow_opt(a, pk->e, pk->elen, m, m0i, t, TLEN - 2 * fwlen);
+
+	/*
+	 * Encode the result back into x[] (done even when r is 0).
+	 */
+	br_i15_encode(x, xlen, a);
+	return r;
+}
diff --git a/third_party/bearssl/src/rsa_i15_pubexp.c b/third_party/bearssl/src/rsa_i15_pubexp.c
new file mode 100644
index 0000000..803bff7
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i15_pubexp.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Recompute public exponent, based on factor p and reduced private
+ * exponent dp (both unsigned big-endian). Returned value is 0 on error.
+ */
+static uint32_t
+get_pubexp(const unsigned char *pbuf, size_t plen,
+	const unsigned char *dpbuf, size_t dplen)
+{
+	/*
+	 * dp is the inverse of e modulo p-1. If p = 3 mod 4, then
+	 * p-1 = 2*((p-1)/2). Taken modulo 2, e is odd and has inverse 1;
+	 * thus, dp must be odd.
+	 *
+	 * We compute the inverse of dp modulo (p-1)/2. This requires
+	 * first reducing dp modulo (p-1)/2 (this can be done with a
+	 * conditional subtract, no need to use the generic modular
+	 * reduction function); then, we use moddiv.
+	 */
+
+	uint16_t tmp[6 * ((BR_MAX_RSA_FACTOR + 29) / 15)];
+	uint16_t *p, *dp, *x;
+	size_t len;
+	uint32_t e;
+
+	/*
+	 * Compute actual factor length (in bytes) and check that it fits
+	 * under our size constraints.
+	 */
+	while (plen > 0 && *pbuf == 0) {
+		pbuf ++;
+		plen --;
+	}
+	if (plen == 0 || plen < 5 || plen > (BR_MAX_RSA_FACTOR / 8)) {
+		return 0;
+	}
+
+	/*
+	 * Compute actual reduced exponent length (in bytes) and check that
+	 * it is not longer than p (at equal length, top bytes are compared).
+	 */
+	while (dplen > 0 && *dpbuf == 0) {
+		dpbuf ++;
+		dplen --;
+	}
+	if (dplen > plen || dplen == 0
+		|| (dplen == plen && dpbuf[0] > pbuf[0]))
+	{
+		return 0;
+	}
+
+	/*
+	 * Verify that p = 3 mod 4 and that dp is odd.
+	 */
+	if ((pbuf[plen - 1] & 3) != 3 || (dpbuf[dplen - 1] & 1) != 1) {
+		return 0;
+	}
+
+	/*
+	 * Decode p and compute (p-1)/2; len includes the header word.
+	 */
+	p = tmp;
+	br_i15_decode(p, pbuf, plen);
+	len = (p[0] + 31) >> 4;
+	br_i15_rshift(p, 1);
+
+	/*
+	 * Decode dp and make sure its announced bit length matches that of
+	 * p (we already know that the size of dp, in bits, does not exceed
+	 * the size of p, so we just have to copy the header word).
+	 */
+	dp = p + len;
+	memset(dp, 0, len * sizeof *dp);
+	br_i15_decode(dp, dpbuf, dplen);
+	dp[0] = p[0];
+
+	/*
+	 * Subtract (p-1)/2 from dp if necessary.
+	 */
+	br_i15_sub(dp, p, NOT(br_i15_sub(dp, p, 0)));
+
+	/*
+	 * If another subtraction is needed, then this means that the
+	 * value was invalid. We don't care to leak information about
+	 * invalid keys.
+	 */
+	if (br_i15_sub(dp, p, 0) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Invert dp modulo (p-1)/2. If the inversion fails, then the
+	 * key value was invalid.
+	 */
+	x = dp + len;
+	br_i15_zero(x, p[0]);
+	x[1] = 1;
+	if (br_i15_moddiv(x, dp, p, br_i15_ninv15(p[1]), x + len) == 0) {
+		return 0;
+	}
+
+	/*
+	 * We now have an inverse. We must set it to zero (error) if its
+	 * length is greater than 32 bits and/or if it is an even integer.
+	 * Take care that the bit_length function returns an encoded
+	 * bit length: 32 bits encode as (2 << 4) + 2 = 34, hence the 35.
+	 */
+	e = (uint32_t)x[1] | ((uint32_t)x[2] << 15) | ((uint32_t)x[3] << 30);
+	e &= -LT(br_i15_bit_length(x + 1, len - 1), 35);
+	e &= -(e & 1);
+	return e;
+}
+
+/* see bearssl_rsa.h */
+uint32_t
+br_rsa_i15_compute_pubexp(const br_rsa_private_key *sk)
+{
+	uint32_t from_p, from_q;
+
+	/*
+	 * The exponent is recomputed independently from (p, dp) and from
+	 * (q, dq); it is reported only if both values agree (else 0).
+	 */
+	from_p = get_pubexp(sk->p, sk->plen, sk->dp, sk->dplen);
+	from_q = get_pubexp(sk->q, sk->qlen, sk->dq, sk->dqlen);
+	return from_p & -EQ(from_p, from_q);
+}
diff --git a/third_party/bearssl/src/rsa_i31_keygen.c b/third_party/bearssl/src/rsa_i31_keygen.c
new file mode 100644
index 0000000..77708f8
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_keygen.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h */
+uint32_t
+br_rsa_i31_keygen(const br_prng_class **rng,
+	br_rsa_private_key *sk, void *kbuf_priv,
+	br_rsa_public_key *pk, void *kbuf_pub,
+	unsigned size, uint32_t pubexp)
+{
+	/* Generic key generation, run with the i31 modular exponentiation. */
+	return br_rsa_i31_keygen_inner(rng, sk, kbuf_priv,
+		pk, kbuf_pub, size, pubexp, br_i31_modpow_opt);
+}
diff --git a/third_party/bearssl/src/rsa_i31_keygen_inner.c b/third_party/bearssl/src/rsa_i31_keygen_inner.c
new file mode 100644
index 0000000..98df445
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_keygen_inner.c
@@ -0,0 +1,608 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Fill x[] with random bits, for the provided (encoded) bit length;
+ * excess top bits are cleared. The header word x[0] is not modified.
+ */
+static void
+mkrand(const br_prng_class **rng, uint32_t *x, uint32_t esize)
+{
+	size_t nwords, k;
+	uint32_t top_mask;
+
+	nwords = (esize + 31) >> 5;
+	(*rng)->generate(rng, x + 1, nwords * sizeof *x);
+	/* Top word keeps (esize & 31) bits, or all 31 when that is 0. */
+	top_mask = 0x7FFFFFFF;
+	if ((esize & 31) != 0) {
+		top_mask >>= 31 - (esize & 31);
+	}
+	for (k = 1; k < nwords; k ++) {
+		x[k] &= 0x7FFFFFFF;
+	}
+	x[nwords] &= top_mask;
+}
+
+/*
+ * Big-endian unsigned representation of the product of all small
+ * primes from 13 to 1481; trial_divisions() reduces it modulo x.
+ */
+static const unsigned char SMALL_PRIMES[] = {
+	0x2E, 0xAB, 0x92, 0xD1, 0x8B, 0x12, 0x47, 0x31, 0x54, 0x0A,
+	0x99, 0x5D, 0x25, 0x5E, 0xE2, 0x14, 0x96, 0x29, 0x1E, 0xB7,
+	0x78, 0x70, 0xCC, 0x1F, 0xA5, 0xAB, 0x8D, 0x72, 0x11, 0x37,
+	0xFB, 0xD8, 0x1E, 0x3F, 0x5B, 0x34, 0x30, 0x17, 0x8B, 0xE5,
+	0x26, 0x28, 0x23, 0xA1, 0x8A, 0xA4, 0x29, 0xEA, 0xFD, 0x9E,
+	0x39, 0x60, 0x8A, 0xF3, 0xB5, 0xA6, 0xEB, 0x3F, 0x02, 0xB6,
+	0x16, 0xC3, 0x96, 0x9D, 0x38, 0xB0, 0x7D, 0x82, 0x87, 0x0C,
+	0xF7, 0xBE, 0x24, 0xE5, 0x5F, 0x41, 0x04, 0x79, 0x76, 0x40,
+	0xE7, 0x00, 0x22, 0x7E, 0xB5, 0x85, 0x7F, 0x8D, 0x01, 0x50,
+	0xE9, 0xD3, 0x29, 0x42, 0x08, 0xB3, 0x51, 0x40, 0x7B, 0xD7,
+	0x8D, 0xCC, 0x10, 0x01, 0x64, 0x59, 0x28, 0xB6, 0x53, 0xF3,
+	0x50, 0x4E, 0xB1, 0xF2, 0x58, 0xCD, 0x6E, 0xF5, 0x56, 0x3E,
+	0x66, 0x2F, 0xD7, 0x07, 0x7F, 0x52, 0x4C, 0x13, 0x24, 0xDC,
+	0x8E, 0x8D, 0xCC, 0xED, 0x77, 0xC4, 0x21, 0xD2, 0xFD, 0x08,
+	0xEA, 0xD7, 0xC0, 0x5C, 0x13, 0x82, 0x81, 0x31, 0x2F, 0x2B,
+	0x08, 0xE4, 0x80, 0x04, 0x7A, 0x0C, 0x8A, 0x3C, 0xDC, 0x22,
+	0xE4, 0x5A, 0x7A, 0xB0, 0x12, 0x5E, 0x4A, 0x76, 0x94, 0x77,
+	0xC2, 0x0E, 0x92, 0xBA, 0x8A, 0xA0, 0x1F, 0x14, 0x51, 0x1E,
+	0x66, 0x6C, 0x38, 0x03, 0x6C, 0xC7, 0x4A, 0x4B, 0x70, 0x80,
+	0xAF, 0xCA, 0x84, 0x51, 0xD8, 0xD2, 0x26, 0x49, 0xF5, 0xA8,
+	0x5E, 0x35, 0x4B, 0xAC, 0xCE, 0x29, 0x92, 0x33, 0xB7, 0xA2,
+	0x69, 0x7D, 0x0C, 0xE0, 0x9C, 0xDB, 0x04, 0xD6, 0xB4, 0xBC,
+	0x39, 0xD7, 0x7F, 0x9E, 0x9D, 0x78, 0x38, 0x7F, 0x51, 0x54,
+	0x50, 0x8B, 0x9E, 0x9C, 0x03, 0x6C, 0xF5, 0x9D, 0x2C, 0x74,
+	0x57, 0xF0, 0x27, 0x2A, 0xC3, 0x47, 0xCA, 0xB9, 0xD7, 0x5C,
+	0xFF, 0xC2, 0xAC, 0x65, 0x4E, 0xBD
+};
+
+/*
+ * We need temporary values for at least 7 integers of the same size
+ * as a factor (including header word); more space helps with performance
+ * (in modular exponentiations), but we much prefer to remain under
+ * 2 kilobytes in total, to save stack space. The macro TEMPS below
+ * exceeds 512 (which is a count in 32-bit words) when BR_MAX_RSA_SIZE
+ * is greater than 4464 (default value is 4096, so the 2-kB limit is
+ * maintained unless BR_MAX_RSA_SIZE was modified).
+ */
+#define MAX(x, y)   ((x) > (y) ? (x) : (y))
+#define ROUND2(x)   ((((x) + 1) >> 1) << 1)   /* round up to even */
+/* TEMPS is always even; miller_rabin() relies on that for alignment. */
+#define TEMPS   MAX(512, ROUND2(7 * ((((BR_MAX_RSA_SIZE + 1) >> 1) + 61) / 31)))
+
+/*
+ * Trial division of a candidate prime x by all the small primes at
+ * once. The product SMALL_PRIMES is reduced modulo x; that remainder
+ * is then divided by itself modulo x with br_i31_moddiv(), which
+ * reports an error when the divisor is not invertible modulo x.
+ * Returned value is thus 1 when no small prime divides x, and 0 when
+ * a non-trivial GCD was found.
+ * x must be odd; t[] is used as scratch space.
+ */
+static uint32_t
+trial_divisions(const uint32_t *x, uint32_t *t)
+{
+	uint32_t *rem, *scratch;
+	uint32_t x0i;
+
+	x0i = br_i31_ninv31(x[1]);
+	rem = t;
+	scratch = rem + 1 + ((x[0] + 31) >> 5);
+	br_i31_decode_reduce(rem, SMALL_PRIMES, sizeof SMALL_PRIMES, x);
+	return br_i31_moddiv(rem, rem, x, x0i, scratch);
+}
+
+/*
+ * Perform n rounds of Miller-Rabin on the candidate prime x. This
+ * function assumes that x = 3 mod 4. t[] (tlen words) is scratch
+ * space; mp31 is the modular exponentiation implementation to use.
+ *
+ * Returned value is 1 on success (all rounds completed successfully),
+ * 0 otherwise.
+ */
+static uint32_t
+miller_rabin(const br_prng_class **rng, const uint32_t *x, int n,
+	uint32_t *t, size_t tlen, br_i31_modpow_opt_type mp31)
+{
+	/*
+	 * Since x = 3 mod 4, the Miller-Rabin test is simple:
+	 *  - get a random base a (such that 1 < a < x-1)
+	 *  - compute z = a^((x-1)/2) mod x
+	 *  - if z != 1 and z != x-1, the number x is composite
+	 *
+	 * We generate bases 'a' randomly with a size which is
+	 * one bit less than x, which ensures that a < x-1. It
+	 * is not useful to verify that a > 1 because the probability
+	 * that we get a value a equal to 0 or 1 is much smaller
+	 * than the probability of our Miller-Rabin tests not to
+	 * detect a composite, which is already quite smaller than the
+	 * probability of the hardware misbehaving and return a
+	 * composite integer because of some glitch (e.g. bad RAM
+	 * or ill-timed cosmic ray).
+	 */
+	unsigned char *xm1d2;
+	size_t xlen, xm1d2_len, xm1d2_len_u32, u;
+	uint32_t asize;
+	unsigned cc;
+	uint32_t x0i;
+
+	/*
+	 * Compute (x-1)/2 as bytes: encode x, then shift right by one bit.
+	 */
+	xm1d2 = (unsigned char *)t;
+	xm1d2_len = ((x[0] - (x[0] >> 5)) + 7) >> 3;
+	br_i31_encode(xm1d2, xm1d2_len, x);
+	cc = 0;
+	for (u = 0; u < xm1d2_len; u ++) {
+		unsigned w;
+
+		w = xm1d2[u];
+		xm1d2[u] = (unsigned char)((w >> 1) | cc);
+		cc = w << 7;
+	}
+
+	/*
+	 * We used some words of the provided buffer for (x-1)/2.
+	 */
+	xm1d2_len_u32 = (xm1d2_len + 3) >> 2;
+	t += xm1d2_len_u32;
+	tlen -= xm1d2_len_u32;
+
+	xlen = (x[0] + 31) >> 5;
+	asize = x[0] - 1 - EQ0(x[0] & 31);  /* encoded size, one bit less */
+	x0i = br_i31_ninv31(x[1]);
+	while (n -- > 0) {
+		uint32_t *a, *t2;
+		uint32_t eq1, eqm1;
+		size_t t2len;
+
+		/*
+		 * Generate a random base, one bit shorter than x (it need
+		 * not be really uniform modulo x). The top word is cleared
+		 * first since mkrand() may not write to it.
+		 */
+		a = t;
+		a[0] = x[0];
+		a[xlen] = 0;
+		mkrand(rng, a, asize);
+
+		/*
+		 * Compute a^((x-1)/2) mod x. We assume here that the
+		 * function will not fail (the temporary array is large
+		 * enough).
+		 */
+		t2 = t + 1 + xlen;
+		t2len = tlen - 1 - xlen;
+		if ((t2len & 1) != 0) {
+			/*
+			 * Since the source array is 64-bit aligned and
+			 * has an even number of elements (TEMPS), we
+			 * can use the parity of the remaining length to
+			 * detect and adjust alignment.
+			 */
+			t2 ++;
+			t2len --;
+		}
+		mp31(a, xm1d2, xm1d2_len, x, x0i, t2, t2len);
+
+		/*
+		 * We must obtain either 1 or x-1. Note that x is odd,
+		 * hence x-1 differs from x only in its low word (no
+		 * carry).
+		 */
+		eq1 = a[1] ^ 1;
+		eqm1 = a[1] ^ (x[1] - 1);
+		for (u = 2; u <= xlen; u ++) {
+			eq1 |= a[u];
+			eqm1 |= a[u] ^ x[u];
+		}
+
+		if ((EQ0(eq1) | EQ0(eqm1)) == 0) {
+			return 0;
+		}
+	}
+	return 1;
+}
+
+/*
+ * Create a random prime of the provided size. 'esize' is the _encoded_
+ * bit length. The two top bits and the two bottom bits are set to 1.
+ */
+static void
+mkprime(const br_prng_class **rng, uint32_t *x, uint32_t esize,
+	uint32_t pubexp, uint32_t *t, size_t tlen, br_i31_modpow_opt_type mp31)
+{
+	size_t len;
+
+	x[0] = esize; /* header word: announced (encoded) bit length */
+	len = (esize + 31) >> 5;
+	for (;;) { /* loop until a candidate passes every test below */
+		size_t u;
+		uint32_t m3, m5, m7, m11;
+		int rounds, s7, s11;
+
+		/*
+		 * Generate random bits. We force the two top bits and the
+		 * two bottom bits to 1.
+		 */
+		mkrand(rng, x, esize);
+		if ((esize & 31) == 0) {
+			x[len] |= 0x60000000;
+		} else if ((esize & 31) == 1) {
+			x[len] |= 0x00000001;
+			x[len - 1] |= 0x40000000;
+		} else {
+			x[len] |= 0x00000003 << ((esize & 31) - 2);
+		}
+		x[1] |= 0x00000003;
+
+		/*
+		 * Trial division with low primes (3, 5, 7 and 11). We
+		 * use the following properties:
+		 *
+		 *   2^2 = 1 mod 3
+		 *   2^4 = 1 mod 5
+		 *   2^3 = 1 mod 7
+		 *   2^10 = 1 mod 11
+		 */
+		m3 = 0;
+		m5 = 0;
+		m7 = 0;
+		m11 = 0;
+		s7 = 0;
+		s11 = 0;
+		for (u = 0; u < len; u ++) {
+			uint32_t w, w3, w5, w7, w11;
+
+			w = x[1 + u];
+			w3 = (w & 0xFFFF) + (w >> 16);     /* max: 98302 */
+			w5 = (w & 0xFFFF) + (w >> 16);     /* max: 98302 */
+			w7 = (w & 0x7FFF) + (w >> 15);     /* max: 98302 */
+			w11 = (w & 0xFFFFF) + (w >> 20);   /* max: 1050622 */
+
+			m3 += w3 << (u & 1);
+			m3 = (m3 & 0xFF) + (m3 >> 8);      /* max: 1025 */
+
+			m5 += w5 << ((4 - u) & 3);
+			m5 = (m5 & 0xFFF) + (m5 >> 12);    /* max: 4479 */
+
+			m7 += w7 << s7;
+			m7 = (m7 & 0x1FF) + (m7 >> 9);     /* max: 1280 */
+			if (++ s7 == 3) {
+				s7 = 0;
+			}
+
+			m11 += w11 << s11;
+			if (++ s11 == 10) {
+				s11 = 0;
+			}
+			m11 = (m11 & 0x3FF) + (m11 >> 10); /* max: 526847 */
+		}
+
+		m3 = (m3 & 0x3F) + (m3 >> 6);      /* max: 78 */
+		m3 = (m3 & 0x0F) + (m3 >> 4);      /* max: 18 */
+		m3 = ((m3 * 43) >> 5) & 3;
+
+		m5 = (m5 & 0xFF) + (m5 >> 8);      /* max: 271 */
+		m5 = (m5 & 0x0F) + (m5 >> 4);      /* max: 31 */
+		m5 -= 20 & -GT(m5, 19);
+		m5 -= 10 & -GT(m5, 9);
+		m5 -= 5 & -GT(m5, 4);
+
+		m7 = (m7 & 0x3F) + (m7 >> 6);      /* max: 82 */
+		m7 = (m7 & 0x07) + (m7 >> 3);      /* max: 16 */
+		m7 = ((m7 * 147) >> 7) & 7;
+
+		/*
+		 * 2^5 = 32 = -1 mod 11.
+		 */
+		m11 = (m11 & 0x3FF) + (m11 >> 10);      /* max: 1536 */
+		m11 = (m11 & 0x3FF) + (m11 >> 10);      /* max: 1023 */
+		m11 = (m11 & 0x1F) + 33 - (m11 >> 5);   /* max: 64 */
+		m11 -= 44 & -GT(m11, 43);
+		m11 -= 22 & -GT(m11, 21);
+		m11 -= 11 & -GT(m11, 10);
+
+		/*
+		 * If any of these remainders is 0, then the candidate is
+		 * not prime. Also, if pubexp is 3, 5, 7 or 11, and the
+		 * corresponding remainder is 1, then the candidate must
+		 * be rejected, because we need e to be invertible
+		 * modulo p-1. We can use simple comparisons here
+		 * because they won't leak information on a candidate
+		 * that we keep, only on one that we reject (and is thus
+		 * not secret).
+		 */
+		if (m3 == 0 || m5 == 0 || m7 == 0 || m11 == 0) {
+			continue;
+		}
+		if ((pubexp == 3 && m3 == 1)
+			|| (pubexp == 5 && m5 == 1)
+			|| (pubexp == 7 && m7 == 1)
+			|| (pubexp == 11 && m11 == 1))
+		{
+			continue;
+		}
+
+		/*
+		 * More trial divisions.
+		 */
+		if (!trial_divisions(x, t)) {
+			continue;
+		}
+
+		/*
+		 * Miller-Rabin algorithm. Since we selected a random
+		 * integer, not a maliciously crafted integer, we can use
+		 * relatively few rounds to lower the risk of a false
+		 * positive (i.e. declaring prime a non-prime) under
+		 * 2^(-80). It is not useful to lower the probability much
+		 * below that, since that would be substantially below
+		 * the probability of the hardware misbehaving. Sufficient
+		 * numbers of rounds are extracted from the Handbook of
+		 * Applied Cryptography, note 4.49 (page 149).
+		 *
+		 * Since we work on the encoded size (esize), we need to
+		 * compare with encoded thresholds.
+		 */
+		if (esize < 309) {
+			rounds = 12;
+		} else if (esize < 464) {
+			rounds = 9;
+		} else if (esize < 670) {
+			rounds = 6;
+		} else if (esize < 877) {
+			rounds = 4;
+		} else if (esize < 1341) {
+			rounds = 3;
+		} else {
+			rounds = 2;
+		}
+
+		if (miller_rabin(rng, x, rounds, t, tlen, mp31)) {
+			return;
+		}
+	}
+}
+
+/*
+ * Let p be a prime (p > 2^33, p = 3 mod 4). Let m = (p-1)/2, provided
+ * as parameter (with announced bit length equal to that of p). This
+ * function computes d = 1/e mod p-1 (for an odd integer e). Returned
+ * value is 1 on success, 0 on error (an error is reported if e is not
+ * invertible modulo p-1).
+ *
+ * The temporary buffer (t) must have room for at least 4 integers of
+ * the size of p.
+ */
+static uint32_t
+invert_pubexp(uint32_t *d, const uint32_t *m, uint32_t e, uint32_t *t)
+{
+	uint32_t *f;
+	uint32_t r;
+
+	f = t;
+	t += 1 + ((m[0] + 31) >> 5);
+
+	/*
+	 * Compute d = 1/e mod m. Since p = 3 mod 4, m is odd.
+	 */
+	br_i31_zero(d, m[0]);
+	d[1] = 1;
+	br_i31_zero(f, m[0]);
+	f[1] = e & 0x7FFFFFFF; /* low 31 bits of e */
+	f[2] = e >> 31; /* bit 31 of e */
+	r = br_i31_moddiv(d, f, m, br_i31_ninv31(m[1]), t);
+
+	/*
+	 * We really want d = 1/e mod p-1, with p-1 = 2m. By the CRT,
+	 * the result is either the d we got, or d + m.
+	 *
+	 * Let's write e*d = 1 + k*m, for some integer k. Integers e
+	 * and m are odd. If d is odd, then e*d is odd, which implies
+	 * that k must be even; in that case, e*d = 1 + (k/2)*2m, and
+	 * thus d is already fine. Conversely, if d is even, then k
+	 * is odd, and we must add m to d in order to get the correct
+	 * result.
+	 */
+	br_i31_add(d, m, (uint32_t)(1 - (d[1] & 1)));
+
+	return r;
+}
+
+/*
+ * Swap 'len' bytes between b1 and b2. The two buffers must be disjoint.
+ */
+static void
+bufswap(void *b1, void *b2, size_t len)
+{
+	size_t u;
+	unsigned char *buf1, *buf2;
+
+	buf1 = b1;
+	buf2 = b2;
+	for (u = 0; u < len; u ++) {
+		unsigned w;
+
+		w = buf1[u];
+		buf1[u] = buf2[u];
+		buf2[u] = w;
+	}
+}
+
+/* see inner.h; returns 0 for a bad size or pubexp, else the 1/q mod p status */
+uint32_t
+br_rsa_i31_keygen_inner(const br_prng_class **rng,
+	br_rsa_private_key *sk, void *kbuf_priv,
+	br_rsa_public_key *pk, void *kbuf_pub,
+	unsigned size, uint32_t pubexp, br_i31_modpow_opt_type mp31)
+{
+	uint32_t esize_p, esize_q;
+	size_t plen, qlen, tlen;
+	uint32_t *p, *q, *t;
+	union {
+		uint32_t t32[TEMPS];
+		uint64_t t64[TEMPS >> 1];  /* for 64-bit alignment */
+	} tmp;
+	uint32_t r;
+
+	if (size < BR_MIN_RSA_SIZE || size > BR_MAX_RSA_SIZE) {
+		return 0;
+	}
+	if (pubexp == 0) {
+		pubexp = 3; /* 0 selects the default public exponent */
+	} else if (pubexp == 1 || (pubexp & 1) == 0) {
+		return 0;
+	}
+
+	esize_p = (size + 1) >> 1;
+	esize_q = size - esize_p;
+	sk->n_bitlen = size;
+	sk->p = kbuf_priv;
+	sk->plen = (esize_p + 7) >> 3;
+	sk->q = sk->p + sk->plen;
+	sk->qlen = (esize_q + 7) >> 3;
+	sk->dp = sk->q + sk->qlen;
+	sk->dplen = sk->plen;
+	sk->dq = sk->dp + sk->dplen;
+	sk->dqlen = sk->qlen;
+	sk->iq = sk->dq + sk->dqlen;
+	sk->iqlen = sk->plen;
+
+	if (pk != NULL) {
+		pk->n = kbuf_pub;
+		pk->nlen = (size + 7) >> 3;
+		pk->e = pk->n + pk->nlen;
+		pk->elen = 4;
+		br_enc32be(pk->e, pubexp);
+		while (*pk->e == 0) {
+			pk->e ++;
+			pk->elen --;
+		}
+	}
+
+	/*
+	 * We now switch to encoded sizes.
+	 *
+	 * floor((x * 16913) / (2^19)) is equal to floor(x/31) for all
+	 * integers x from 0 to 34966; the intermediate product fits on
+	 * 30 bits, thus we can use MUL31().
+	 */
+	esize_p += MUL31(esize_p, 16913) >> 19;
+	esize_q += MUL31(esize_q, 16913) >> 19;
+	plen = (esize_p + 31) >> 5;
+	qlen = (esize_q + 31) >> 5;
+	p = tmp.t32;
+	q = p + 1 + plen;
+	t = q + 1 + qlen;
+	tlen = ((sizeof tmp.t32) / sizeof(uint32_t)) - (2 + plen + qlen);
+
+	/*
+	 * When looking for primes p and q, we temporarily divide
+	 * candidates by 2, in order to compute the inverse of the
+	 * public exponent.
+	 */
+
+	for (;;) {
+		mkprime(rng, p, esize_p, pubexp, t, tlen, mp31);
+		br_i31_rshift(p, 1);
+		if (invert_pubexp(t, p, pubexp, t + 1 + plen)) {
+			br_i31_add(p, p, 1);
+			p[1] |= 1;
+			br_i31_encode(sk->p, sk->plen, p);
+			br_i31_encode(sk->dp, sk->dplen, t);
+			break;
+		}
+	}
+
+	for (;;) {
+		mkprime(rng, q, esize_q, pubexp, t, tlen, mp31);
+		br_i31_rshift(q, 1);
+		if (invert_pubexp(t, q, pubexp, t + 1 + qlen)) {
+			br_i31_add(q, q, 1);
+			q[1] |= 1;
+			br_i31_encode(sk->q, sk->qlen, q);
+			br_i31_encode(sk->dq, sk->dqlen, t);
+			break;
+		}
+	}
+
+	/*
+	 * If p and q have the same size, then it is possible that q > p
+	 * (when the target modulus size is odd, we generate p with a
+	 * greater bit length than q). If q > p, we want to swap p and q
+	 * (and also dp and dq) for two reasons:
+	 *  - The final step below (inversion of q modulo p) is easier if
+	 *    p > q.
+	 *  - While BearSSL's RSA code is perfectly happy with RSA keys such
+	 *    that p < q, some other implementations have restrictions and
+	 *    require p > q.
+	 *
+	 * Note that we can do a simple non-constant-time swap here,
+	 * because the only information we leak here is that we insist on
+	 * returning p and q such that p > q, which is not a secret.
+	 */
+	if (esize_p == esize_q && br_i31_sub(p, q, 0) == 1) {
+		bufswap(p, q, (1 + plen) * sizeof *p);
+		bufswap(sk->p, sk->q, sk->plen);
+		bufswap(sk->dp, sk->dq, sk->dplen);
+	}
+
+	/*
+	 * We have produced p, q, dp and dq. We can now compute iq = 1/q mod p.
+	 *
+	 * We ensured that p >= q, so this is just a matter of updating the
+	 * header word for q (and possibly adding an extra word).
+	 *
+	 * Theoretically, the call below may fail, in case we were
+	 * extraordinarily unlucky, and p = q. Another failure case is if
+	 * Miller-Rabin failed us _twice_, and p and q are non-prime and
+	 * have a factor in common. We report the error mostly because it
+	 * is cheap and we can, but in practice this never happens (or, at
+	 * least, it happens way less often than hardware glitches).
+	 */
+	q[0] = p[0];
+	if (plen > qlen) {
+		q[plen] = 0;
+		t ++;
+		tlen --;
+	}
+	br_i31_zero(t, p[0]);
+	t[1] = 1;
+	r = br_i31_moddiv(t, q, p, br_i31_ninv31(p[1]), t + 1 + plen);
+	br_i31_encode(sk->iq, sk->iqlen, t);
+
+	/*
+	 * Compute the public modulus too, if required.
+	 */
+	if (pk != NULL) {
+		br_i31_zero(t, p[0]);
+		br_i31_mulacc(t, p, q);
+		br_i31_encode(pk->n, pk->nlen, t);
+	}
+
+	return r;
+}
diff --git a/third_party/bearssl/src/rsa_i31_modulus.c b/third_party/bearssl/src/rsa_i31_modulus.c
new file mode 100644
index 0000000..f5f997f
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_modulus.c
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; returns the modulus byte length, or 0 if tmp[] is too small */
+size_t
+br_rsa_i31_compute_modulus(void *n, const br_rsa_private_key *sk)
+{
+	uint32_t tmp[4 * (((BR_MAX_RSA_SIZE / 2) + 30) / 31) + 5];
+	uint32_t *t, *p, *q;
+	const unsigned char *pbuf, *qbuf;
+	size_t nlen, plen, qlen, tlen;
+
+	/*
+	 * Compute actual byte lengths for p and q (leading zeros skipped).
+	 */
+	pbuf = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *pbuf == 0) {
+		pbuf ++;
+		plen --;
+	}
+	qbuf = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *qbuf == 0) {
+		qbuf ++;
+		qlen --;
+	}
+
+	t = tmp;
+	tlen = (sizeof tmp) / (sizeof tmp[0]);
+
+	/*
+	 * Decode p.
+	 */
+	if ((31 * tlen) < (plen << 3) + 31) {
+		return 0;
+	}
+	br_i31_decode(t, pbuf, plen);
+	p = t;
+	plen = (p[0] + 63) >> 5; /* from here on: word count of p, header included */
+	t += plen;
+	tlen -= plen;
+
+	/*
+	 * Decode q.
+	 */
+	if ((31 * tlen) < (qlen << 3) + 31) {
+		return 0;
+	}
+	br_i31_decode(t, qbuf, qlen);
+	q = t;
+	qlen = (q[0] + 63) >> 5; /* from here on: word count of q, header included */
+	t += qlen;
+	tlen -= qlen;
+
+	/*
+	 * Computation can proceed only if we have enough room for the
+	 * modulus.
+	 */
+	if (tlen < (plen + qlen + 1)) {
+		return 0;
+	}
+
+	/*
+	 * Private key already contains the modulus bit length, from which
+	 * we can infer the output length. Even if n is NULL, we still had
+	 * to decode p and q to make sure that the product can be computed.
+	 */
+	nlen = (sk->n_bitlen + 7) >> 3;
+	if (n != NULL) {
+		br_i31_zero(t, p[0]);
+		br_i31_mulacc(t, p, q);
+		br_i31_encode(n, nlen, t);
+	}
+	return nlen;
+}
diff --git a/third_party/bearssl/src/rsa_i31_oaep_decrypt.c b/third_party/bearssl/src/rsa_i31_oaep_decrypt.c
new file mode 100644
index 0000000..06fdd93
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_oaep_decrypt.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; fails (returns 0) if *len is not the modulus byte length */
+uint32_t
+br_rsa_i31_oaep_decrypt(const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_private_key *sk, void *data, size_t *len)
+{
+	uint32_t r;
+
+	if (*len != ((sk->n_bitlen + 7) >> 3)) {
+		return 0;
+	}
+	r = br_rsa_i31_private(data, sk);
+	r &= br_rsa_oaep_unpad(dig, label, label_len, data, len); /* runs even if r is 0 */
+	return r;
+}
diff --git a/third_party/bearssl/src/rsa_i31_oaep_encrypt.c b/third_party/bearssl/src/rsa_i31_oaep_encrypt.c
new file mode 100644
index 0000000..367008c
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_oaep_encrypt.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; returns the padded length, or 0 if padding or the public op fails */
+size_t
+br_rsa_i31_oaep_encrypt(
+	const br_prng_class **rnd, const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_public_key *pk,
+	void *dst, size_t dst_max_len,
+	const void *src, size_t src_len)
+{
+	size_t dlen;
+
+	dlen = br_rsa_oaep_pad(rnd, dig, label, label_len,
+		pk, dst, dst_max_len, src, src_len);
+	if (dlen == 0) {
+		return 0;
+	}
+	return dlen & -(size_t)br_rsa_i31_public(dst, dlen, pk); /* status 0 masks dlen to 0 */
+}
diff --git a/third_party/bearssl/src/rsa_i31_pkcs1_sign.c b/third_party/bearssl/src/rsa_i31_pkcs1_sign.c
new file mode 100644
index 0000000..784d3c2
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_pkcs1_sign.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; returns 0 if padding fails, else the br_rsa_i31_private() result */
+uint32_t
+br_rsa_i31_pkcs1_sign(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	const br_rsa_private_key *sk, unsigned char *x)
+{
+	if (!br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)) {
+		return 0;
+	}
+	return br_rsa_i31_private(x, sk);
+}
diff --git a/third_party/bearssl/src/rsa_i31_pkcs1_vrfy.c b/third_party/bearssl/src/rsa_i31_pkcs1_vrfy.c
new file mode 100644
index 0000000..e79a002
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_pkcs1_vrfy.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h; works on a stack copy of x, rejects xlen > BR_MAX_RSA_SIZE / 8 */
+uint32_t
+br_rsa_i31_pkcs1_vrfy(const unsigned char *x, size_t xlen,
+	const unsigned char *hash_oid, size_t hash_len,
+	const br_rsa_public_key *pk, unsigned char *hash_out)
+{
+	unsigned char sig[BR_MAX_RSA_SIZE >> 3];
+
+	if (xlen > (sizeof sig)) {
+		return 0;
+	}
+	memcpy(sig, x, xlen);
+	if (!br_rsa_i31_public(sig, xlen, pk)) {
+		return 0;
+	}
+	return br_rsa_pkcs1_sig_unpad(sig, xlen, hash_oid, hash_len, hash_out);
+}
diff --git a/third_party/bearssl/src/rsa_i31_priv.c b/third_party/bearssl/src/rsa_i31_priv.c
new file mode 100644
index 0000000..b1e1244
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_priv.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define U      (2 + ((BR_MAX_RSA_FACTOR + 30) / 31)) /* 31-bit words for one factor, plus 2 */
+#define TLEN   (8 * U) /* tmp[] below holds 1 + TLEN words */
+
+/* see bearssl_rsa.h; x is processed in place; returns 1 on success, 0 on error */
+uint32_t
+br_rsa_i31_private(unsigned char *x, const br_rsa_private_key *sk)
+{
+	const unsigned char *p, *q;
+	size_t plen, qlen;
+	size_t fwlen;
+	uint32_t p0i, q0i;
+	size_t xlen, u;
+	uint32_t tmp[1 + TLEN];
+	long z;
+	uint32_t *mp, *mq, *s1, *s2, *t1, *t2, *t3;
+	uint32_t r;
+
+	/*
+	 * Compute the actual lengths of p and q, in bytes.
+	 * These lengths are not considered secret (we cannot really hide
+	 * them anyway in constant-time code).
+	 */
+	p = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *p == 0) {
+		p ++;
+		plen --;
+	}
+	q = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *q == 0) {
+		q ++;
+		qlen --;
+	}
+
+	/*
+	 * Compute the maximum factor length, in words.
+	 */
+	z = (long)(plen > qlen ? plen : qlen) << 3;
+	fwlen = 1;
+	while (z > 0) {
+		z -= 31;
+		fwlen ++;
+	}
+
+	/*
+	 * Round up the word length to an even number.
+	 */
+	fwlen += (fwlen & 1);
+
+	/*
+	 * We need to fit at least 6 values in the stack buffer.
+	 */
+	if (6 * fwlen > TLEN) {
+		return 0;
+	}
+
+	/*
+	 * Compute modulus length (in bytes).
+	 */
+	xlen = (sk->n_bitlen + 7) >> 3;
+
+	/*
+	 * Decode q.
+	 */
+	mq = tmp;
+	br_i31_decode(mq, q, qlen);
+
+	/*
+	 * Decode p.
+	 */
+	t1 = mq + fwlen;
+	br_i31_decode(t1, p, plen);
+
+	/*
+	 * Compute the modulus (product of the two factors), to compare
+	 * it with the source value. We use br_i31_mulacc(), since it's
+	 * already used later on.
+	 */
+	t2 = mq + 2 * fwlen;
+	br_i31_zero(t2, mq[0]);
+	br_i31_mulacc(t2, mq, t1);
+
+	/*
+	 * We encode the modulus into bytes, to perform the comparison
+	 * with bytes. We know that the product length, in bytes, is
+	 * exactly xlen.
+	 * The comparison actually computes the carry when subtracting
+	 * the modulus from the source value; that carry must be 1 for
+	 * a value in the correct range. We keep it in r, which is our
+	 * accumulator for the error code.
+	 */
+	t3 = mq + 4 * fwlen;
+	br_i31_encode(t3, xlen, t2);
+	u = xlen;
+	r = 0;
+	while (u > 0) {
+		uint32_t wn, wx;
+
+		u --;
+		wn = ((unsigned char *)t3)[u];
+		wx = x[u];
+		r = ((wx - (wn + r)) >> 8) & 1;
+	}
+
+	/*
+	 * Move the decoded p to another temporary buffer.
+	 */
+	mp = mq + 2 * fwlen;
+	memmove(mp, t1, fwlen * sizeof *t1);
+
+	/*
+	 * Compute s2 = x^dq mod q.
+	 */
+	q0i = br_i31_ninv31(mq[1]);
+	s2 = mq + fwlen;
+	br_i31_decode_reduce(s2, x, xlen, mq);
+	r &= br_i31_modpow_opt(s2, sk->dq, sk->dqlen, mq, q0i,
+		mq + 3 * fwlen, TLEN - 3 * fwlen);
+
+	/*
+	 * Compute s1 = x^dp mod p.
+	 */
+	p0i = br_i31_ninv31(mp[1]);
+	s1 = mq + 3 * fwlen;
+	br_i31_decode_reduce(s1, x, xlen, mp);
+	r &= br_i31_modpow_opt(s1, sk->dp, sk->dplen, mp, p0i,
+		mq + 4 * fwlen, TLEN - 4 * fwlen);
+
+	/*
+	 * Compute:
+	 *   h = (s1 - s2)*(1/q) mod p
+	 * s1 is an integer modulo p, but s2 is modulo q. PKCS#1 is
+	 * unclear about whether p may be lower than q (some existing,
+	 * widely deployed implementations of RSA don't tolerate p < q),
+	 * but we want to support that occurrence, so we need to use the
+	 * reduction function.
+	 *
+	 * Since we use br_i31_decode_reduce() for iq (purportedly, the
+	 * inverse of q modulo p), we also tolerate improperly large
+	 * values for this parameter.
+	 */
+	t1 = mq + 4 * fwlen;
+	t2 = mq + 5 * fwlen;
+	br_i31_reduce(t2, s2, mp);
+	br_i31_add(s1, mp, br_i31_sub(s1, t2, 1));
+	br_i31_to_monty(s1, mp);
+	br_i31_decode_reduce(t1, sk->iq, sk->iqlen, mp);
+	br_i31_montymul(t2, s1, t1, mp, p0i);
+
+	/*
+	 * h is now in t2. We compute the final result:
+	 *   s = s2 + q*h
+	 * All these operations are non-modular.
+	 *
+	 * We need mq, s2 and t2. We use the t3 buffer as destination.
+	 * The buffers mp, s1 and t1 are no longer needed, so we can
+	 * reuse them for t3. Moreover, the first step of the computation
+	 * is to copy s2 into t3, after which s2 is not needed. Right
+	 * now, mq is in slot 0, s2 is in slot 1, and t2 is in slot 5.
+	 * Therefore, we have ample room for t3 by simply using s2.
+	 */
+	t3 = s2;
+	br_i31_mulacc(t3, mq, t2);
+
+	/*
+	 * Encode the result. Since we already checked the value of xlen,
+	 * we can just use it right away.
+	 */
+	br_i31_encode(x, xlen, t3);
+
+	/*
+	 * The only error conditions remaining at that point are invalid
+	 * values for p and q (even integers).
+	 */
+	return p0i & q0i & r;
+}
diff --git a/third_party/bearssl/src/rsa_i31_privexp.c b/third_party/bearssl/src/rsa_i31_privexp.c
new file mode 100644
index 0000000..eee62a0
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_privexp.c
@@ -0,0 +1,318 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h */
+size_t
+br_rsa_i31_compute_privexp(void *d,
+	const br_rsa_private_key *sk, uint32_t e)
+{
+	/*
+	 * We want to invert e modulo phi = (p-1)(q-1). This first
+	 * requires computing phi, which is easy since we have the factors
+	 * p and q in the private key structure.
+	 *
+	 * Since p = 3 mod 4 and q = 3 mod 4, phi/4 is an odd integer.
+	 * We could invert e modulo phi/4 then patch the result to
+	 * modulo phi, but this would involve assembling three modulus-wide
+	 * values (phi/4, 1 and e) and calling moddiv, that requires
+	 * three more temporaries, for a total of six big integers, or
+	 * slightly more than 3 kB of stack space for RSA-4096. This
+	 * exceeds our stack requirements.
+	 *
+	 * Instead, we first use one step of the extended GCD:
+	 *
+	 *   - We compute phi = k*e + r  (Euclidean division of phi by e).
+	 *     If public exponent e is correct, then r != 0 (e must be
+	 *     invertible modulo phi). We also have k != 0 since we
+	 *     enforce non-ridiculously-small factors.
+	 *
+	 *   - We find small u, v such that u*e - v*r = 1  (using a
+	 *     binary GCD; we can arrange for u < r and v < e, i.e. all
+	 *     values fit on 32 bits).
+	 *
+	 *   - Solution is: d = u + v*k
+	 *     This last computation is exact: since u < r and v < e,
+	 *     the above implies d < r + e*((phi-r)/e) = phi
+	 */
+
+	uint32_t tmp[4 * ((BR_MAX_RSA_FACTOR + 30) / 31) + 12];
+	uint32_t *p, *q, *k, *m, *z, *phi;
+	const unsigned char *pbuf, *qbuf;
+	size_t plen, qlen, u, len, dlen;
+	uint32_t r, a, b, u0, v0, u1, v1, he, hr;
+	int i;
+
+	/*
+	 * Check that e is correct.
+	 */
+	if (e < 3 || (e & 1) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Check lengths of p and q, and that they are both odd.
+	 */
+	pbuf = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *pbuf == 0) {
+		pbuf ++;
+		plen --;
+	}
+	if (plen < 5 || plen > (BR_MAX_RSA_FACTOR / 8)
+		|| (pbuf[plen - 1] & 1) != 1)
+	{
+		return 0;
+	}
+	qbuf = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *qbuf == 0) {
+		qbuf ++;
+		qlen --;
+	}
+	if (qlen < 5 || qlen > (BR_MAX_RSA_FACTOR / 8)
+		|| (qbuf[qlen - 1] & 1) != 1)
+	{
+		return 0;
+	}
+
+	/*
+	 * Output length is that of the modulus.
+	 */
+	dlen = (sk->n_bitlen + 7) >> 3;
+	if (d == NULL) {
+		return dlen;
+	}
+
+	p = tmp;
+	br_i31_decode(p, pbuf, plen);
+	plen = (p[0] + 31) >> 5;
+	q = p + 1 + plen;
+	br_i31_decode(q, qbuf, qlen);
+	qlen = (q[0] + 31) >> 5;
+
+	/*
+	 * Compute phi = (p-1)*(q-1), then move it over p-1 and q-1 (that
+	 * we do not need anymore). The mulacc function sets the announced
+	 * bit length of t to be the sum of the announced bit lengths of
+	 * p-1 and q-1, which is usually exact but may overshoot by one 1
+	 * bit in some cases; we readjust it to its true length.
+	 */
+	p[1] --;
+	q[1] --;
+	phi = q + 1 + qlen;
+	br_i31_zero(phi, p[0]);
+	br_i31_mulacc(phi, p, q);
+	len = (phi[0] + 31) >> 5;
+	memmove(tmp, phi, (1 + len) * sizeof *phi);
+	phi = tmp;
+	phi[0] = br_i31_bit_length(phi + 1, len);
+	len = (phi[0] + 31) >> 5;
+
+	/*
+	 * Divide phi by public exponent e. The final remainder r must be
+	 * non-zero (otherwise, the key is invalid). The quotient is k,
+	 * which we write over phi, since we don't need phi after that.
+	 */
+	r = 0;
+	for (u = len; u >= 1; u --) {
+		/*
+		 * Upon entry, r < e, and phi[u] < 2^31; hence,
+		 * hi:lo < e*2^31. Thus, the produced word k[u]
+		 * must be lower than 2^31, and the new remainder r
+		 * is lower than e.
+		 */
+		uint32_t hi, lo;
+
+		hi = r >> 1;
+		lo = (r << 31) + phi[u];
+		phi[u] = br_divrem(hi, lo, e, &r);
+	}
+	if (r == 0) {
+		return 0;
+	}
+	k = phi;
+
+	/*
+	 * Compute u and v such that u*e - v*r = GCD(e,r). We use
+	 * a binary GCD algorithm, with 6 extra integers a, b,
+	 * u0, u1, v0 and v1. Initial values are:
+	 *   a = e    u0 = 1   v0 = 0
+	 *   b = r    u1 = r   v1 = e-1
+	 * The following invariants are maintained:
+	 *   a = u0*e - v0*r
+	 *   b = u1*e - v1*r
+	 *   0 < a <= e
+	 *   0 < b <= r
+	 *   0 <= u0 <= r
+	 *   0 <= v0 <= e
+	 *   0 <= u1 <= r
+	 *   0 <= v1 <= e
+	 *
+	 * At each iteration, we reduce either a or b by one bit, and
+	 * adjust u0, u1, v0 and v1 to maintain the invariants:
+	 *  - if a is even, then a <- a/2
+	 *  - otherwise, if b is even, then b <- b/2
+	 *  - otherwise, if a > b, then a <- (a-b)/2
+	 *  - otherwise, if b > a, then b <- (b-a)/2
+	 * Algorithm stops when a = b. At that point, the common value
+	 * is the GCD of e and r; it must be 1 (otherwise, the private
+	 * key or public exponent is not valid). The (u0,v0) or (u1,v1)
+	 * pairs are the solution we are looking for.
+	 *
+	 * Since either a or b is reduced by at least 1 bit at each
+	 * iteration, 62 iterations are enough to reach the end
+	 * condition.
+	 *
+	 * To maintain the invariants, we must compute the same operations
+	 * on the u* and v* values that we do on a and b:
+	 *  - When a is divided by 2, u0 and v0 must be divided by 2.
+	 *  - When b is divided by 2, u1 and v1 must be divided by 2.
+	 *  - When b is subtracted from a, u1 and v1 are subtracted from
+	 *    u0 and v0, respectively.
+	 *  - When a is subtracted from b, u0 and v0 are subtracted from
+	 *    u1 and v1, respectively.
+	 *
+	 * However, we want to keep the u* and v* values in their proper
+	 * ranges. The following remarks apply:
+	 *
+	 *  - When a is divided by 2, then a is even. Therefore:
+	 *
+	 *     * If r is odd, then u0 and v0 must have the same parity;
+	 *       if they are both odd, then adding r to u0 and e to v0
+	 *       makes them both even, and the division by 2 brings them
+	 *       back to the proper range.
+	 *
+	 *     * If r is even, then u0 must be even; if v0 is odd, then
+	 *       adding r to u0 and e to v0 makes them both even, and the
+	 *       division by 2 brings them back to the proper range.
+	 *
+	 *    Thus, all we need to do is to look at the parity of v0,
+	 *    and add (r,e) to (u0,v0) when v0 is odd. In order to avoid
+	 *    a 32-bit overflow, we can add ((r+1)/2,(e/2)+1) after the
+	 *    division (r+1 does not overflow since r < e; and (e/2)+1
+	 *    is equal to (e+1)/2 since e is odd).
+	 *
+	 *  - When we subtract b from a, three cases may occur:
+	 *
+	 *     * u1 <= u0 and v1 <= v0: just do the subtractions
+	 *
+	 *     * u1 > u0 and v1 > v0: compute:
+	 *         (u0, v0) <- (u0 + r - u1, v0 + e - v1)
+	 *
+	 *     * u1 <= u0 and v1 > v0: compute:
+	 *         (u0, v0) <- (u0 + r - u1, v0 + e - v1)
+	 *
+	 *    The fourth case (u1 > u0 and v1 <= v0) is not possible
+	 *    because it would contradict "b < a" (which is the reason
+	 *    why we subtract b from a).
+	 *
+	 *    The tricky case is the third one: from the equations, it
+	 *    seems that u0 may go out of range. However, the invariants
+	 *    and ranges of other values imply that, in that case, the
+	 *    new u0 does not actually exceed the range.
+	 *
+	 *    We can thus handle the subtraction by adding (r,e) based
+	 *    solely on the comparison between v0 and v1.
+	 */
+	a = e;
+	b = r;
+	u0 = 1;
+	v0 = 0;
+	u1 = r;
+	v1 = e - 1;
+	hr = (r + 1) >> 1;
+	he = (e >> 1) + 1;
+	for (i = 0; i < 62; i ++) {
+		uint32_t oa, ob, agtb, bgta;
+		uint32_t sab, sba, da, db;
+		uint32_t ctl;
+
+		oa = a & 1;                  /* 1 if a is odd */
+		ob = b & 1;                  /* 1 if b is odd */
+		agtb = GT(a, b);             /* 1 if a > b */
+		bgta = GT(b, a);             /* 1 if b > a */
+
+		sab = oa & ob & agtb;        /* 1 if a <- a-b */
+		sba = oa & ob & bgta;        /* 1 if b <- b-a */
+
+		/* a <- a-b, u0 <- u0-u1, v0 <- v0-v1 */
+		ctl = GT(v1, v0);
+		a -= b & -sab;
+		u0 -= (u1 - (r & -ctl)) & -sab;
+		v0 -= (v1 - (e & -ctl)) & -sab;
+
+		/* b <- b-a, u1 <- u1-u0 mod r, v1 <- v1-v0 mod e */
+		ctl = GT(v0, v1);
+		b -= a & -sba;
+		u1 -= (u0 - (r & -ctl)) & -sba;
+		v1 -= (v0 - (e & -ctl)) & -sba;
+
+		da = NOT(oa) | sab;          /* 1 if a <- a/2 */
+		db = (oa & NOT(ob)) | sba;   /* 1 if b <- b/2 */
+
+		/* a <- a/2, u0 <- u0/2, v0 <- v0/2 */
+		ctl = v0 & 1;
+		a ^= (a ^ (a >> 1)) & -da;
+		u0 ^= (u0 ^ ((u0 >> 1) + (hr & -ctl))) & -da;
+		v0 ^= (v0 ^ ((v0 >> 1) + (he & -ctl))) & -da;
+
+		/* b <- b/2, u1 <- u1/2 mod r, v1 <- v1/2 mod e */
+		ctl = v1 & 1;
+		b ^= (b ^ (b >> 1)) & -db;
+		u1 ^= (u1 ^ ((u1 >> 1) + (hr & -ctl))) & -db;
+		v1 ^= (v1 ^ ((v1 >> 1) + (he & -ctl))) & -db;
+	}
+
+	/*
+	 * Check that the GCD is indeed 1. If not, then the key is invalid
+	 * (and there's no harm in leaking that piece of information).
+	 */
+	if (a != 1) {
+		return 0;
+	}
+
+	/*
+	 * Now we have u0*e - v0*r = 1. Let's compute the result as:
+	 *   d = u0 + v0*k
+	 * We still have k in the tmp[] array, and its announced bit
+	 * length is that of phi.
+	 */
+	m = k + 1 + len;
+	m[0] = (1 << 5) + 1;  /* bit length is 32 bits, encoded */
+	m[1] = v0 & 0x7FFFFFFF;
+	m[2] = v0 >> 31;
+	z = m + 3;
+	br_i31_zero(z, k[0]);
+	z[1] = u0 & 0x7FFFFFFF;
+	z[2] = u0 >> 31;
+	br_i31_mulacc(z, k, m);
+
+	/*
+	 * Encode the result.
+	 */
+	br_i31_encode(d, dlen, z);
+	return dlen;
+}
diff --git a/third_party/bearssl/src/rsa_i31_pss_sign.c b/third_party/bearssl/src/rsa_i31_pss_sign.c
new file mode 100644
index 0000000..b06f3e2
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_pss_sign.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h -- PSS signature generation with the "i31" engine */
+uint32_t
+br_rsa_i31_pss_sign(const br_prng_class **rng,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash, size_t salt_len,
+	const br_rsa_private_key *sk, unsigned char *x)
+{
+	/*
+	 * A padding failure yields 0 and skips the private key operation.
+	 */
+	return br_rsa_pss_sig_pad(rng, hf_data, hf_mgf1, hash,
+		salt_len, sk->n_bitlen, x)
+		? br_rsa_i31_private(x, sk) : 0;
+}
diff --git a/third_party/bearssl/src/rsa_i31_pss_vrfy.c b/third_party/bearssl/src/rsa_i31_pss_vrfy.c
new file mode 100644
index 0000000..77a9b28
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_pss_vrfy.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h -- PSS signature verification with the "i31" engine */
+uint32_t
+br_rsa_i31_pss_vrfy(const unsigned char *x, size_t xlen,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const void *hash, size_t salt_len, const br_rsa_public_key *pk)
+{
+	/* Work on a private copy: x[] is const and must stay untouched. */
+	unsigned char work[BR_MAX_RSA_SIZE >> 3];
+
+	if (xlen <= (sizeof work)) {
+		memcpy(work, x, xlen);
+		if (br_rsa_i31_public(work, xlen, pk)) {
+			return br_rsa_pss_sig_unpad(hf_data, hf_mgf1,
+				hash, salt_len, pk, work);
+		}
+	}
+	return 0;
+}
diff --git a/third_party/bearssl/src/rsa_i31_pub.c b/third_party/bearssl/src/rsa_i31_pub.c
new file mode 100644
index 0000000..d5f3fe2
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_pub.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * As a strict minimum, we need four buffers that can hold a
+ * modular integer (TLEN counts uint32_t words; see tmp[] below).
+ */
+#define TLEN   (4 * (2 + ((BR_MAX_RSA_SIZE + 30) / 31)))
+
+/* see bearssl_rsa.h -- x[] <- x[]^e mod n, in place; returns 1 on success, 0 on error */
+uint32_t
+br_rsa_i31_public(unsigned char *x, size_t xlen,
+	const br_rsa_public_key *pk)
+{
+	const unsigned char *n;
+	size_t nlen;
+	uint32_t tmp[1 + TLEN];        /* backing store for m[], a[] and t[] */
+	uint32_t *m, *a, *t;
+	size_t fwlen;                  /* words reserved for each of m[] and a[] */
+	long z;
+	uint32_t m0i, r;
+
+	/*
+	 * Get the actual length of the modulus, and see if it fits within
+	 * our stack buffer. x[] must have exactly that (stripped) length.
+	 */
+	n = pk->n;
+	nlen = pk->nlen;
+	while (nlen > 0 && *n == 0) {  /* skip leading zero bytes */
+		n ++;
+		nlen --;
+	}
+	if (nlen == 0 || nlen > (BR_MAX_RSA_SIZE >> 3) || xlen != nlen) {
+		return 0;
+	}
+	z = (long)nlen << 3;           /* modulus size, in bits */
+	fwlen = 1;                     /* one word, plus one per 31 bits of z */
+	while (z > 0) {
+		z -= 31;
+		fwlen ++;
+	}
+	/*
+	 * Round up length to an even number.
+	 */
+	fwlen += (fwlen & 1);
+
+	/*
+	 * The modulus gets decoded into m[].
+	 * The value to exponentiate goes into a[].
+	 * The temporaries for modular exponentiation are in t[] (the rest).
+	 */
+	m = tmp;
+	a = m + fwlen;
+	t = m + 2 * fwlen;
+
+	/*
+	 * Decode the modulus.
+	 */
+	br_i31_decode(m, n, nlen);
+	m0i = br_i31_ninv31(m[1]);
+
+	/*
+	 * Note: if m[] is even, then m0i == 0. Otherwise, m0i must be
+	 * an odd integer.
+	 */
+	r = m0i & 1;                   /* r == 0 rejects an even modulus */
+
+	/*
+	 * Decode x[] into a[]; we also check that its value is proper.
+	 */
+	r &= br_i31_decode_mod(a, x, xlen, m);
+
+	/*
+	 * Compute the modular exponentiation (done even when r == 0).
+	 */
+	br_i31_modpow_opt(a, pk->e, pk->elen, m, m0i, t, TLEN - 2 * fwlen);
+
+	/*
+	 * Encode the result.
+	 */
+	br_i31_encode(x, xlen, a);
+	return r;                      /* 1 only if both checks above passed */
+}
diff --git a/third_party/bearssl/src/rsa_i31_pubexp.c b/third_party/bearssl/src/rsa_i31_pubexp.c
new file mode 100644
index 0000000..f26537d
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i31_pubexp.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Recompute public exponent, based on factor p and reduced private
+ * exponent dp (both unsigned big-endian). Returns 0 on failure.
+ */
+static uint32_t
+get_pubexp(const unsigned char *pbuf, size_t plen,
+	const unsigned char *dpbuf, size_t dplen)
+{
+	/*
+	 * dp is the inverse of e modulo p-1. If p = 3 mod 4, then
+	 * p-1 = 2*((p-1)/2). Taken modulo 2, e is odd and has inverse 1;
+	 * thus, dp must be odd.
+	 *
+	 * We compute the inverse of dp modulo (p-1)/2. This requires
+	 * first reducing dp modulo (p-1)/2 (this can be done with a
+	 * conditional subtract, no need to use the generic modular
+	 * reduction function); then, we use moddiv.
+	 */
+
+	uint32_t tmp[6 * ((BR_MAX_RSA_FACTOR + 61) / 31)];
+	uint32_t *p, *dp, *x;          /* consecutive len-word slices of tmp[] */
+	size_t len;
+	uint32_t e;
+
+	/*
+	 * Compute actual factor length (in bytes) and check that it fits
+	 * under our size constraints.
+	 */
+	while (plen > 0 && *pbuf == 0) {
+		pbuf ++;
+		plen --;
+	}
+	/* Note: the plen == 0 test is already implied by plen < 5. */
+	if (plen == 0 || plen < 5 || plen > (BR_MAX_RSA_FACTOR / 8)) {
+		return 0;
+	}
+
+	/*
+	 * Compute actual reduced exponent length (in bytes) and check that
+	 * it is not longer than p.
+	 */
+	while (dplen > 0 && *dpbuf == 0) {
+		dpbuf ++;
+		dplen --;
+	}
+	if (dplen > plen || dplen == 0
+		|| (dplen == plen && dpbuf[0] > pbuf[0]))   /* top byte too big */
+	{
+		return 0;
+	}
+
+	/*
+	 * Verify that p = 3 mod 4 and that dp is odd.
+	 */
+	if ((pbuf[plen - 1] & 3) != 3 || (dpbuf[dplen - 1] & 1) != 1) {
+		return 0;
+	}
+
+	/*
+	 * Decode p and compute (p-1)/2.
+	 */
+	p = tmp;
+	br_i31_decode(p, pbuf, plen);
+	len = (p[0] + 63) >> 5;        /* words per integer, header word included */
+	br_i31_rshift(p, 1);           /* p is odd, so p >> 1 equals (p-1)/2 */
+
+	/*
+	 * Decode dp and make sure its announced bit length matches that of
+	 * p (we already know that the size of dp, in bits, does not exceed
+	 * the size of p, so we just have to copy the header word).
+	 */
+	dp = p + len;
+	memset(dp, 0, len * sizeof *dp);   /* dp may be shorter than p */
+	br_i31_decode(dp, dpbuf, dplen);
+	dp[0] = p[0];
+
+	/*
+	 * Subtract (p-1)/2 from dp if necessary.
+	 */
+	br_i31_sub(dp, p, NOT(br_i31_sub(dp, p, 0)));
+
+	/*
+	 * If another subtraction is needed, then this means that the
+	 * value was invalid. We don't care to leak information about
+	 * invalid keys.
+	 */
+	if (br_i31_sub(dp, p, 0) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Invert dp modulo (p-1)/2. If the inversion fails, then the
+	 * key value was invalid.
+	 */
+	x = dp + len;
+	br_i31_zero(x, p[0]);
+	x[1] = 1;                      /* x = 1: dividing it by dp gives the inverse */
+	if (br_i31_moddiv(x, dp, p, br_i31_ninv31(p[1]), x + len) == 0) {
+		return 0;
+	}
+
+	/*
+	 * We now have an inverse. We must set it to zero (error) if its
+	 * length is greater than 32 bits and/or if it is an even integer.
+	 * Take care that the bit_length function returns an encoded
+	 * bit length.
+	 */
+	e = (uint32_t)x[1] | ((uint32_t)x[2] << 31);   /* low 32 bits of x */
+	e &= -LT(br_i31_bit_length(x + 1, len - 1), 34);
+	e &= -(e & 1);                 /* an even value is cleared to 0 */
+	return e;
+}
+
+/* see bearssl_rsa.h -- recover the public exponent from a private key */
+uint32_t
+br_rsa_i31_compute_pubexp(const br_rsa_private_key *sk)
+{
+	uint32_t from_p, from_q;
+
+	/*
+	 * Derive one candidate from (p, dp) and one from (q, dq); the
+	 * value is returned only if both agree, and masked to 0 otherwise.
+	 */
+	from_p = get_pubexp(sk->p, sk->plen, sk->dp, sk->dplen);
+	from_q = get_pubexp(sk->q, sk->qlen, sk->dq, sk->dqlen);
+	return from_p & -EQ(from_p, from_q);
+}
diff --git a/third_party/bearssl/src/rsa_i32_oaep_decrypt.c b/third_party/bearssl/src/rsa_i32_oaep_decrypt.c
new file mode 100644
index 0000000..ecfd92b
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_oaep_decrypt.c
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h -- OAEP decryption, in place, with the "i32" engine */
+uint32_t
+br_rsa_i32_oaep_decrypt(const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_private_key *sk, void *data, size_t *len)
+{
+	uint32_t ok;
+
+	if (*len == ((sk->n_bitlen + 7) >> 3)) {
+		/* Unpadding always runs, whatever the private key status. */
+		ok = br_rsa_i32_private(data, sk);
+		return ok & br_rsa_oaep_unpad(dig, label, label_len, data, len);
+	}
+	return 0;
+}
diff --git a/third_party/bearssl/src/rsa_i32_oaep_encrypt.c b/third_party/bearssl/src/rsa_i32_oaep_encrypt.c
new file mode 100644
index 0000000..dc17f3f
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_oaep_encrypt.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h -- OAEP-pad src into dst, then apply the public key to dst */
+size_t
+br_rsa_i32_oaep_encrypt(
+	const br_prng_class **rnd, const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_public_key *pk,
+	void *dst, size_t dst_max_len,
+	const void *src, size_t src_len)
+{
+	size_t out_len;
+
+	out_len = br_rsa_oaep_pad(rnd, dig, label, label_len,
+		pk, dst, dst_max_len, src, src_len);
+	if (out_len != 0) {            /* zero: padding gave nothing to encrypt */
+		out_len &= -(size_t)br_rsa_i32_public(dst, out_len, pk);
+	}
+	return out_len;
+}
diff --git a/third_party/bearssl/src/rsa_i32_pkcs1_sign.c b/third_party/bearssl/src/rsa_i32_pkcs1_sign.c
new file mode 100644
index 0000000..44b6e6d
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_pkcs1_sign.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h -- PKCS#1 signature generation with the "i32" engine */
+uint32_t
+br_rsa_i32_pkcs1_sign(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	const br_rsa_private_key *sk, unsigned char *x)
+{
+	/* A padding failure yields 0 and skips the private key operation. */
+	return br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)
+		? br_rsa_i32_private(x, sk)
+		: 0;
+}
diff --git a/third_party/bearssl/src/rsa_i32_pkcs1_vrfy.c b/third_party/bearssl/src/rsa_i32_pkcs1_vrfy.c
new file mode 100644
index 0000000..6ee7a19
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_pkcs1_vrfy.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h -- PKCS#1 signature verification with the "i32" engine */
+uint32_t
+br_rsa_i32_pkcs1_vrfy(const unsigned char *x, size_t xlen,
+	const unsigned char *hash_oid, size_t hash_len,
+	const br_rsa_public_key *pk, unsigned char *hash_out)
+{
+	unsigned char work[BR_MAX_RSA_SIZE >> 3];   /* private copy of x[] */
+
+	if (xlen <= (sizeof work)) {
+		memcpy(work, x, xlen);
+		if (br_rsa_i32_public(work, xlen, pk)) {
+			return br_rsa_pkcs1_sig_unpad(work, xlen,
+				hash_oid, hash_len, hash_out);
+		}
+	}
+	return 0;
+}
diff --git a/third_party/bearssl/src/rsa_i32_priv.c b/third_party/bearssl/src/rsa_i32_priv.c
new file mode 100644
index 0000000..05c22ec
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_priv.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define U   (1 + (BR_MAX_RSA_FACTOR >> 5))   /* words per factor-sized slot */
+
+/* see bearssl_rsa.h -- private key operation on x[] (in place), CRT-based */
+uint32_t
+br_rsa_i32_private(unsigned char *x, const br_rsa_private_key *sk)
+{
+	const unsigned char *p, *q;
+	size_t plen, qlen;
+	uint32_t tmp[6 * U];           /* six U-word slots, assigned below */
+	uint32_t *mp, *mq, *s1, *s2, *t1, *t2, *t3;
+	uint32_t p0i, q0i;
+	size_t xlen, u;
+	uint32_t r;
+
+	/*
+	 * All our temporary buffers are from the tmp[] array.
+	 *
+	 * The mp, mq, s1, s2, t1 and t2 buffers are large enough to
+	 * contain a RSA factor. The t3 buffer can contain a complete
+	 * RSA modulus. t3 shares its storage space with s2, s1 and t1,
+	 * in that order (this is important, see below).
+	 */
+	mq = tmp;
+	mp = tmp + U;
+	t2 = tmp + 2 * U;
+	s2 = tmp + 3 * U;
+	s1 = tmp + 4 * U;
+	t1 = tmp + 5 * U;
+	t3 = s2;
+
+	/*
+	 * Compute the actual lengths (in bytes) of p and q, and check
+	 * that they fit within our stack buffers.
+	 */
+	p = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *p == 0) {  /* skip leading zero bytes */
+		p ++;
+		plen --;
+	}
+	q = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *q == 0) {  /* skip leading zero bytes */
+		q ++;
+		qlen --;
+	}
+	if (plen > (BR_MAX_RSA_FACTOR >> 3)
+		|| qlen > (BR_MAX_RSA_FACTOR >> 3))
+	{
+		return 0;
+	}
+
+	/*
+	 * Decode p and q.
+	 */
+	br_i32_decode(mp, p, plen);
+	br_i32_decode(mq, q, qlen);
+
+	/*
+	 * Recompute the modulus p*q; r ends up as 1 only when x[] < p*q.
+	 */
+	br_i32_zero(t2, mp[0]);
+	br_i32_mulacc(t2, mp, mq);
+	xlen = (sk->n_bitlen + 7) >> 3;    /* modulus length, in bytes */
+	br_i32_encode(t2 + 2 * U, xlen, t2);   /* bytes go to the s1 slot */
+	u = xlen;
+	r = 0;
+	while (u > 0) {                /* from last byte to first, no early exit */
+		uint32_t wn, wx;
+
+		u --;
+		wn = ((unsigned char *)(t2 + 2 * U))[u];
+		wx = x[u];
+		r = ((wx - (wn + r)) >> 8) & 1;   /* borrow of wx - wn - r */
+	}
+
+	/*
+	 * Compute s1 = x^dp mod p.
+	 */
+	p0i = br_i32_ninv32(mp[1]);
+	br_i32_decode_reduce(s1, x, xlen, mp);
+	br_i32_modpow(s1, sk->dp, sk->dplen, mp, p0i, t1, t2);
+
+	/*
+	 * Compute s2 = x^dq mod q.
+	 */
+	q0i = br_i32_ninv32(mq[1]);
+	br_i32_decode_reduce(s2, x, xlen, mq);
+	br_i32_modpow(s2, sk->dq, sk->dqlen, mq, q0i, t1, t2);
+
+	/*
+	 * Compute:
+	 *   h = (s1 - s2)*(1/q) mod p
+	 * s1 is an integer modulo p, but s2 is modulo q. PKCS#1 is
+	 * unclear about whether p may be lower than q (some existing,
+	 * widely deployed implementations of RSA don't tolerate p < q),
+	 * but we want to support that occurrence, so we need to use the
+	 * reduction function.
+	 *
+	 * Since we use br_i32_decode_reduce() for iq (purportedly, the
+	 * inverse of q modulo p), we also tolerate improperly large
+	 * values for this parameter.
+	 */
+	br_i32_reduce(t2, s2, mp);
+	br_i32_add(s1, mp, br_i32_sub(s1, t2, 1));   /* sub result gates the add */
+	br_i32_to_monty(s1, mp);
+	br_i32_decode_reduce(t1, sk->iq, sk->iqlen, mp);
+	br_i32_montymul(t2, s1, t1, mp, p0i);
+
+	/*
+	 * h is now in t2. We compute the final result:
+	 *   s = s2 + q*h
+	 * All these operations are non-modular.
+	 *
+	 * We need mq, s2 and t2. We use the t3 buffer as destination.
+	 * The buffers mp, s1 and t1 are no longer needed. Moreover,
+	 * the first step is to copy s2 into the destination buffer t3.
+	 * We thus arranged for t3 to actually share space with s2, and
+	 * to be followed by the space formerly used by s1 and t1.
+	 */
+	br_i32_mulacc(t3, mq, t2);
+
+	/*
+	 * Encode the result. Since we already checked the value of xlen,
+	 * we can just use it right away.
+	 */
+	br_i32_encode(x, xlen, t3);
+
+	/*
+	 * The only error conditions remaining at that point are invalid
+	 * values for p and q (even integers).
+	 */
+	return p0i & q0i & r;          /* r is 0 or 1, so the result is too */
+}
diff --git a/third_party/bearssl/src/rsa_i32_pss_sign.c b/third_party/bearssl/src/rsa_i32_pss_sign.c
new file mode 100644
index 0000000..0f72f92
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_pss_sign.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h -- PSS signature generation with the "i32" engine */
+uint32_t
+br_rsa_i32_pss_sign(const br_prng_class **rng,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash, size_t salt_len,
+	const br_rsa_private_key *sk, unsigned char *x)
+{
+	/*
+	 * A padding failure yields 0 and skips the private key operation.
+	 */
+	return br_rsa_pss_sig_pad(rng, hf_data, hf_mgf1, hash,
+		salt_len, sk->n_bitlen, x)
+		? br_rsa_i32_private(x, sk) : 0;
+}
diff --git a/third_party/bearssl/src/rsa_i32_pss_vrfy.c b/third_party/bearssl/src/rsa_i32_pss_vrfy.c
new file mode 100644
index 0000000..2e70d23
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_pss_vrfy.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h -- PSS signature verification with the "i32" engine */
+uint32_t
+br_rsa_i32_pss_vrfy(const unsigned char *x, size_t xlen,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const void *hash, size_t salt_len, const br_rsa_public_key *pk)
+{
+	/* Work on a private copy: x[] is const and must stay untouched. */
+	unsigned char work[BR_MAX_RSA_SIZE >> 3];
+
+	if (xlen <= (sizeof work)) {
+		memcpy(work, x, xlen);
+		if (br_rsa_i32_public(work, xlen, pk)) {
+			return br_rsa_pss_sig_unpad(hf_data, hf_mgf1,
+				hash, salt_len, pk, work);
+		}
+	}
+	return 0;
+}
diff --git a/third_party/bearssl/src/rsa_i32_pub.c b/third_party/bearssl/src/rsa_i32_pub.c
new file mode 100644
index 0000000..6e8d8e3
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i32_pub.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h -- RSA public-key operation applied in place to x[] (i32 engine) */
+uint32_t
+br_rsa_i32_public(unsigned char *x, size_t xlen,
+	const br_rsa_public_key *pk)
+{
+	uint32_t mod[1 + (BR_MAX_RSA_SIZE >> 5)];
+	uint32_t val[1 + (BR_MAX_RSA_SIZE >> 5)];
+	uint32_t scratch1[1 + (BR_MAX_RSA_SIZE >> 5)];
+	uint32_t scratch2[1 + (BR_MAX_RSA_SIZE >> 5)];
+	const unsigned char *mbuf;
+	size_t mlen;
+	uint32_t ninv, ok;
+
+	/*
+	 * Skip the leading zero bytes of the modulus, then reject an
+	 * empty or oversized modulus, or an x[] of a different length.
+	 */
+	for (mbuf = pk->n, mlen = pk->nlen; mlen > 0 && *mbuf == 0; mlen --) {
+		mbuf ++;
+	}
+	if (mlen == 0) {
+		return 0;
+	}
+	if (mlen > (BR_MAX_RSA_SIZE >> 3) || xlen != mlen) {
+		return 0;
+	}
+	br_i32_decode(mod, mbuf, mlen);
+	ninv = br_i32_ninv32(mod[1]);
+
+	/*
+	 * An even modulus yields ninv == 0; otherwise ninv is odd, so
+	 * its low bit is used as the initial status flag.
+	 */
+	ok = ninv & 1;
+
+	/*
+	 * Decode x[]; the decoder's return value is folded into the status.
+	 */
+	ok &= br_i32_decode_mod(val, x, xlen, mod);
+
+	/*
+	 * The exponentiation is performed whatever the status flag says.
+	 */
+	br_i32_modpow(val, pk->e, pk->elen, mod, ninv, scratch1, scratch2);
+
+	/*
+	 * Write the result back over x[].
+	 */
+	br_i32_encode(x, xlen, val);
+	return ok;
+}
diff --git a/third_party/bearssl/src/rsa_i62_keygen.c b/third_party/bearssl/src/rsa_i62_keygen.c
new file mode 100644
index 0000000..992fe97
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_keygen.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+/* see bearssl_rsa.h -- key generation, forwarded to br_rsa_i31_keygen_inner() */
+uint32_t
+br_rsa_i62_keygen(const br_prng_class **rng,
+	br_rsa_private_key *sk, void *kbuf_priv,
+	br_rsa_public_key *pk, void *kbuf_pub,
+	unsigned size, uint32_t pubexp)
+{
+	/* Every argument is passed through; br_i62_modpow_opt_as_i31 is appended. */
+	return br_rsa_i31_keygen_inner(rng, sk, kbuf_priv, pk, kbuf_pub,
+		size, pubexp, &br_i62_modpow_opt_as_i31);
+}
+
+/* see bearssl_rsa.h -- i62 code is compiled in: hand out br_rsa_i62_keygen */
+br_rsa_keygen
+br_rsa_i62_keygen_get(void)
+{
+	return br_rsa_i62_keygen;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 key generator */
+br_rsa_keygen
+br_rsa_i62_keygen_get(void)
+{
+	return (br_rsa_keygen)0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_i62_oaep_decrypt.c b/third_party/bearssl/src/rsa_i62_oaep_decrypt.c
new file mode 100644
index 0000000..38470dd
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_oaep_decrypt.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+/* see bearssl_rsa.h -- OAEP decryption: i62 private-key operation, then unpadding */
+uint32_t
+br_rsa_i62_oaep_decrypt(const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_private_key *sk, void *data, size_t *len)
+{
+	uint32_t status;
+
+	if (*len != ((sk->n_bitlen + 7) >> 3)) {
+		return 0;	/* input is not exactly ceil(n_bitlen / 8) bytes */
+	}
+	/* Unpadding always runs; its result is ANDed with the RSA status. */
+	status = br_rsa_i62_private(data, sk);
+	return status & br_rsa_oaep_unpad(dig, label, label_len, data, len);
+}
+
+/* see bearssl_rsa.h -- i62 code is compiled in: hand out br_rsa_i62_oaep_decrypt */
+br_rsa_oaep_decrypt
+br_rsa_i62_oaep_decrypt_get(void)
+{
+	return br_rsa_i62_oaep_decrypt;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 OAEP decryption */
+br_rsa_oaep_decrypt
+br_rsa_i62_oaep_decrypt_get(void)
+{
+	return (br_rsa_oaep_decrypt)0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_i62_oaep_encrypt.c b/third_party/bearssl/src/rsa_i62_oaep_encrypt.c
new file mode 100644
index 0000000..cf41ecb
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_oaep_encrypt.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+/* see bearssl_rsa.h -- OAEP encryption: pad into dst[], then i62 public-key operation */
+size_t
+br_rsa_i62_oaep_encrypt(
+	const br_prng_class **rnd, const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_public_key *pk,
+	void *dst, size_t dst_max_len,
+	const void *src, size_t src_len)
+{
+	size_t padded_len = br_rsa_oaep_pad(rnd, dig, label, label_len,
+		pk, dst, dst_max_len, src, src_len);
+
+	/* -(size_t)status is all-ones for status 1 and zero for status 0. */
+	if (padded_len != 0) {
+		padded_len &= -(size_t)br_rsa_i62_public(dst, padded_len, pk);
+	}
+	return padded_len;
+}
+
+/* see bearssl_rsa.h -- i62 code is compiled in: hand out br_rsa_i62_oaep_encrypt */
+br_rsa_oaep_encrypt
+br_rsa_i62_oaep_encrypt_get(void)
+{
+	return br_rsa_i62_oaep_encrypt;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 OAEP encryption */
+br_rsa_oaep_encrypt
+br_rsa_i62_oaep_encrypt_get(void)
+{
+	return (br_rsa_oaep_encrypt)0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_i62_pkcs1_sign.c b/third_party/bearssl/src/rsa_i62_pkcs1_sign.c
new file mode 100644
index 0000000..a20a084
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_pkcs1_sign.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+/* see bearssl_rsa.h -- PKCS#1 signature: pad into x[], then i62 private-key operation */
+uint32_t
+br_rsa_i62_pkcs1_sign(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	const br_rsa_private_key *sk, unsigned char *x)
+{
+	/* x[] receives the padded block; a padding failure skips the RSA op. */
+	return br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)
+		? br_rsa_i62_private(x, sk)
+		: 0;
+}
+
+/* see bearssl_rsa.h -- i62 code is compiled in: hand out br_rsa_i62_pkcs1_sign */
+br_rsa_pkcs1_sign
+br_rsa_i62_pkcs1_sign_get(void)
+{
+	return br_rsa_i62_pkcs1_sign;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 PKCS#1 signing */
+br_rsa_pkcs1_sign
+br_rsa_i62_pkcs1_sign_get(void)
+{
+	return (br_rsa_pkcs1_sign)0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_i62_pkcs1_vrfy.c b/third_party/bearssl/src/rsa_i62_pkcs1_vrfy.c
new file mode 100644
index 0000000..6519161
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_pkcs1_vrfy.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+/* see bearssl_rsa.h -- PKCS#1 signature check on a local copy of x[], via the i62 public-key operation */
+uint32_t
+br_rsa_i62_pkcs1_vrfy(const unsigned char *x, size_t xlen,
+	const unsigned char *hash_oid, size_t hash_len,
+	const br_rsa_public_key *pk, unsigned char *hash_out)
+{
+	unsigned char work[BR_MAX_RSA_SIZE >> 3];
+
+	if (xlen <= sizeof work) {
+		memcpy(work, x, xlen);
+		if (br_rsa_i62_public(work, xlen, pk) != 0) {
+			return br_rsa_pkcs1_sig_unpad(work, xlen,
+				hash_oid, hash_len, hash_out);
+		}
+	}
+	return 0;
+}
+
+/* see bearssl_rsa.h -- i62 code is compiled in: hand out br_rsa_i62_pkcs1_vrfy */
+br_rsa_pkcs1_vrfy
+br_rsa_i62_pkcs1_vrfy_get(void)
+{
+	return br_rsa_i62_pkcs1_vrfy;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 PKCS#1 verification */
+br_rsa_pkcs1_vrfy
+br_rsa_i62_pkcs1_vrfy_get(void)
+{
+	return (br_rsa_pkcs1_vrfy)0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_i62_priv.c b/third_party/bearssl/src/rsa_i62_priv.c
new file mode 100644
index 0000000..f0da600
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_priv.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+#define U      (2 + ((BR_MAX_RSA_FACTOR + 30) / 31))  /* 2 + ceil(BR_MAX_RSA_FACTOR / 31) */
+#define TLEN   (4 * U)  /* TLEN is counted in 64-bit words */
+
+/* see bearssl_rsa.h -- RSA private-key operation applied in place to x[], using the p, q, dp, dq and iq fields of sk; returns 0 when the factors do not fit or a check fails */
+uint32_t
+br_rsa_i62_private(unsigned char *x, const br_rsa_private_key *sk)
+{
+	const unsigned char *p, *q;
+	size_t plen, qlen;
+	size_t fwlen;
+	uint32_t p0i, q0i;
+	size_t xlen, u;
+	uint64_t tmp[TLEN];	/* scratch area, carved below into slots of fwlen 64-bit words */
+	long z;	/* bit length of the longer factor, counted down in steps of 31 */
+	uint32_t *mp, *mq, *s1, *s2, *t1, *t2, *t3;
+	uint32_t r;	/* status accumulator: set by the range check, then ANDed with each modpow result */
+
+	/*
+	 * Compute the actual lengths of p and q, in bytes.
+	 * These lengths are not considered secret (we cannot really hide
+	 * them anyway in constant-time code).
+	 */
+	p = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *p == 0) {
+		p ++;
+		plen --;
+	}
+	q = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *q == 0) {
+		q ++;
+		qlen --;
+	}
+
+	/*
+	 * Compute the maximum factor length, in words.
+	 */
+	z = (long)(plen > qlen ? plen : qlen) << 3;
+	fwlen = 1;
+	while (z > 0) {
+		z -= 31;
+		fwlen ++;
+	}
+
+	/*
+	 * Convert size to 62-bit words.
+	 */
+	fwlen = (fwlen + 1) >> 1;
+
+	/*
+	 * We need to fit at least 6 values in the stack buffer.
+	 */
+	if (6 * fwlen > TLEN) {
+		return 0;
+	}
+
+	/*
+	 * Compute signature length (in bytes).
+	 */
+	xlen = (sk->n_bitlen + 7) >> 3;
+
+	/*
+	 * Decode q.
+	 */
+	mq = (uint32_t *)tmp;
+	br_i31_decode(mq, q, qlen);
+
+	/*
+	 * Decode p.
+	 */
+	t1 = (uint32_t *)(tmp + fwlen);
+	br_i31_decode(t1, p, plen);
+
+	/*
+	 * Compute the modulus (product of the two factors), to compare
+	 * it with the source value. We use br_i31_mulacc(), since it's
+	 * already used later on.
+	 */
+	t2 = (uint32_t *)(tmp + 2 * fwlen);
+	br_i31_zero(t2, mq[0]);
+	br_i31_mulacc(t2, mq, t1);
+
+	/*
+	 * We encode the modulus into bytes, to perform the comparison
+	 * with bytes. We know that the product length, in bytes, is
+	 * exactly xlen.
+	 * The comparison actually computes the carry when subtracting
+	 * the modulus from the source value; that carry must be 1 for
+	 * a value in the correct range. We keep it in r, which is our
+	 * accumulator for the error code.
+	 */
+	t3 = (uint32_t *)(tmp + 4 * fwlen);
+	br_i31_encode(t3, xlen, t2);
+	u = xlen;
+	r = 0;
+	while (u > 0) {
+		uint32_t wn, wx;
+
+		u --;
+		wn = ((unsigned char *)t3)[u];
+		wx = x[u];
+		r = ((wx - (wn + r)) >> 8) & 1;	/* bit 8 of the unsigned difference is the borrow out of this byte */
+	}
+
+	/*
+	 * Move the decoded p from slot 1 to slot 2, overwriting the product.
+	 */
+	mp = (uint32_t *)(tmp + 2 * fwlen);
+	memmove(mp, t1, 2 * fwlen * sizeof *t1);
+
+	/*
+	 * Compute s2 = x^dq mod q.
+	 */
+	q0i = br_i31_ninv31(mq[1]);
+	s2 = (uint32_t *)(tmp + fwlen);
+	br_i31_decode_reduce(s2, x, xlen, mq);
+	r &= br_i62_modpow_opt(s2, sk->dq, sk->dqlen, mq, q0i,
+		tmp + 3 * fwlen, TLEN - 3 * fwlen);
+
+	/*
+	 * Compute s1 = x^dp mod p.
+	 */
+	p0i = br_i31_ninv31(mp[1]);
+	s1 = (uint32_t *)(tmp + 3 * fwlen);
+	br_i31_decode_reduce(s1, x, xlen, mp);
+	r &= br_i62_modpow_opt(s1, sk->dp, sk->dplen, mp, p0i,
+		tmp + 4 * fwlen, TLEN - 4 * fwlen);
+
+	/*
+	 * Compute:
+	 *   h = (s1 - s2)*(1/q) mod p
+	 * s1 is an integer modulo p, but s2 is modulo q. PKCS#1 is
+	 * unclear about whether p may be lower than q (some existing,
+	 * widely deployed implementations of RSA don't tolerate p < q),
+	 * but we want to support that occurrence, so we need to use the
+	 * reduction function.
+	 *
+	 * Since we use br_i31_decode_reduce() for iq (purportedly, the
+	 * inverse of q modulo p), we also tolerate improperly large
+	 * values for this parameter.
+	 */
+	t1 = (uint32_t *)(tmp + 4 * fwlen);
+	t2 = (uint32_t *)(tmp + 5 * fwlen);
+	br_i31_reduce(t2, s2, mp);
+	br_i31_add(s1, mp, br_i31_sub(s1, t2, 1));
+	br_i31_to_monty(s1, mp);
+	br_i31_decode_reduce(t1, sk->iq, sk->iqlen, mp);
+	br_i31_montymul(t2, s1, t1, mp, p0i);
+
+	/*
+	 * h is now in t2. We compute the final result:
+	 *   s = s2 + q*h
+	 * All these operations are non-modular.
+	 *
+	 * We need mq, s2 and t2. We use the t3 buffer as destination.
+	 * The buffers mp, s1 and t1 are no longer needed, so we can
+	 * reuse them for t3. Moreover, the first step of the computation
+	 * is to copy s2 into t3, after which s2 is not needed. Right
+	 * now, mq is in slot 0, s2 is in slot 1, and t2 is in slot 5.
+	 * Therefore, we have ample room for t3 by simply using s2.
+	 */
+	t3 = s2;
+	br_i31_mulacc(t3, mq, t2);
+
+	/*
+	 * Encode the result. Since we already checked the value of xlen,
+	 * we can just use it right away.
+	 */
+	br_i31_encode(x, xlen, t3);
+
+	/*
+	 * The only error conditions remaining at that point are invalid
+	 * values for p and q (even integers).
+	 */
+	return p0i & q0i & r;
+}
+
+/* see bearssl_rsa.h -- i62 code is compiled in: hand out br_rsa_i62_private */
+br_rsa_private
+br_rsa_i62_private_get(void)
+{
+	return br_rsa_i62_private;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 private-key operation */
+br_rsa_private
+br_rsa_i62_private_get(void)
+{
+	return (br_rsa_private)0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_i62_pss_sign.c b/third_party/bearssl/src/rsa_i62_pss_sign.c
new file mode 100644
index 0000000..7232f6d
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_pss_sign.c
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+/* see bearssl_rsa.h -- PSS signature: pad into x[], then i62 private-key operation */
+uint32_t
+br_rsa_i62_pss_sign(const br_prng_class **rng,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash, size_t salt_len,
+	const br_rsa_private_key *sk, unsigned char *x)
+{
+	if (br_rsa_pss_sig_pad(rng, hf_data, hf_mgf1, hash,
+		salt_len, sk->n_bitlen, x))
+	{
+		return br_rsa_i62_private(x, sk);	/* x[] now holds the padded block */
+	}
+	return 0;
+}
+
+/* see bearssl_rsa.h -- i62 code is compiled in: hand out br_rsa_i62_pss_sign */
+br_rsa_pss_sign
+br_rsa_i62_pss_sign_get(void)
+{
+	return br_rsa_i62_pss_sign;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 PSS signing */
+br_rsa_pss_sign
+br_rsa_i62_pss_sign_get(void)
+{
+	return (br_rsa_pss_sign)0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_i62_pss_vrfy.c b/third_party/bearssl/src/rsa_i62_pss_vrfy.c
new file mode 100644
index 0000000..e726e82
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_pss_vrfy.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+/* see bearssl_rsa.h -- PSS signature check built on the i62 public-key operation */
+uint32_t
+br_rsa_i62_pss_vrfy(const unsigned char *x, size_t xlen,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const void *hash, size_t salt_len, const br_rsa_public_key *pk)
+{
+	unsigned char work[BR_MAX_RSA_SIZE >> 3];
+
+	/* The signature is processed in a local copy, so x[] is never modified. */
+	if (xlen <= sizeof work) {
+		memcpy(work, x, xlen);
+		if (br_rsa_i62_public(work, xlen, pk) != 0) {
+			return br_rsa_pss_sig_unpad(hf_data, hf_mgf1,
+				hash, salt_len, pk, work);
+		}
+	}
+	return 0;
+}
+
+/* see bearssl_rsa.h -- i62 code is compiled in: hand out br_rsa_i62_pss_vrfy */
+br_rsa_pss_vrfy
+br_rsa_i62_pss_vrfy_get(void)
+{
+	return br_rsa_i62_pss_vrfy;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 PSS verification */
+br_rsa_pss_vrfy
+br_rsa_i62_pss_vrfy_get(void)
+{
+	return (br_rsa_pss_vrfy)0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_i62_pub.c b/third_party/bearssl/src/rsa_i62_pub.c
new file mode 100644
index 0000000..70cf61b
--- /dev/null
+++ b/third_party/bearssl/src/rsa_i62_pub.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if BR_INT128 || BR_UMUL128
+
+/*
+ * As a strict minimum, we need four buffers that can hold a
+ * modular integer. But TLEN is expressed in 64-bit words.
+ */
+#define TLEN   (2 * (2 + ((BR_MAX_RSA_SIZE + 30) / 31)))
+
+/* see bearssl_rsa.h -- RSA public-key operation applied in place to x[] (i62 engine) */
+uint32_t
+br_rsa_i62_public(unsigned char *x, size_t xlen,
+	const br_rsa_public_key *pk)
+{
+	uint64_t scratch[TLEN];
+	uint32_t *mod, *val;
+	const unsigned char *mbuf;
+	size_t mlen;
+	size_t words;
+	long bits;
+	uint32_t ninv, ok;
+
+	/*
+	 * Skip the leading zero bytes of the modulus, then reject an
+	 * empty or oversized modulus, or an x[] of a different length.
+	 */
+	for (mbuf = pk->n, mlen = pk->nlen; mlen > 0 && *mbuf == 0; mlen --) {
+		mbuf ++;
+	}
+	if (mlen == 0) {
+		return 0;
+	}
+	if (mlen > (BR_MAX_RSA_SIZE >> 3) || xlen != mlen) {
+		return 0;
+	}
+	/*
+	 * Word count: one word, plus one more per started group of 31 bits.
+	 */
+	for (bits = (long)mlen << 3, words = 1; bits > 0; bits -= 31) {
+		words ++;
+	}
+	/*
+	 * Halve, rounding up, to get the count in 62-bit words.
+	 */
+	words = (words + 1) >> 1;
+
+	/*
+	 * mod[] sits at the start of the scratch area; val[], the value
+	 * to exponentiate, follows it.
+	 */
+	mod = (uint32_t *)scratch;
+	val = (uint32_t *)(scratch + words);
+
+	/*
+	 * Decode the modulus.
+	 */
+	br_i31_decode(mod, mbuf, mlen);
+	ninv = br_i31_ninv31(mod[1]);
+
+	/*
+	 * An even modulus yields ninv == 0; otherwise ninv is odd, so
+	 * its low bit is used as the initial status flag.
+	 */
+	ok = ninv & 1;
+
+	/*
+	 * Decode x[]; the decoder's return value is folded into the status.
+	 */
+	ok &= br_i31_decode_mod(val, x, xlen, mod);
+
+	/*
+	 * Exponentiate, lending the rest of the scratch area to the callee.
+	 */
+	br_i62_modpow_opt(val, pk->e, pk->elen, mod, ninv,
+		scratch + 2 * words, TLEN - 2 * words);
+
+	/*
+	 * Write the result back over x[].
+	 */
+	br_i31_encode(x, xlen, val);
+	return ok;
+}
+
+/* see bearssl_rsa.h -- i62 code is compiled in: hand out br_rsa_i62_public */
+br_rsa_public
+br_rsa_i62_public_get(void)
+{
+	return br_rsa_i62_public;
+}
+
+#else
+
+/* see bearssl_rsa.h -- neither BR_INT128 nor BR_UMUL128 is set: no i62 public-key operation */
+br_rsa_public
+br_rsa_i62_public_get(void)
+{
+	return (br_rsa_public)0;
+}
+
+#endif
diff --git a/third_party/bearssl/src/rsa_oaep_pad.c b/third_party/bearssl/src/rsa_oaep_pad.c
new file mode 100644
index 0000000..5327dc2
--- /dev/null
+++ b/third_party/bearssl/src/rsa_oaep_pad.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Hash len bytes from src with dig and write the digest to dst. Kept
+ * as its own function so that the hash context occupies stack space
+ * only while the hash is being computed.
+ */
+static void
+hash_data(const br_hash_class *dig, void *dst, const void *src, size_t len)
+{
+	br_hash_compat_context ctx;
+
+	ctx.vtable = dig;	/* bind the context to the selected hash */
+	dig->init(&ctx.vtable);
+	dig->update(&ctx.vtable, src, len);
+	dig->out(&ctx.vtable, dst);
+}
+
+/* see inner.h */
+size_t
+br_rsa_oaep_pad(const br_prng_class **rnd, const br_hash_class *dig,
+	const void *label, size_t label_len,
+	const br_rsa_public_key *pk,
+	void *dst, size_t dst_max_len,
+	const void *src, size_t src_len)
+{
+	size_t k, hlen;
+	unsigned char *buf;
+
+	hlen = br_digest_size(dig);
+
+	/*
+	 * Compute actual modulus length (in bytes).
+	 */
+	k = pk->nlen;
+	while (k > 0 && pk->n[k - 1] == 0) {
+		k --;
+	}
+
+	/*
+	 * An error is reported if:
+	 *  - the modulus is too short;
+	 *  - the source message length is too long;
+	 *  - the destination buffer is too short.
+	 */
+	if (k < ((hlen << 1) + 2)
+		|| src_len > (k - (hlen << 1) - 2)
+		|| dst_max_len < k)
+	{
+		return 0;
+	}
+
+	/*
+	 * Apply padding. At this point, things cannot fail.
+	 */
+	buf = dst;
+
+	/*
+	 * Assemble: DB = lHash || PS || 0x01 || M
+	 * We first place the source message M with memmove(), so that
+	 * overlaps between source and destination buffers are supported.
+	 */
+	memmove(buf + k - src_len, src, src_len);
+	hash_data(dig, buf + 1 + hlen, label, label_len);
+	memset(buf + 1 + (hlen << 1), 0, k - src_len - (hlen << 1) - 2);
+	buf[k - src_len - 1] = 0x01;
+
+	/*
+	 * Make the random seed.
+	 */
+	(*rnd)->generate(rnd, buf + 1, hlen);
+
+	/*
+	 * Mask DB with the mask generated from the seed.
+	 */
+	br_mgf1_xor(buf + 1 + hlen, k - hlen - 1, dig, buf + 1, hlen);
+
+	/*
+	 * Mask the seed with the mask generated from the masked DB.
+	 */
+	br_mgf1_xor(buf + 1, hlen, dig, buf + 1 + hlen, k - hlen - 1);
+
+	/*
+	 * Padding result: EM = 0x00 || maskedSeed || maskedDB.
+	 */
+	buf[0] = 0x00;
+	return k;
+}
diff --git a/third_party/bearssl/src/rsa_oaep_unpad.c b/third_party/bearssl/src/rsa_oaep_unpad.c
new file mode 100644
index 0000000..7c4be6a
--- /dev/null
+++ b/third_party/bearssl/src/rsa_oaep_unpad.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Compute the digest of src[0..len-1] with hash function dig, and XOR the
+ * digest bytes into dst. Kept as its own function so that the hash context
+ * only occupies stack space while the hash is being computed.
+ */
+static void
+xor_hash_data(const br_hash_class *dig, void *dst, const void *src, size_t len)
+{
+	br_hash_compat_context ctx;
+	unsigned char digest[64], *out;
+	size_t i, n;
+
+	/* One-shot hash of the input, then fold the digest into dst. */
+	ctx.vtable = dig;
+	dig->init(&ctx.vtable);
+	dig->update(&ctx.vtable, src, len);
+	dig->out(&ctx.vtable, digest);
+	n = br_digest_size(dig);
+	out = dst;
+	for (i = 0; i != n; i ++) {
+		out[i] ^= digest[i];
+	}
+}
+
+/* Verify and strip OAEP padding in place: 1 (and *len updated) if valid, else 0. See inner.h. */
+uint32_t
+br_rsa_oaep_unpad(const br_hash_class *dig,
+	const void *label, size_t label_len,
+	void *data, size_t *len)
+{
+	size_t u, k, hlen;
+	unsigned char *buf;
+	uint32_t r, s, zlen;
+
+	hlen = br_digest_size(dig);
+	k = *len;
+	buf = data;
+
+	/*
+	 * There must be room for the padding.
+	 */
+	if (k < ((hlen << 1) + 2)) {
+		return 0;
+	}
+
+	/*
+	 * Unmask the seed, then the DB value (in place, before any check).
+	 */
+	br_mgf1_xor(buf + 1, hlen, dig, buf + 1 + hlen, k - hlen - 1);
+	br_mgf1_xor(buf + 1 + hlen, k - hlen - 1, dig, buf + 1, hlen);
+
+	/*
+	 * Hash the label and XOR it with the value in the array; if
+	 * they are equal then these should yield only zeros.
+	 */
+	xor_hash_data(dig, buf + 1 + hlen, label, label_len);
+
+	/*
+	 * At that point, if the padding was correct, then we should
+	 * have: 0x00 || seed || 0x00 ... 0x00 0x01 || M
+	 * Padding is valid as long as:
+	 *  - There is at least hlen+1 leading bytes of value 0x00.
+	 *  - There is at least one non-zero byte.
+	 *  - The first (leftmost) non-zero byte has value 0x01.
+	 *
+	 * Ultimately, we may leak the resulting message length, i.e.
+	 * the position of the byte of value 0x01, but we must take care
+	 * to do so only if the number of zero bytes has been verified
+	 * to be at least hlen+1.
+	 *
+	 * The loop below counts the number of bytes of value 0x00, and
+	 * checks that the next byte has value 0x01, in constant-time.
+	 *
+	 *  - If the initial byte (before the seed) is not 0x00, then
+	 *    r and s are set to 0, and stay there.
+	 *  - Value r is 1 until the first non-zero byte is reached
+	 *    (after the seed); it switches to 0 at that point.
+	 *  - Value s is set to 1 if and only if the data encountered
+	 *    at the time of the transition of r from 1 to 0 has value
+	 *    exactly 0x01.
+	 *  - Value zlen counts the number of leading bytes of value zero
+	 *    (after the seed).
+	 */
+	r = 1 - ((buf[0] + 0xFF) >> 8);
+	s = 0;
+	zlen = 0;
+	for (u = hlen + 1; u < k; u ++) {
+		uint32_t w, nz;
+
+		w = buf[u];
+
+		/*
+		 * nz == 1 only for the first non-zero byte.
+		 */
+		nz = r & ((w + 0xFF) >> 8);
+		s |= nz & EQ(w, 0x01);
+		r &= NOT(nz);
+		zlen += r;
+	}
+
+	/*
+	 * Padding is correct only if s == 1, _and_ zlen >= hlen.
+	 */
+	s &= GE(zlen, (uint32_t)hlen);
+
+	/*
+	 * At that point, padding was verified, and we are now allowed
+	 * to make conditional jumps.
+	 */
+	if (s) {
+		size_t plen;
+		/* Skip 0x00, seed (hlen), zlen zero bytes and the 0x01 marker. */
+		plen = 2 + hlen + zlen;
+		k -= plen;
+		memmove(buf, buf + plen, k);
+		*len = k;
+	}
+	return s;
+}
diff --git a/third_party/bearssl/src/rsa_pkcs1_sig_pad.c b/third_party/bearssl/src/rsa_pkcs1_sig_pad.c
new file mode 100644
index 0000000..06c3bd7
--- /dev/null
+++ b/third_party/bearssl/src/rsa_pkcs1_sig_pad.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Build the PKCS#1 padded hash in x[]: 1 on success, 0 if too short (see inner.h). */
+uint32_t
+br_rsa_pkcs1_sig_pad(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	uint32_t n_bitlen, unsigned char *x)
+{
+	size_t u, x3, xlen;
+
+	/*
+	 * Padded hash value has format:
+	 *  00 01 FF .. FF 00 30 x1 30 x2 06 x3 OID 05 00 04 x4 HASH
+	 *
+	 * with the following rules:
+	 *
+	 *  -- Total length is equal to the modulus length (unsigned
+	 *     encoding).
+	 *
+	 *  -- There must be at least eight bytes of value 0xFF.
+	 *
+	 *  -- x4 is equal to the hash length (hash_len).
+	 *
+	 *  -- x3 is equal to the encoded OID value length (hash_oid[0]).
+	 *
+	 *  -- x2 = x3 + 4.
+	 *
+	 *  -- x1 = x2 + x4 + 4 = x3 + x4 + 8.
+	 *
+	 * Note: the "05 00" is optional (signatures with and without
+	 * that sequence exist in practice), but notes in PKCS#1 seem to
+	 * indicate that the presence of that sequence (specifically,
+	 * an ASN.1 NULL value for the hash parameters) may be slightly
+	 * more "standard" than the opposite.
+	 */
+	xlen = (n_bitlen + 7) >> 3;
+	/* Without an OID the layout is just: 00 01 FF .. FF 00 HASH. */
+	if (hash_oid == NULL) {
+		if (xlen < hash_len + 11) {
+			return 0;
+		}
+		x[0] = 0x00;
+		x[1] = 0x01;
+		u = xlen - hash_len;
+		memset(x + 2, 0xFF, u - 3);
+		x[u - 1] = 0x00;
+	} else {
+		x3 = hash_oid[0];
+
+		/*
+		 * Check that there is enough room for all the elements,
+		 * including at least eight bytes of value 0xFF.
+		 */
+		if (xlen < (x3 + hash_len + 21)) {
+			return 0;
+		}
+		x[0] = 0x00;
+		x[1] = 0x01;
+		u = xlen - x3 - hash_len - 11;
+		memset(x + 2, 0xFF, u - 2);
+		x[u] = 0x00;
+		x[u + 1] = 0x30;
+		x[u + 2] = x3 + hash_len + 8;
+		x[u + 3] = 0x30;
+		x[u + 4] = x3 + 4;
+		x[u + 5] = 0x06;
+		memcpy(x + u + 6, hash_oid, x3 + 1);
+		u += x3 + 7;
+		x[u ++] = 0x05;
+		x[u ++] = 0x00;
+		x[u ++] = 0x04;
+		x[u ++] = hash_len;
+	}
+	memcpy(x + u, hash, hash_len);
+	return 1;
+}
diff --git a/third_party/bearssl/src/rsa_pkcs1_sig_unpad.c b/third_party/bearssl/src/rsa_pkcs1_sig_unpad.c
new file mode 100644
index 0000000..c8ae08f
--- /dev/null
+++ b/third_party/bearssl/src/rsa_pkcs1_sig_unpad.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Check PKCS#1 signature padding in sig[] and copy the hash to hash_out: 1 if valid, else 0. See bearssl_rsa.h. */
+uint32_t
+br_rsa_pkcs1_sig_unpad(const unsigned char *sig, size_t sig_len,
+	const unsigned char *hash_oid, size_t hash_len,
+	unsigned char *hash_out)
+{
+	static const unsigned char pad1[] = {
+		0x00, 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+	};
+	/* NOTE(review): up to x3 + 11 bytes of pad2 are written, so hash_oid[0] <= 32 is assumed; not checked here. */
+	unsigned char pad2[43];
+	size_t u, x2, x3, pad_len, zlen;
+
+	if (sig_len < 11) {
+		return 0;
+	}
+
+	/*
+	 * Expected format:
+	 *  00 01 FF ... FF 00 30 x1 30 x2 06 x3 OID [ 05 00 ] 04 x4 HASH
+	 *
+	 * with the following rules:
+	 *
+	 *  -- Total length is that of the modulus and the signature
+	 *     (this was already verified by br_rsa_i31_public()).
+	 *
+	 *  -- There are at least eight bytes of value 0xFF.
+	 *
+	 *  -- x4 is equal to the hash length (hash_len).
+	 *
+	 *  -- x3 is equal to the encoded OID value length (so x3 is the
+	 *     first byte of hash_oid[]).
+	 *
+	 *  -- If the "05 00" is present, then x2 == x3 + 4; otherwise,
+	 *     x2 == x3 + 2.
+	 *
+	 *  -- x1 == x2 + x4 + 4.
+	 *
+	 * So the total length after the last "FF" is either x3 + x4 + 11
+	 * (with the "05 00") or x3 + x4 + 9 (without the "05 00").
+	 */
+
+	/*
+	 * Check the "00 01 FF .. FF 00" with at least eight 0xFF bytes.
+	 * The comparison is valid because we made sure that the signature
+	 * is at least 11 bytes long.
+	 */
+	if (memcmp(sig, pad1, sizeof pad1) != 0) {
+		return 0;
+	}
+	for (u = sizeof pad1; u < sig_len; u ++) {
+		if (sig[u] != 0xFF) {
+			break;
+		}
+	}
+
+	/*
+	 * Remaining length is sig_len - u bytes (including the 00 just
+	 * after the last FF). This must be equal to one of the two
+	 * possible values (depending on whether the "05 00" sequence is
+	 * present or not). With a NULL OID, only "00 HASH" may remain.
+	 */
+	if (hash_oid == NULL) {
+		if (sig_len - u != hash_len + 1 || sig[u] != 0x00) {
+			return 0;
+		}
+	} else {
+		x3 = hash_oid[0];
+		pad_len = x3 + 9;
+		memset(pad2, 0, pad_len);
+		zlen = sig_len - u - hash_len;
+		if (zlen == pad_len) {
+			x2 = x3 + 2;
+		} else if (zlen == pad_len + 2) {
+			x2 = x3 + 4;
+			pad_len = zlen;
+			pad2[pad_len - 4] = 0x05;
+		} else {
+			return 0;
+		}
+		pad2[1] = 0x30;
+		pad2[2] = x2 + hash_len + 4;
+		pad2[3] = 0x30;
+		pad2[4] = x2;
+		pad2[5] = 0x06;
+		memcpy(pad2 + 6, hash_oid, x3 + 1);
+		pad2[pad_len - 2] = 0x04;
+		pad2[pad_len - 1] = hash_len;
+		if (memcmp(pad2, sig + u, pad_len) != 0) {
+			return 0;
+		}
+	}
+	memcpy(hash_out, sig + sig_len - hash_len, hash_len);
+	return 1;
+}
diff --git a/third_party/bearssl/src/rsa_pss_sig_pad.c b/third_party/bearssl/src/rsa_pss_sig_pad.c
new file mode 100644
index 0000000..13e9027
--- /dev/null
+++ b/third_party/bearssl/src/rsa_pss_sig_pad.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Build the PSS-padded value for hash in x[]: 1 on success, 0 if modulus too small (see inner.h). */
+uint32_t
+br_rsa_pss_sig_pad(const br_prng_class **rng,
+	const br_hash_class *hf_data, const br_hash_class *hf_mgf1,
+	const unsigned char *hash, size_t salt_len,
+	uint32_t n_bitlen, unsigned char *x)
+{
+	size_t xlen, hash_len;
+	br_hash_compat_context hc;
+	unsigned char *salt, *seed;
+
+	hash_len = br_digest_size(hf_data);
+
+	/*
+	 * The padded string is one bit smaller than the modulus;
+	 * notably, if the modulus length is equal to 1 modulo 8, then
+	 * the padded string will be one _byte_ smaller, and the first
+	 * byte will be set to 0. We apply these transformations here.
+	 */
+	n_bitlen --;
+	if ((n_bitlen & 7) == 0) {
+		*x ++ = 0;
+	}
+	xlen = (n_bitlen + 7) >> 3;
+
+	/*
+	 * Check that the modulus is large enough for the hash value
+	 * length combined with the intended salt length.
+	 */
+	if (hash_len > xlen || salt_len > xlen
+		|| (hash_len + salt_len + 2) > xlen)
+	{
+		return 0;
+	}
+
+	/*
+	 * Produce a random salt, written directly at its final position.
+	 */
+	salt = x + xlen - hash_len - salt_len - 1;
+	if (salt_len != 0) {
+		(*rng)->generate(rng, salt, salt_len);
+	}
+
+	/*
+	 * Compute the seed for MGF1: H = hash(8 zero bytes || hash || salt).
+	 */
+	seed = x + xlen - hash_len - 1;
+	hf_data->init(&hc.vtable);
+	memset(seed, 0, 8);
+	hf_data->update(&hc.vtable, seed, 8);
+	hf_data->update(&hc.vtable, hash, hash_len);
+	hf_data->update(&hc.vtable, salt, salt_len);
+	hf_data->out(&hc.vtable, seed);
+
+	/*
+	 * Prepare string PS (padded salt). The salt is already at the
+	 * right place.
+	 */
+	memset(x, 0, xlen - salt_len - hash_len - 2);
+	x[xlen - salt_len - hash_len - 2] = 0x01;
+
+	/*
+	 * Generate the mask and XOR it into PS.
+	 */
+	br_mgf1_xor(x, xlen - hash_len - 1, hf_mgf1, seed, hash_len);
+
+	/*
+	 * Clear the top bits to ensure the value is lower than the
+	 * modulus.
+	 */
+	x[0] &= 0xFF >> (((uint32_t)xlen << 3) - n_bitlen);
+
+	/*
+	 * The seed (H) is already in the right place. We just set the
+	 * last byte.
+	 */
+	x[xlen - 1] = 0xBC;
+
+	return 1;
+}
diff --git a/third_party/bearssl/src/rsa_pss_sig_unpad.c b/third_party/bearssl/src/rsa_pss_sig_unpad.c
new file mode 100644
index 0000000..0c6ae99
--- /dev/null
+++ b/third_party/bearssl/src/rsa_pss_sig_unpad.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Verify PSS padding of x[] against hash for key pk: 1 if valid, else 0; x[] is modified (see inner.h). */
+uint32_t
+br_rsa_pss_sig_unpad(const br_hash_class *hf_data,
+	const br_hash_class *hf_mgf1,
+	const unsigned char *hash, size_t salt_len,
+	const br_rsa_public_key *pk, unsigned char *x)
+{
+	size_t u, xlen, hash_len;
+	br_hash_compat_context hc;
+	unsigned char *seed, *salt;
+	unsigned char tmp[64];
+	uint32_t r, n_bitlen;
+
+	hash_len = br_digest_size(hf_data);
+
+	/*
+	 * Value r will be set to a non-zero value if any test fails.
+	 */
+	r = 0;
+
+	/*
+	 * The value bit length (as an integer) must be strictly less than
+	 * that of the modulus. An all-zero modulus is rejected outright.
+	 */
+	for (u = 0; u < pk->nlen; u ++) {
+		if (pk->n[u] != 0) {
+			break;
+		}
+	}
+	if (u == pk->nlen) {
+		return 0;
+	}
+	n_bitlen = BIT_LENGTH(pk->n[u]) + ((uint32_t)(pk->nlen - u - 1) << 3);
+	n_bitlen --;
+	if ((n_bitlen & 7) == 0) {
+		r |= *x ++;
+	} else {
+		r |= x[0] & (0xFF << (n_bitlen & 7));
+	}
+	xlen = (n_bitlen + 7) >> 3;
+
+	/*
+	 * Check that the modulus is large enough for the hash value
+	 * length combined with the intended salt length.
+	 */
+	if (hash_len > xlen || salt_len > xlen
+		|| (hash_len + salt_len + 2) > xlen)
+	{
+		return 0;
+	}
+
+	/*
+	 * Check value of rightmost byte.
+	 */
+	r |= x[xlen - 1] ^ 0xBC;
+
+	/*
+	 * Generate the mask and XOR it into the first bytes to reveal PS;
+	 * we must also mask out the leading bits.
+	 */
+	seed = x + xlen - hash_len - 1;
+	br_mgf1_xor(x, xlen - hash_len - 1, hf_mgf1, seed, hash_len);
+	if ((n_bitlen & 7) != 0) {
+		x[0] &= 0xFF >> (8 - (n_bitlen & 7));
+	}
+
+	/*
+	 * Check that all padding bytes have the expected value.
+	 */
+	for (u = 0; u < (xlen - hash_len - salt_len - 2); u ++) {
+		r |= x[u];
+	}
+	r |= x[xlen - hash_len - salt_len - 2] ^ 0x01;
+
+	/*
+	 * Recompute H = hash(8 zero bytes || hash || salt).
+	 */
+	salt = x + xlen - hash_len - salt_len - 1;
+	hf_data->init(&hc.vtable);
+	memset(tmp, 0, 8);
+	hf_data->update(&hc.vtable, tmp, 8);
+	hf_data->update(&hc.vtable, hash, hash_len);
+	hf_data->update(&hc.vtable, salt, salt_len);
+	hf_data->out(&hc.vtable, tmp);
+
+	/*
+	 * Check that the recomputed H value matches the one appearing
+	 * in the string.
+	 */
+	for (u = 0; u < hash_len; u ++) {
+		r |= tmp[u] ^ x[(xlen - hash_len - 1) + u];
+	}
+
+	return EQ0(r);
+}
diff --git a/third_party/bearssl/src/rsa_ssl_decrypt.c b/third_party/bearssl/src/rsa_ssl_decrypt.c
new file mode 100644
index 0000000..047eb18
--- /dev/null
+++ b/third_party/bearssl/src/rsa_ssl_decrypt.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* RSA-decrypt data[] with core/sk and leave the trailing 48 bytes at data[0..47]; returns the padding-check flag. See bearssl_rsa.h. */
+uint32_t
+br_rsa_ssl_decrypt(br_rsa_private core, const br_rsa_private_key *sk,
+	unsigned char *data, size_t len)
+{
+	uint32_t x;
+	size_t u;
+
+	/*
+	 * A first check on length. Since this test works only on the
+	 * buffer length, it needs not (and cannot) be constant-time.
+	 */
+	if (len < 59 || len != (sk->n_bitlen + 7) >> 3) {
+		return 0;
+	}
+	x = core(data, sk);
+	/* Expect 00 02 <non-zero bytes> 00 <48 bytes>; checks are ANDed into x, with no branch on data. */
+	x &= EQ(data[0], 0x00);
+	x &= EQ(data[1], 0x02);
+	for (u = 2; u < (len - 49); u ++) {
+		x &= NEQ(data[u], 0);
+	}
+	x &= EQ(data[len - 49], 0x00);
+	memmove(data, data + len - 48, 48);
+	return x;
+}
diff --git a/third_party/bearssl/src/settings.c b/third_party/bearssl/src/settings.c
new file mode 100644
index 0000000..309271c
--- /dev/null
+++ b/third_party/bearssl/src/settings.c
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+/* Name/value table of build options: 1 if the macro is true at compile time, 0 otherwise; three size limits by value. */
+static const br_config_option config[] = {
+	{ "BR_64",
+#if BR_64
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_AES_X86NI",
+#if BR_AES_X86NI
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_amd64",
+#if BR_amd64
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_ARMEL_CORTEXM_GCC",
+#if BR_ARMEL_CORTEXM_GCC
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_BE_UNALIGNED",
+#if BR_BE_UNALIGNED
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_CLANG",
+#if BR_CLANG
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_CLANG_3_7",
+#if BR_CLANG_3_7
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_CLANG_3_8",
+#if BR_CLANG_3_8
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_CT_MUL15",
+#if BR_CT_MUL15
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_CT_MUL31",
+#if BR_CT_MUL31
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_GCC",
+#if BR_GCC
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_GCC_4_4",
+#if BR_GCC_4_4
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_GCC_4_5",
+#if BR_GCC_4_5
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_GCC_4_6",
+#if BR_GCC_4_6
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_GCC_4_7",
+#if BR_GCC_4_7
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_GCC_4_8",
+#if BR_GCC_4_8
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_GCC_4_9",
+#if BR_GCC_4_9
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_GCC_5_0",
+#if BR_GCC_5_0
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_i386",
+#if BR_i386
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_INT128",
+#if BR_INT128
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_LE_UNALIGNED",
+#if BR_LE_UNALIGNED
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_LOMUL",
+#if BR_LOMUL
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_MAX_EC_SIZE", BR_MAX_EC_SIZE },
+	{ "BR_MAX_RSA_SIZE", BR_MAX_RSA_SIZE },
+	{ "BR_MAX_RSA_FACTOR", BR_MAX_RSA_FACTOR },
+	{ "BR_MSC",
+#if BR_MSC
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_MSC_2005",
+#if BR_MSC_2005
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_MSC_2008",
+#if BR_MSC_2008
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_MSC_2010",
+#if BR_MSC_2010
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_MSC_2012",
+#if BR_MSC_2012
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_MSC_2013",
+#if BR_MSC_2013
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_MSC_2015",
+#if BR_MSC_2015
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_POWER8",
+#if BR_POWER8
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_RDRAND",
+#if BR_RDRAND
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_SLOW_MUL",
+#if BR_SLOW_MUL
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_SLOW_MUL15",
+#if BR_SLOW_MUL15
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_SSE2",
+#if BR_SSE2
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_UMUL128",
+#if BR_UMUL128
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_USE_UNIX_TIME",
+#if BR_USE_UNIX_TIME
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_USE_WIN32_RAND",
+#if BR_USE_WIN32_RAND
+	 1
+#else
+	 0
+#endif
+	},
+	{ "BR_USE_WIN32_TIME",
+#if BR_USE_WIN32_TIME
+	 1
+#else
+	 0
+#endif
+	},
+	/* Final entry has a NULL name; presumably the end marker for readers of the table. */
+	{ NULL, 0 }
+};
+
+/* Return a pointer to the static table of compile-time options (see bearssl.h). */
+const br_config_option *
+br_get_config(void)
+{
+	return config;
+}
diff --git a/third_party/bearssl/src/sha1.c b/third_party/bearssl/src/sha1.c
new file mode 100644
index 0000000..4f65d84
--- /dev/null
+++ b/third_party/bearssl/src/sha1.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+/* Round functions: F is bitwise choice (B selects C or D), G and I are parity, H is majority. */
+#define F(B, C, D)     ((((C) ^ (D)) & (B)) ^ (D))
+#define G(B, C, D)     ((B) ^ (C) ^ (D))
+#define H(B, C, D)     (((D) & (C)) | (((D) | (C)) & (B)))
+#define I(B, C, D)     G(B, C, D)
+/* Rotate left; x must be a 32-bit unsigned value and 0 < n < 32 so both shifts are defined. */
+#define ROTL(x, n)    (((x) << (n)) | ((x) >> (32 - (n))))
+/* Additive constants, one per group of 20 steps in br_sha1_round(). */
+#define K1     ((uint32_t)0x5A827999)
+#define K2     ((uint32_t)0x6ED9EBA1)
+#define K3     ((uint32_t)0x8F1BBCDC)
+#define K4     ((uint32_t)0xCA62C1D6)
+
+/* Initial values of the five SHA-1 state words, copied into a context by br_sha1_init() (see inner.h). */
+const uint32_t br_sha1_IV[5] = {
+	0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0
+};
+
+/* SHA-1 compression function: mix one input block from buf into the five state words val[0..4] (see inner.h). */
+void
+br_sha1_round(const unsigned char *buf, uint32_t *val)
+{
+	uint32_t m[80];
+	uint32_t a, b, c, d, e;
+	int i;
+	/* Load the state, decode 16 message words from buf, then expand them to 80 words. */
+	a = val[0];
+	b = val[1];
+	c = val[2];
+	d = val[3];
+	e = val[4];
+	br_range_dec32be(m, 16, buf);
+	for (i = 16; i < 80; i ++) {
+		uint32_t x = m[i - 3] ^ m[i - 8] ^ m[i - 14] ^ m[i - 16];
+		m[i] = ROTL(x, 1);
+	}
+	/* 80 steps in four groups of 20, unrolled by five; a..e trade roles instead of being shuffled. */
+	for (i = 0; i < 20; i += 5) {
+		e += ROTL(a, 5) + F(b, c, d) + K1 + m[i + 0]; b = ROTL(b, 30);
+		d += ROTL(e, 5) + F(a, b, c) + K1 + m[i + 1]; a = ROTL(a, 30);
+		c += ROTL(d, 5) + F(e, a, b) + K1 + m[i + 2]; e = ROTL(e, 30);
+		b += ROTL(c, 5) + F(d, e, a) + K1 + m[i + 3]; d = ROTL(d, 30);
+		a += ROTL(b, 5) + F(c, d, e) + K1 + m[i + 4]; c = ROTL(c, 30);
+	}
+	for (i = 20; i < 40; i += 5) {
+		e += ROTL(a, 5) + G(b, c, d) + K2 + m[i + 0]; b = ROTL(b, 30);
+		d += ROTL(e, 5) + G(a, b, c) + K2 + m[i + 1]; a = ROTL(a, 30);
+		c += ROTL(d, 5) + G(e, a, b) + K2 + m[i + 2]; e = ROTL(e, 30);
+		b += ROTL(c, 5) + G(d, e, a) + K2 + m[i + 3]; d = ROTL(d, 30);
+		a += ROTL(b, 5) + G(c, d, e) + K2 + m[i + 4]; c = ROTL(c, 30);
+	}
+	for (i = 40; i < 60; i += 5) {
+		e += ROTL(a, 5) + H(b, c, d) + K3 + m[i + 0]; b = ROTL(b, 30);
+		d += ROTL(e, 5) + H(a, b, c) + K3 + m[i + 1]; a = ROTL(a, 30);
+		c += ROTL(d, 5) + H(e, a, b) + K3 + m[i + 2]; e = ROTL(e, 30);
+		b += ROTL(c, 5) + H(d, e, a) + K3 + m[i + 3]; d = ROTL(d, 30);
+		a += ROTL(b, 5) + H(c, d, e) + K3 + m[i + 4]; c = ROTL(c, 30);
+	}
+	for (i = 60; i < 80; i += 5) {
+		e += ROTL(a, 5) + I(b, c, d) + K4 + m[i + 0]; b = ROTL(b, 30);
+		d += ROTL(e, 5) + I(a, b, c) + K4 + m[i + 1]; a = ROTL(a, 30);
+		c += ROTL(d, 5) + I(e, a, b) + K4 + m[i + 2]; e = ROTL(e, 30);
+		b += ROTL(c, 5) + I(d, e, a) + K4 + m[i + 3]; d = ROTL(d, 30);
+		a += ROTL(b, 5) + I(c, d, e) + K4 + m[i + 4]; c = ROTL(c, 30);
+	}
+
+	val[0] += a;
+	val[1] += b;
+	val[2] += c;
+	val[3] += d;
+	val[4] += e;
+}
+
+/* Start a new SHA-1 computation in cc: set the vtable, load the IV, zero the byte count (see bearssl.h). */
+void
+br_sha1_init(br_sha1_context *cc)
+{
+	cc->vtable = &br_sha1_vtable;
+	memcpy(cc->val, br_sha1_IV, sizeof cc->val);
+	cc->count = 0;
+}
+
+/* Feed len bytes from data into the running SHA-1 computation (see bearssl.h). */
+void
+br_sha1_update(br_sha1_context *cc, const void *data, size_t len)
+{
+	const unsigned char *src = data;
+	size_t fill = (size_t)cc->count & 63;
+
+	/*
+	 * 'fill' is the number of bytes pending in cc->buf. Each pass tops
+	 * the buffer up; a full 64-byte buffer is compressed into cc->val.
+	 */
+	while (len != 0) {
+		size_t chunk = 64 - fill;
+		if (len < chunk) {
+			chunk = len;
+		}
+		memcpy(cc->buf + fill, src, chunk);
+		cc->count += (uint64_t)chunk;
+		src += chunk;
+		len -= chunk;
+		fill += chunk;
+		if (fill == 64) {
+			br_sha1_round(cc->buf, cc->val);
+			fill = 0;
+		}
+	}
+}
+
+/* Write the SHA-1 of the data fed so far to dst; works on copies, so cc is left unchanged (see bearssl.h). */
+void
+br_sha1_out(const br_sha1_context *cc, void *dst)
+{
+	unsigned char buf[64];
+	uint32_t val[5];
+	size_t ptr;
+	/* Padding: a 0x80 byte, zeros up to offset 56 (spilling into an extra block if needed), then the bit count. */
+	ptr = (size_t)cc->count & 63;
+	memcpy(buf, cc->buf, ptr);
+	memcpy(val, cc->val, sizeof val);
+	buf[ptr ++] = 0x80;
+	if (ptr > 56) {
+		memset(buf + ptr, 0, 64 - ptr);
+		br_sha1_round(buf, val);
+		memset(buf, 0, 56);
+	} else {
+		memset(buf + ptr, 0, 56 - ptr);
+	}
+	br_enc64be(buf + 56, cc->count << 3);
+	br_sha1_round(buf, val);
+	br_range_enc32be(dst, val, 5);
+}
+
+/* Export the five state words to dst and return the number of bytes processed so far (see bearssl.h). */
+uint64_t
+br_sha1_state(const br_sha1_context *cc, void *dst)
+{
+	br_range_enc32be(dst, cc->val, 5);
+	return cc->count;
+}
+
+/* Load the five state words from stb and set the byte count; cc->buf is left untouched (see bearssl.h). */
+void
+br_sha1_set_state(br_sha1_context *cc, const void *stb, uint64_t count)
+{
+	br_range_dec32be(cc->val, 5, stb);
+	cc->count = count;
+}
+
+/* Hash class for SHA-1: context size, descriptor bits, then the five entry points cast to the br_hash_class signatures (see bearssl.h). */
+const br_hash_class br_sha1_vtable = {
+	sizeof(br_sha1_context),
+	BR_HASHDESC_ID(br_sha1_ID)
+		| BR_HASHDESC_OUT(20)
+		| BR_HASHDESC_STATE(20)
+		| BR_HASHDESC_LBLEN(6)
+		| BR_HASHDESC_MD_PADDING
+		| BR_HASHDESC_MD_PADDING_BE,
+	(void (*)(const br_hash_class **))&br_sha1_init,
+	(void (*)(const br_hash_class **, const void *, size_t))&br_sha1_update,
+	(void (*)(const br_hash_class *const *, void *))&br_sha1_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_sha1_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_sha1_set_state
+};
diff --git a/third_party/bearssl/src/sha2big.c b/third_party/bearssl/src/sha2big.c
new file mode 100644
index 0000000..5be92ed
--- /dev/null
+++ b/third_party/bearssl/src/sha2big.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define CH(X, Y, Z)    ((((Y) ^ (Z)) & (X)) ^ (Z)) /* bits of Y where X is set, bits of Z elsewhere */
+#define MAJ(X, Y, Z)   (((Y) & (Z)) | (((Y) | (Z)) & (X))) /* bitwise majority of X, Y, Z */
+
+#define ROTR(x, n)    (((uint64_t)(x) << (64 - (n))) | ((uint64_t)(x) >> (n))) /* 64-bit rotate right; needs 0 < n < 64 */
+
+#define BSG5_0(x)      (ROTR(x, 28) ^ ROTR(x, 34) ^ ROTR(x, 39)) /* used on A in SHA2BIG_STEP */
+#define BSG5_1(x)      (ROTR(x, 14) ^ ROTR(x, 18) ^ ROTR(x, 41)) /* used on E in SHA2BIG_STEP */
+#define SSG5_0(x)      (ROTR(x, 1) ^ ROTR(x, 8) ^ (uint64_t)((x) >> 7)) /* message schedule, on w[i - 15] */
+#define SSG5_1(x)      (ROTR(x, 19) ^ ROTR(x, 61) ^ (uint64_t)((x) >> 6)) /* message schedule, on w[i - 2] */
+
+static const uint64_t IV384[8] = { /* initial state copied into cc->val by br_sha384_init() */
+	0xCBBB9D5DC1059ED8, 0x629A292A367CD507,
+	0x9159015A3070DD17, 0x152FECD8F70E5939,
+	0x67332667FFC00B31, 0x8EB44A8768581511,
+	0xDB0C2E0D64F98FA7, 0x47B5481DBEFA4FA4
+};
+
+static const uint64_t IV512[8] = { /* initial state copied into cc->val by br_sha512_init() */
+	0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
+	0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
+	0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
+	0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
+};
+
+static const uint64_t K[80] = { /* one constant per step; read as K[j] in SHA2BIG_STEP */
+	0x428A2F98D728AE22, 0x7137449123EF65CD,
+	0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC,
+	0x3956C25BF348B538, 0x59F111F1B605D019,
+	0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118,
+	0xD807AA98A3030242, 0x12835B0145706FBE,
+	0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2,
+	0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1,
+	0x9BDC06A725C71235, 0xC19BF174CF692694,
+	0xE49B69C19EF14AD2, 0xEFBE4786384F25E3,
+	0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65,
+	0x2DE92C6F592B0275, 0x4A7484AA6EA6E483,
+	0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5,
+	0x983E5152EE66DFAB, 0xA831C66D2DB43210,
+	0xB00327C898FB213F, 0xBF597FC7BEEF0EE4,
+	0xC6E00BF33DA88FC2, 0xD5A79147930AA725,
+	0x06CA6351E003826F, 0x142929670A0E6E70,
+	0x27B70A8546D22FFC, 0x2E1B21385C26C926,
+	0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF,
+	0x650A73548BAF63DE, 0x766A0ABB3C77B2A8,
+	0x81C2C92E47EDAEE6, 0x92722C851482353B,
+	0xA2BFE8A14CF10364, 0xA81A664BBC423001,
+	0xC24B8B70D0F89791, 0xC76C51A30654BE30,
+	0xD192E819D6EF5218, 0xD69906245565A910,
+	0xF40E35855771202A, 0x106AA07032BBD1B8,
+	0x19A4C116B8D2D0C8, 0x1E376C085141AB53,
+	0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8,
+	0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB,
+	0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3,
+	0x748F82EE5DEFB2FC, 0x78A5636F43172F60,
+	0x84C87814A1F0AB72, 0x8CC702081A6439EC,
+	0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9,
+	0xBEF9A3F7B2C67915, 0xC67178F2E372532B,
+	0xCA273ECEEA26619C, 0xD186B8C721C0C207,
+	0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178,
+	0x06F067AA72176FBA, 0x0A637DC5A2C898A6,
+	0x113F9804BEF90DAE, 0x1B710B35131C471B,
+	0x28DB77F523047D84, 0x32CAAB7B40C72493,
+	0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C,
+	0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A,
+	0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817
+};
+
+static void
+sha2big_round(const unsigned char *buf, uint64_t *val)
+{
+	/* Fold the 128-byte block at buf into the 8-word state val, in place. */
+#define SHA2BIG_STEP(A, B, C, D, E, F, G, H, j)   do { \
+		uint64_t T1, T2; \
+		T1 = H + BSG5_1(E) + CH(E, F, G) + K[j] + w[j]; \
+		T2 = BSG5_0(A) + MAJ(A, B, C); \
+		D += T1; \
+		H = T1 + T2; \
+	} while (0)
+
+	int i;
+	uint64_t a, b, c, d, e, f, g, h;
+	uint64_t w[80];
+
+	br_range_dec64be(w, 16, buf); /* w[0..15] are decoded directly from the block */
+	for (i = 16; i < 80; i ++) { /* w[16..79] are derived from earlier words */
+		w[i] = SSG5_1(w[i - 2]) + w[i - 7]
+			+ SSG5_0(w[i - 15]) + w[i - 16];
+	}
+	a = val[0];
+	b = val[1];
+	c = val[2];
+	d = val[3];
+	e = val[4];
+	f = val[5];
+	g = val[6];
+	h = val[7];
+	for (i = 0; i < 80; i += 8) { /* 8 steps per pass: arguments rotate instead of values moving */
+		SHA2BIG_STEP(a, b, c, d, e, f, g, h, i + 0);
+		SHA2BIG_STEP(h, a, b, c, d, e, f, g, i + 1);
+		SHA2BIG_STEP(g, h, a, b, c, d, e, f, i + 2);
+		SHA2BIG_STEP(f, g, h, a, b, c, d, e, i + 3);
+		SHA2BIG_STEP(e, f, g, h, a, b, c, d, i + 4);
+		SHA2BIG_STEP(d, e, f, g, h, a, b, c, i + 5);
+		SHA2BIG_STEP(c, d, e, f, g, h, a, b, i + 6);
+		SHA2BIG_STEP(b, c, d, e, f, g, h, a, i + 7);
+	}
+	val[0] += a; /* add the working variables back into the state */
+	val[1] += b;
+	val[2] += c;
+	val[3] += d;
+	val[4] += e;
+	val[5] += f;
+	val[6] += g;
+	val[7] += h;
+}
+
+/*
+ * Inject len bytes into the hash. Bytes accumulate in cc->buf; each time
+ * 128 of them are present, sha2big_round() folds them into cc->val.
+ */
+static void
+sha2big_update(br_sha384_context *cc, const void *data, size_t len)
+{
+	const unsigned char *src = data;
+	size_t fill = (size_t)cc->count & 127;
+
+	cc->count += (uint64_t)len;
+	while (len > 0) {
+		/* Take as much input as still fits in the current block. */
+		size_t room = 128 - fill;
+		size_t chunk = (len < room) ? len : room;
+
+		memcpy(cc->buf + fill, src, chunk);
+		src += chunk;
+		len -= chunk;
+		fill += chunk;
+		if (fill == 128) {
+			sha2big_round(cc->buf, cc->val);
+			fill = 0;
+		}
+	}
+}
+
+/* Finish on a copy of the state and write the first num words to dst. */
+static void
+sha2big_out(const br_sha384_context *cc, void *dst, int num)
+{
+	unsigned char block[128];
+	uint64_t state[8];
+	size_t used = (size_t)cc->count & 127;
+
+	memcpy(state, cc->val, sizeof state);
+	memcpy(block, cc->buf, used);
+	block[used ++] = 0x80;
+	if (used <= 112) {
+		memset(block + used, 0, 112 - used);
+	} else { /* the 16-byte length does not fit: flush an extra block */
+		memset(block + used, 0, 128 - used);
+		sha2big_round(block, state);
+		memset(block, 0, 112);
+	}
+	br_enc64be(block + 120, cc->count << 3); /* bit length, low 64 bits */
+	br_enc64be(block + 112, cc->count >> 61); /* bit length, upper bits */
+	sha2big_round(block, state);
+	br_range_enc64be(dst, state, num);
+}
+
+/* see bearssl.h -- reset cc: byte count zero, state words taken from IV384 */
+void
+br_sha384_init(br_sha384_context *cc)
+{
+	cc->count = 0;
+	memcpy(cc->val, IV384, sizeof IV384);
+	cc->vtable = &br_sha384_vtable;
+}
+
+/* see bearssl.h -- public entry point; the work is done by sha2big_update() */
+void
+br_sha384_update(br_sha384_context *cc, const void *data, size_t len)
+{
+	sha2big_update(cc, data, len);
+}
+
+/* see bearssl.h -- write the SHA-384 output to dst without modifying cc */
+void
+br_sha384_out(const br_sha384_context *cc, void *dst)
+{
+	sha2big_out(cc, dst, 6); /* only the first 6 of the 8 state words are emitted */
+}
+
+/* see bearssl.h -- export state words and byte count; cc->buf is not saved */
+uint64_t
+br_sha384_state(const br_sha384_context *cc, void *dst)
+{
+	br_range_enc64be(dst, cc->val, 8); /* all 8 state words go to dst */
+	return cc->count; /* same value br_sha384_set_state() takes as count */
+}
+
+/* see bearssl.h -- load 8 state words from stb and set the byte count */
+void
+br_sha384_set_state(br_sha384_context *cc, const void *stb, uint64_t count)
+{
+	cc->count = count;
+	br_range_dec64be(cc->val, 8, stb);
+}
+
+/* see bearssl.h -- reset cc: byte count zero, state words taken from IV512 */
+void
+br_sha512_init(br_sha512_context *cc)
+{
+	cc->count = 0;
+	memcpy(cc->val, IV512, sizeof IV512);
+	cc->vtable = &br_sha512_vtable;
+}
+
+/* see bearssl.h -- write the SHA-512 output to dst without modifying cc */
+void
+br_sha512_out(const br_sha512_context *cc, void *dst)
+{
+	sha2big_out(cc, dst, 8); /* all 8 state words; cc is passed as a br_sha384_context -- presumably the same type, confirm in bearssl.h */
+}
+
+/* see bearssl.h -- table exposing the SHA-384 functions as a br_hash_class */
+const br_hash_class br_sha384_vtable = {
+	sizeof(br_sha384_context), /* size of the SHA-384 context structure */
+	BR_HASHDESC_ID(br_sha384_ID)
+		| BR_HASHDESC_OUT(48) /* br_sha384_out() emits 6 64-bit words */
+		| BR_HASHDESC_STATE(64) /* br_sha384_state() emits 8 64-bit words */
+		| BR_HASHDESC_LBLEN(7) /* presumably log2 of the 128-byte block -- confirm */
+		| BR_HASHDESC_MD_PADDING
+		| BR_HASHDESC_MD_PADDING_BE
+		| BR_HASHDESC_MD_PADDING_128,
+	(void (*)(const br_hash_class **))&br_sha384_init, /* casts retype the context-specific functions */
+	(void (*)(const br_hash_class **, const void *, size_t))
+		&br_sha384_update,
+	(void (*)(const br_hash_class *const *, void *))&br_sha384_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_sha384_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_sha384_set_state
+};
+
+/* see bearssl.h -- table exposing the SHA-512 functions as a br_hash_class */
+const br_hash_class br_sha512_vtable = {
+	sizeof(br_sha512_context), /* size of the SHA-512 context structure */
+	BR_HASHDESC_ID(br_sha512_ID)
+		| BR_HASHDESC_OUT(64) /* br_sha512_out() emits 8 64-bit words */
+		| BR_HASHDESC_STATE(64)
+		| BR_HASHDESC_LBLEN(7) /* presumably log2 of the 128-byte block -- confirm */
+		| BR_HASHDESC_MD_PADDING
+		| BR_HASHDESC_MD_PADDING_BE
+		| BR_HASHDESC_MD_PADDING_128,
+	(void (*)(const br_hash_class **))&br_sha512_init, /* casts retype the context-specific functions */
+	(void (*)(const br_hash_class **, const void *, size_t))
+		&br_sha512_update, /* NOTE(review): br_sha512_update/_state/_set_state are not defined in this file; presumably header aliases -- confirm */
+	(void (*)(const br_hash_class *const *, void *))&br_sha512_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_sha512_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_sha512_set_state
+};
diff --git a/third_party/bearssl/src/sha2small.c b/third_party/bearssl/src/sha2small.c
new file mode 100644
index 0000000..ca19655
--- /dev/null
+++ b/third_party/bearssl/src/sha2small.c
@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define CH(X, Y, Z)    ((((Y) ^ (Z)) & (X)) ^ (Z)) /* bits of Y where X is set, bits of Z elsewhere */
+#define MAJ(X, Y, Z)   (((Y) & (Z)) | (((Y) | (Z)) & (X))) /* bitwise majority of X, Y, Z */
+
+#define ROTR(x, n)    (((uint32_t)(x) << (32 - (n))) | ((uint32_t)(x) >> (n))) /* 32-bit rotate right; needs 0 < n < 32 */
+
+#define BSG2_0(x)      (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) /* used on A in SHA2_STEP */
+#define BSG2_1(x)      (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) /* used on E in SHA2_STEP */
+#define SSG2_0(x)      (ROTR(x, 7) ^ ROTR(x, 18) ^ (uint32_t)((x) >> 3)) /* message schedule, on w[i - 15] */
+#define SSG2_1(x)      (ROTR(x, 17) ^ ROTR(x, 19) ^ (uint32_t)((x) >> 10)) /* message schedule, on w[i - 2] */
+
+/* see inner.h -- initial state copied into cc->val by br_sha224_init() */
+const uint32_t br_sha224_IV[8] = {
+	0xC1059ED8, 0x367CD507, 0x3070DD17, 0xF70E5939,
+	0xFFC00B31, 0x68581511, 0x64F98FA7, 0xBEFA4FA4
+};
+
+/* see inner.h -- initial state copied into cc->val by br_sha256_init() */
+const uint32_t br_sha256_IV[8] = {
+	0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
+	0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
+};
+
+static const uint32_t K[64] = { /* one constant per step; read as K[j] in SHA2_STEP */
+	0x428A2F98, 0x71374491, 0xB5C0FBCF, 0xE9B5DBA5,
+	0x3956C25B, 0x59F111F1, 0x923F82A4, 0xAB1C5ED5,
+	0xD807AA98, 0x12835B01, 0x243185BE, 0x550C7DC3,
+	0x72BE5D74, 0x80DEB1FE, 0x9BDC06A7, 0xC19BF174,
+	0xE49B69C1, 0xEFBE4786, 0x0FC19DC6, 0x240CA1CC,
+	0x2DE92C6F, 0x4A7484AA, 0x5CB0A9DC, 0x76F988DA,
+	0x983E5152, 0xA831C66D, 0xB00327C8, 0xBF597FC7,
+	0xC6E00BF3, 0xD5A79147, 0x06CA6351, 0x14292967,
+	0x27B70A85, 0x2E1B2138, 0x4D2C6DFC, 0x53380D13,
+	0x650A7354, 0x766A0ABB, 0x81C2C92E, 0x92722C85,
+	0xA2BFE8A1, 0xA81A664B, 0xC24B8B70, 0xC76C51A3,
+	0xD192E819, 0xD6990624, 0xF40E3585, 0x106AA070,
+	0x19A4C116, 0x1E376C08, 0x2748774C, 0x34B0BCB5,
+	0x391C0CB3, 0x4ED8AA4A, 0x5B9CCA4F, 0x682E6FF3,
+	0x748F82EE, 0x78A5636F, 0x84C87814, 0x8CC70208,
+	0x90BEFFFA, 0xA4506CEB, 0xBEF9A3F7, 0xC67178F2
+};
+
+/* see inner.h -- fold the 64-byte block at buf into the 8-word state val */
+void
+br_sha2small_round(const unsigned char *buf, uint32_t *val)
+{
+
+#define SHA2_STEP(A, B, C, D, E, F, G, H, j)   do { \
+		uint32_t T1, T2; \
+		T1 = H + BSG2_1(E) + CH(E, F, G) + K[j] + w[j]; \
+		T2 = BSG2_0(A) + MAJ(A, B, C); \
+		D += T1; \
+		H = T1 + T2; \
+	} while (0)
+
+	int i;
+	uint32_t a, b, c, d, e, f, g, h;
+	uint32_t w[64];
+
+	br_range_dec32be(w, 16, buf); /* w[0..15] are decoded directly from the block */
+	for (i = 16; i < 64; i ++) { /* w[16..63] are derived from earlier words */
+		w[i] = SSG2_1(w[i - 2]) + w[i - 7]
+			+ SSG2_0(w[i - 15]) + w[i - 16];
+	}
+	a = val[0];
+	b = val[1];
+	c = val[2];
+	d = val[3];
+	e = val[4];
+	f = val[5];
+	g = val[6];
+	h = val[7];
+	for (i = 0; i < 64; i += 8) { /* 8 steps per pass: arguments rotate instead of values moving */
+		SHA2_STEP(a, b, c, d, e, f, g, h, i + 0);
+		SHA2_STEP(h, a, b, c, d, e, f, g, i + 1);
+		SHA2_STEP(g, h, a, b, c, d, e, f, i + 2);
+		SHA2_STEP(f, g, h, a, b, c, d, e, i + 3);
+		SHA2_STEP(e, f, g, h, a, b, c, d, i + 4);
+		SHA2_STEP(d, e, f, g, h, a, b, c, i + 5);
+		SHA2_STEP(c, d, e, f, g, h, a, b, i + 6);
+		SHA2_STEP(b, c, d, e, f, g, h, a, i + 7);
+	}
+	val[0] += a; /* add the working variables back into the state */
+	val[1] += b;
+	val[2] += c;
+	val[3] += d;
+	val[4] += e;
+	val[5] += f;
+	val[6] += g;
+	val[7] += h;
+
+#if 0
+/* obsolete: never compiled (#if 0); older variant using a 16-word rolling schedule W[] */
+#define SHA2_MEXP1(pc)   do { \
+		W[pc] = br_dec32be(buf + ((pc) << 2)); \
+	} while (0)
+
+#define SHA2_MEXP2(pc)   do { \
+		W[(pc) & 0x0F] = SSG2_1(W[((pc) - 2) & 0x0F]) \
+			+ W[((pc) - 7) & 0x0F] \
+			+ SSG2_0(W[((pc) - 15) & 0x0F]) + W[(pc) & 0x0F]; \
+	} while (0)
+
+#define SHA2_STEPn(n, a, b, c, d, e, f, g, h, pc)   do { \
+		uint32_t t1, t2; \
+		SHA2_MEXP ## n(pc); \
+		t1 = h + BSG2_1(e) + CH(e, f, g) \
+			+ K[pcount + (pc)] + W[(pc) & 0x0F]; \
+		t2 = BSG2_0(a) + MAJ(a, b, c); \
+		d += t1; \
+		h = t1 + t2; \
+	} while (0)
+
+#define SHA2_STEP1(a, b, c, d, e, f, g, h, pc) \
+	SHA2_STEPn(1, a, b, c, d, e, f, g, h, pc)
+#define SHA2_STEP2(a, b, c, d, e, f, g, h, pc) \
+	SHA2_STEPn(2, a, b, c, d, e, f, g, h, pc)
+
+	uint32_t A, B, C, D, E, F, G, H;
+	uint32_t W[16];
+	unsigned pcount;
+
+	A = val[0];
+	B = val[1];
+	C = val[2];
+	D = val[3];
+	E = val[4];
+	F = val[5];
+	G = val[6];
+	H = val[7];
+	pcount = 0;
+	SHA2_STEP1(A, B, C, D, E, F, G, H,  0);
+	SHA2_STEP1(H, A, B, C, D, E, F, G,  1);
+	SHA2_STEP1(G, H, A, B, C, D, E, F,  2);
+	SHA2_STEP1(F, G, H, A, B, C, D, E,  3);
+	SHA2_STEP1(E, F, G, H, A, B, C, D,  4);
+	SHA2_STEP1(D, E, F, G, H, A, B, C,  5);
+	SHA2_STEP1(C, D, E, F, G, H, A, B,  6);
+	SHA2_STEP1(B, C, D, E, F, G, H, A,  7);
+	SHA2_STEP1(A, B, C, D, E, F, G, H,  8);
+	SHA2_STEP1(H, A, B, C, D, E, F, G,  9);
+	SHA2_STEP1(G, H, A, B, C, D, E, F, 10);
+	SHA2_STEP1(F, G, H, A, B, C, D, E, 11);
+	SHA2_STEP1(E, F, G, H, A, B, C, D, 12);
+	SHA2_STEP1(D, E, F, G, H, A, B, C, 13);
+	SHA2_STEP1(C, D, E, F, G, H, A, B, 14);
+	SHA2_STEP1(B, C, D, E, F, G, H, A, 15);
+	for (pcount = 16; pcount < 64; pcount += 16) {
+		SHA2_STEP2(A, B, C, D, E, F, G, H,  0);
+		SHA2_STEP2(H, A, B, C, D, E, F, G,  1);
+		SHA2_STEP2(G, H, A, B, C, D, E, F,  2);
+		SHA2_STEP2(F, G, H, A, B, C, D, E,  3);
+		SHA2_STEP2(E, F, G, H, A, B, C, D,  4);
+		SHA2_STEP2(D, E, F, G, H, A, B, C,  5);
+		SHA2_STEP2(C, D, E, F, G, H, A, B,  6);
+		SHA2_STEP2(B, C, D, E, F, G, H, A,  7);
+		SHA2_STEP2(A, B, C, D, E, F, G, H,  8);
+		SHA2_STEP2(H, A, B, C, D, E, F, G,  9);
+		SHA2_STEP2(G, H, A, B, C, D, E, F, 10);
+		SHA2_STEP2(F, G, H, A, B, C, D, E, 11);
+		SHA2_STEP2(E, F, G, H, A, B, C, D, 12);
+		SHA2_STEP2(D, E, F, G, H, A, B, C, 13);
+		SHA2_STEP2(C, D, E, F, G, H, A, B, 14);
+		SHA2_STEP2(B, C, D, E, F, G, H, A, 15);
+	}
+	val[0] += A;
+	val[1] += B;
+	val[2] += C;
+	val[3] += D;
+	val[4] += E;
+	val[5] += F;
+	val[6] += G;
+	val[7] += H;
+#endif
+}
+
+/*
+ * Inject len bytes into the hash. Bytes accumulate in cc->buf; each time
+ * 64 of them are present, br_sha2small_round() folds them into cc->val.
+ */
+static void
+sha2small_update(br_sha224_context *cc, const void *data, size_t len)
+{
+	const unsigned char *src = data;
+	size_t fill = (size_t)cc->count & 63;
+
+	cc->count += (uint64_t)len;
+	while (len > 0) {
+		/* Take as much input as still fits in the current block. */
+		size_t room = 64 - fill;
+		size_t chunk = (len < room) ? len : room;
+
+		memcpy(cc->buf + fill, src, chunk);
+		src += chunk;
+		len -= chunk;
+		fill += chunk;
+		if (fill == 64) {
+			br_sha2small_round(cc->buf, cc->val);
+			fill = 0;
+		}
+	}
+}
+
+/* Finish on a copy of the state and write the first num words to dst. */
+static void
+sha2small_out(const br_sha224_context *cc, void *dst, int num)
+{
+	unsigned char block[64];
+	uint32_t state[8];
+	size_t used = (size_t)cc->count & 63;
+
+	memcpy(state, cc->val, sizeof state);
+	memcpy(block, cc->buf, used);
+	block[used ++] = 0x80;
+	if (used <= 56) {
+		memset(block + used, 0, 56 - used);
+	} else { /* no room for the 8-byte length: flush an extra block */
+		memset(block + used, 0, 64 - used);
+		br_sha2small_round(block, state);
+		memset(block, 0, 56);
+	}
+	br_enc64be(block + 56, cc->count << 3);
+	br_sha2small_round(block, state);
+	br_range_enc32be(dst, state, num);
+}
+
+/* see bearssl.h -- reset cc: byte count zero, state from br_sha224_IV */
+void
+br_sha224_init(br_sha224_context *cc)
+{
+	cc->count = 0;
+	memcpy(cc->val, br_sha224_IV, sizeof cc->val);
+	cc->vtable = &br_sha224_vtable;
+}
+
+/* see bearssl.h -- public entry point; the work is done by sha2small_update() */
+void
+br_sha224_update(br_sha224_context *cc, const void *data, size_t len)
+{
+	sha2small_update(cc, data, len);
+}
+
+/* see bearssl.h -- write the SHA-224 output to dst without modifying cc */
+void
+br_sha224_out(const br_sha224_context *cc, void *dst)
+{
+	sha2small_out(cc, dst, 7); /* only the first 7 of the 8 state words are emitted */
+}
+
+/* see bearssl.h -- export state words and byte count; cc->buf is not saved */
+uint64_t
+br_sha224_state(const br_sha224_context *cc, void *dst)
+{
+	br_range_enc32be(dst, cc->val, 8); /* all 8 state words go to dst */
+	return cc->count; /* same value br_sha224_set_state() takes as count */
+}
+
+/* see bearssl.h -- load 8 state words from stb and set the byte count */
+void
+br_sha224_set_state(br_sha224_context *cc, const void *stb, uint64_t count)
+{
+	cc->count = count;
+	br_range_dec32be(cc->val, 8, stb);
+}
+
+/* see bearssl.h -- reset cc: byte count zero, state from br_sha256_IV */
+void
+br_sha256_init(br_sha256_context *cc)
+{
+	cc->count = 0;
+	memcpy(cc->val, br_sha256_IV, sizeof cc->val);
+	cc->vtable = &br_sha256_vtable;
+}
+
+/* see bearssl.h -- write the SHA-256 output to dst without modifying cc */
+void
+br_sha256_out(const br_sha256_context *cc, void *dst)
+{
+	sha2small_out(cc, dst, 8); /* all 8 state words; cc is passed as a br_sha224_context -- presumably the same type, confirm in bearssl.h */
+}
+
+/* see bearssl.h -- table exposing the SHA-224 functions as a br_hash_class */
+const br_hash_class br_sha224_vtable = {
+	sizeof(br_sha224_context), /* size of the SHA-224 context structure */
+	BR_HASHDESC_ID(br_sha224_ID)
+		| BR_HASHDESC_OUT(28) /* br_sha224_out() emits 7 32-bit words */
+		| BR_HASHDESC_STATE(32) /* br_sha224_state() emits 8 32-bit words */
+		| BR_HASHDESC_LBLEN(6) /* presumably log2 of the 64-byte block -- confirm */
+		| BR_HASHDESC_MD_PADDING
+		| BR_HASHDESC_MD_PADDING_BE,
+	(void (*)(const br_hash_class **))&br_sha224_init, /* casts retype the context-specific functions */
+	(void (*)(const br_hash_class **,
+		const void *, size_t))&br_sha224_update,
+	(void (*)(const br_hash_class *const *, void *))&br_sha224_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_sha224_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_sha224_set_state
+};
+
+/* see bearssl.h -- table exposing the SHA-256 functions as a br_hash_class */
+const br_hash_class br_sha256_vtable = {
+	sizeof(br_sha256_context), /* size of the SHA-256 context structure */
+	BR_HASHDESC_ID(br_sha256_ID)
+		| BR_HASHDESC_OUT(32) /* br_sha256_out() emits 8 32-bit words */
+		| BR_HASHDESC_STATE(32)
+		| BR_HASHDESC_LBLEN(6) /* presumably log2 of the 64-byte block -- confirm */
+		| BR_HASHDESC_MD_PADDING
+		| BR_HASHDESC_MD_PADDING_BE,
+	(void (*)(const br_hash_class **))&br_sha256_init, /* casts retype the context-specific functions */
+	(void (*)(const br_hash_class **,
+		const void *, size_t))&br_sha256_update, /* NOTE(review): br_sha256_update/_state/_set_state are not defined in this file; presumably header aliases -- confirm */
+	(void (*)(const br_hash_class *const *, void *))&br_sha256_out,
+	(uint64_t (*)(const br_hash_class *const *, void *))&br_sha256_state,
+	(void (*)(const br_hash_class **, const void *, uint64_t))
+		&br_sha256_set_state
+};
diff --git a/third_party/bearssl/src/shake.c b/third_party/bearssl/src/shake.c
new file mode 100644
index 0000000..80d7176
--- /dev/null
+++ b/third_party/bearssl/src/shake.c
@@ -0,0 +1,590 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Round constants: process_block() XORs RC[j] into A[0] during round j.
+ */
+static const uint64_t RC[] = {
+	0x0000000000000001, 0x0000000000008082,
+	0x800000000000808A, 0x8000000080008000,
+	0x000000000000808B, 0x0000000080000001,
+	0x8000000080008081, 0x8000000000008009,
+	0x000000000000008A, 0x0000000000000088,
+	0x0000000080008009, 0x000000008000000A,
+	0x000000008000808B, 0x800000000000008B,
+	0x8000000000008089, 0x8000000000008003,
+	0x8000000000008002, 0x8000000000000080,
+	0x000000000000800A, 0x800000008000000A,
+	0x8000000080008081, 0x8000000000008080,
+	0x0000000080000001, 0x8000000080008008
+};
+
+/*
+ * Fold rate bytes of data into the state: each group of 8 bytes is read
+ * with br_dec64le() and XORed into the next word of A (rate: multiple of 8).
+ */
+static void
+xor_block(uint64_t *A, const void *data, size_t rate)
+{
+	size_t w;
+
+	for (w = 0; (w << 3) < rate; w ++) {
+		A[w] ^= br_dec64le((const unsigned char *)data + (w << 3));
+	}
+}
+
+/*
+ * Apply the 24 rounds of the permutation, in place, to the state A. Input
+ * data is not read here: callers XOR it into A first (see xor_block()).
+ */
+static void
+process_block(uint64_t *A)
+{
+	uint64_t t0, t1, t2, t3, t4;
+	uint64_t tt0, tt1, tt2, tt3;
+	uint64_t t, kt;
+	uint64_t c0, c1, c2, c3, c4, bnn;
+	int j;
+
+	/*
+	 * Compute the 24 rounds. This loop is partially unrolled (each
+	 * iteration computes two rounds).
+	 */
+	for (j = 0; j < 24; j += 2) {
+
+		tt0 = A[ 1] ^ A[ 6]; /* first round of the pair: build t0..t4 */
+		tt1 = A[11] ^ A[16];
+		tt0 ^= A[21] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[ 4] ^ A[ 9];
+		tt3 = A[14] ^ A[19];
+		tt0 ^= A[24];
+		tt2 ^= tt3;
+		t0 = tt0 ^ tt2;
+
+		tt0 = A[ 2] ^ A[ 7];
+		tt1 = A[12] ^ A[17];
+		tt0 ^= A[22] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[ 0] ^ A[ 5];
+		tt3 = A[10] ^ A[15];
+		tt0 ^= A[20];
+		tt2 ^= tt3;
+		t1 = tt0 ^ tt2;
+
+		tt0 = A[ 3] ^ A[ 8];
+		tt1 = A[13] ^ A[18];
+		tt0 ^= A[23] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[ 1] ^ A[ 6];
+		tt3 = A[11] ^ A[16];
+		tt0 ^= A[21];
+		tt2 ^= tt3;
+		t2 = tt0 ^ tt2;
+
+		tt0 = A[ 4] ^ A[ 9];
+		tt1 = A[14] ^ A[19];
+		tt0 ^= A[24] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[ 2] ^ A[ 7];
+		tt3 = A[12] ^ A[17];
+		tt0 ^= A[22];
+		tt2 ^= tt3;
+		t3 = tt0 ^ tt2;
+
+		tt0 = A[ 0] ^ A[ 5];
+		tt1 = A[10] ^ A[15];
+		tt0 ^= A[20] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[ 3] ^ A[ 8];
+		tt3 = A[13] ^ A[18];
+		tt0 ^= A[23];
+		tt2 ^= tt3;
+		t4 = tt0 ^ tt2;
+
+		A[ 0] = A[ 0] ^ t0; /* each of t0..t4 is XORed into five words */
+		A[ 5] = A[ 5] ^ t0;
+		A[10] = A[10] ^ t0;
+		A[15] = A[15] ^ t0;
+		A[20] = A[20] ^ t0;
+		A[ 1] = A[ 1] ^ t1;
+		A[ 6] = A[ 6] ^ t1;
+		A[11] = A[11] ^ t1;
+		A[16] = A[16] ^ t1;
+		A[21] = A[21] ^ t1;
+		A[ 2] = A[ 2] ^ t2;
+		A[ 7] = A[ 7] ^ t2;
+		A[12] = A[12] ^ t2;
+		A[17] = A[17] ^ t2;
+		A[22] = A[22] ^ t2;
+		A[ 3] = A[ 3] ^ t3;
+		A[ 8] = A[ 8] ^ t3;
+		A[13] = A[13] ^ t3;
+		A[18] = A[18] ^ t3;
+		A[23] = A[23] ^ t3;
+		A[ 4] = A[ 4] ^ t4;
+		A[ 9] = A[ 9] ^ t4;
+		A[14] = A[14] ^ t4;
+		A[19] = A[19] ^ t4;
+		A[24] = A[24] ^ t4;
+		A[ 5] = (A[ 5] << 36) | (A[ 5] >> (64 - 36)); /* rotate words left by fixed amounts (A[0] is not rotated) */
+		A[10] = (A[10] <<  3) | (A[10] >> (64 -  3));
+		A[15] = (A[15] << 41) | (A[15] >> (64 - 41));
+		A[20] = (A[20] << 18) | (A[20] >> (64 - 18));
+		A[ 1] = (A[ 1] <<  1) | (A[ 1] >> (64 -  1));
+		A[ 6] = (A[ 6] << 44) | (A[ 6] >> (64 - 44));
+		A[11] = (A[11] << 10) | (A[11] >> (64 - 10));
+		A[16] = (A[16] << 45) | (A[16] >> (64 - 45));
+		A[21] = (A[21] <<  2) | (A[21] >> (64 - 2));
+		A[ 2] = (A[ 2] << 62) | (A[ 2] >> (64 - 62));
+		A[ 7] = (A[ 7] <<  6) | (A[ 7] >> (64 -  6));
+		A[12] = (A[12] << 43) | (A[12] >> (64 - 43));
+		A[17] = (A[17] << 15) | (A[17] >> (64 - 15));
+		A[22] = (A[22] << 61) | (A[22] >> (64 - 61));
+		A[ 3] = (A[ 3] << 28) | (A[ 3] >> (64 - 28));
+		A[ 8] = (A[ 8] << 55) | (A[ 8] >> (64 - 55));
+		A[13] = (A[13] << 25) | (A[13] >> (64 - 25));
+		A[18] = (A[18] << 21) | (A[18] >> (64 - 21));
+		A[23] = (A[23] << 56) | (A[23] >> (64 - 56));
+		A[ 4] = (A[ 4] << 27) | (A[ 4] >> (64 - 27));
+		A[ 9] = (A[ 9] << 20) | (A[ 9] >> (64 - 20));
+		A[14] = (A[14] << 39) | (A[14] >> (64 - 39));
+		A[19] = (A[19] <<  8) | (A[19] >> (64 -  8));
+		A[24] = (A[24] << 14) | (A[24] >> (64 - 14));
+		bnn = ~A[12]; /* nonlinear step: groups of five words combined with AND/OR/NOT */
+		kt = A[ 6] | A[12];
+		c0 = A[ 0] ^ kt;
+		kt = bnn | A[18];
+		c1 = A[ 6] ^ kt;
+		kt = A[18] & A[24];
+		c2 = A[12] ^ kt;
+		kt = A[24] | A[ 0];
+		c3 = A[18] ^ kt;
+		kt = A[ 0] & A[ 6];
+		c4 = A[24] ^ kt;
+		A[ 0] = c0;
+		A[ 6] = c1;
+		A[12] = c2;
+		A[18] = c3;
+		A[24] = c4;
+		bnn = ~A[22];
+		kt = A[ 9] | A[10];
+		c0 = A[ 3] ^ kt;
+		kt = A[10] & A[16];
+		c1 = A[ 9] ^ kt;
+		kt = A[16] | bnn;
+		c2 = A[10] ^ kt;
+		kt = A[22] | A[ 3];
+		c3 = A[16] ^ kt;
+		kt = A[ 3] & A[ 9];
+		c4 = A[22] ^ kt;
+		A[ 3] = c0;
+		A[ 9] = c1;
+		A[10] = c2;
+		A[16] = c3;
+		A[22] = c4;
+		bnn = ~A[19];
+		kt = A[ 7] | A[13];
+		c0 = A[ 1] ^ kt;
+		kt = A[13] & A[19];
+		c1 = A[ 7] ^ kt;
+		kt = bnn & A[20];
+		c2 = A[13] ^ kt;
+		kt = A[20] | A[ 1];
+		c3 = bnn ^ kt;
+		kt = A[ 1] & A[ 7];
+		c4 = A[20] ^ kt;
+		A[ 1] = c0;
+		A[ 7] = c1;
+		A[13] = c2;
+		A[19] = c3;
+		A[20] = c4;
+		bnn = ~A[17];
+		kt = A[ 5] & A[11];
+		c0 = A[ 4] ^ kt;
+		kt = A[11] | A[17];
+		c1 = A[ 5] ^ kt;
+		kt = bnn | A[23];
+		c2 = A[11] ^ kt;
+		kt = A[23] & A[ 4];
+		c3 = bnn ^ kt;
+		kt = A[ 4] | A[ 5];
+		c4 = A[23] ^ kt;
+		A[ 4] = c0;
+		A[ 5] = c1;
+		A[11] = c2;
+		A[17] = c3;
+		A[23] = c4;
+		bnn = ~A[ 8];
+		kt = bnn & A[14];
+		c0 = A[ 2] ^ kt;
+		kt = A[14] | A[15];
+		c1 = bnn ^ kt;
+		kt = A[15] & A[21];
+		c2 = A[14] ^ kt;
+		kt = A[21] | A[ 2];
+		c3 = A[15] ^ kt;
+		kt = A[ 2] & A[ 8];
+		c4 = A[21] ^ kt;
+		A[ 2] = c0;
+		A[ 8] = c1;
+		A[14] = c2;
+		A[15] = c3;
+		A[21] = c4;
+		A[ 0] = A[ 0] ^ RC[j + 0]; /* round constant for round j */
+
+		tt0 = A[ 6] ^ A[ 9]; /* second round of the pair: same structure, different word indices */
+		tt1 = A[ 7] ^ A[ 5];
+		tt0 ^= A[ 8] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[24] ^ A[22];
+		tt3 = A[20] ^ A[23];
+		tt0 ^= A[21];
+		tt2 ^= tt3;
+		t0 = tt0 ^ tt2;
+
+		tt0 = A[12] ^ A[10];
+		tt1 = A[13] ^ A[11];
+		tt0 ^= A[14] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[ 0] ^ A[ 3];
+		tt3 = A[ 1] ^ A[ 4];
+		tt0 ^= A[ 2];
+		tt2 ^= tt3;
+		t1 = tt0 ^ tt2;
+
+		tt0 = A[18] ^ A[16];
+		tt1 = A[19] ^ A[17];
+		tt0 ^= A[15] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[ 6] ^ A[ 9];
+		tt3 = A[ 7] ^ A[ 5];
+		tt0 ^= A[ 8];
+		tt2 ^= tt3;
+		t2 = tt0 ^ tt2;
+
+		tt0 = A[24] ^ A[22];
+		tt1 = A[20] ^ A[23];
+		tt0 ^= A[21] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[12] ^ A[10];
+		tt3 = A[13] ^ A[11];
+		tt0 ^= A[14];
+		tt2 ^= tt3;
+		t3 = tt0 ^ tt2;
+
+		tt0 = A[ 0] ^ A[ 3];
+		tt1 = A[ 1] ^ A[ 4];
+		tt0 ^= A[ 2] ^ tt1;
+		tt0 = (tt0 << 1) | (tt0 >> 63);
+		tt2 = A[18] ^ A[16];
+		tt3 = A[19] ^ A[17];
+		tt0 ^= A[15];
+		tt2 ^= tt3;
+		t4 = tt0 ^ tt2;
+
+		A[ 0] = A[ 0] ^ t0;
+		A[ 3] = A[ 3] ^ t0;
+		A[ 1] = A[ 1] ^ t0;
+		A[ 4] = A[ 4] ^ t0;
+		A[ 2] = A[ 2] ^ t0;
+		A[ 6] = A[ 6] ^ t1;
+		A[ 9] = A[ 9] ^ t1;
+		A[ 7] = A[ 7] ^ t1;
+		A[ 5] = A[ 5] ^ t1;
+		A[ 8] = A[ 8] ^ t1;
+		A[12] = A[12] ^ t2;
+		A[10] = A[10] ^ t2;
+		A[13] = A[13] ^ t2;
+		A[11] = A[11] ^ t2;
+		A[14] = A[14] ^ t2;
+		A[18] = A[18] ^ t3;
+		A[16] = A[16] ^ t3;
+		A[19] = A[19] ^ t3;
+		A[17] = A[17] ^ t3;
+		A[15] = A[15] ^ t3;
+		A[24] = A[24] ^ t4;
+		A[22] = A[22] ^ t4;
+		A[20] = A[20] ^ t4;
+		A[23] = A[23] ^ t4;
+		A[21] = A[21] ^ t4;
+		A[ 3] = (A[ 3] << 36) | (A[ 3] >> (64 - 36));
+		A[ 1] = (A[ 1] <<  3) | (A[ 1] >> (64 -  3));
+		A[ 4] = (A[ 4] << 41) | (A[ 4] >> (64 - 41));
+		A[ 2] = (A[ 2] << 18) | (A[ 2] >> (64 - 18));
+		A[ 6] = (A[ 6] <<  1) | (A[ 6] >> (64 -  1));
+		A[ 9] = (A[ 9] << 44) | (A[ 9] >> (64 - 44));
+		A[ 7] = (A[ 7] << 10) | (A[ 7] >> (64 - 10));
+		A[ 5] = (A[ 5] << 45) | (A[ 5] >> (64 - 45));
+		A[ 8] = (A[ 8] <<  2) | (A[ 8] >> (64 - 2));
+		A[12] = (A[12] << 62) | (A[12] >> (64 - 62));
+		A[10] = (A[10] <<  6) | (A[10] >> (64 -  6));
+		A[13] = (A[13] << 43) | (A[13] >> (64 - 43));
+		A[11] = (A[11] << 15) | (A[11] >> (64 - 15));
+		A[14] = (A[14] << 61) | (A[14] >> (64 - 61));
+		A[18] = (A[18] << 28) | (A[18] >> (64 - 28));
+		A[16] = (A[16] << 55) | (A[16] >> (64 - 55));
+		A[19] = (A[19] << 25) | (A[19] >> (64 - 25));
+		A[17] = (A[17] << 21) | (A[17] >> (64 - 21));
+		A[15] = (A[15] << 56) | (A[15] >> (64 - 56));
+		A[24] = (A[24] << 27) | (A[24] >> (64 - 27));
+		A[22] = (A[22] << 20) | (A[22] >> (64 - 20));
+		A[20] = (A[20] << 39) | (A[20] >> (64 - 39));
+		A[23] = (A[23] <<  8) | (A[23] >> (64 -  8));
+		A[21] = (A[21] << 14) | (A[21] >> (64 - 14));
+		bnn = ~A[13];
+		kt = A[ 9] | A[13];
+		c0 = A[ 0] ^ kt;
+		kt = bnn | A[17];
+		c1 = A[ 9] ^ kt;
+		kt = A[17] & A[21];
+		c2 = A[13] ^ kt;
+		kt = A[21] | A[ 0];
+		c3 = A[17] ^ kt;
+		kt = A[ 0] & A[ 9];
+		c4 = A[21] ^ kt;
+		A[ 0] = c0;
+		A[ 9] = c1;
+		A[13] = c2;
+		A[17] = c3;
+		A[21] = c4;
+		bnn = ~A[14];
+		kt = A[22] | A[ 1];
+		c0 = A[18] ^ kt;
+		kt = A[ 1] & A[ 5];
+		c1 = A[22] ^ kt;
+		kt = A[ 5] | bnn;
+		c2 = A[ 1] ^ kt;
+		kt = A[14] | A[18];
+		c3 = A[ 5] ^ kt;
+		kt = A[18] & A[22];
+		c4 = A[14] ^ kt;
+		A[18] = c0;
+		A[22] = c1;
+		A[ 1] = c2;
+		A[ 5] = c3;
+		A[14] = c4;
+		bnn = ~A[23];
+		kt = A[10] | A[19];
+		c0 = A[ 6] ^ kt;
+		kt = A[19] & A[23];
+		c1 = A[10] ^ kt;
+		kt = bnn & A[ 2];
+		c2 = A[19] ^ kt;
+		kt = A[ 2] | A[ 6];
+		c3 = bnn ^ kt;
+		kt = A[ 6] & A[10];
+		c4 = A[ 2] ^ kt;
+		A[ 6] = c0;
+		A[10] = c1;
+		A[19] = c2;
+		A[23] = c3;
+		A[ 2] = c4;
+		bnn = ~A[11];
+		kt = A[ 3] & A[ 7];
+		c0 = A[24] ^ kt;
+		kt = A[ 7] | A[11];
+		c1 = A[ 3] ^ kt;
+		kt = bnn | A[15];
+		c2 = A[ 7] ^ kt;
+		kt = A[15] & A[24];
+		c3 = bnn ^ kt;
+		kt = A[24] | A[ 3];
+		c4 = A[15] ^ kt;
+		A[24] = c0;
+		A[ 3] = c1;
+		A[ 7] = c2;
+		A[11] = c3;
+		A[15] = c4;
+		bnn = ~A[16];
+		kt = bnn & A[20];
+		c0 = A[12] ^ kt;
+		kt = A[20] | A[ 4];
+		c1 = bnn ^ kt;
+		kt = A[ 4] & A[ 8];
+		c2 = A[20] ^ kt;
+		kt = A[ 8] | A[12];
+		c3 = A[ 4] ^ kt;
+		kt = A[12] & A[16];
+		c4 = A[ 8] ^ kt;
+		A[12] = c0;
+		A[16] = c1;
+		A[20] = c2;
+		A[ 4] = c3;
+		A[ 8] = c4;
+		A[ 0] = A[ 0] ^ RC[j + 1]; /* round constant for round j + 1 */
+		t = A[ 5]; /* two 12-word cycles reshuffle the words; A[0] is not moved */
+		A[ 5] = A[18];
+		A[18] = A[11];
+		A[11] = A[10];
+		A[10] = A[ 6];
+		A[ 6] = A[22];
+		A[22] = A[20];
+		A[20] = A[12];
+		A[12] = A[19];
+		A[19] = A[15];
+		A[15] = A[24];
+		A[24] = A[ 8];
+		A[ 8] = t;
+		t = A[ 1];
+		A[ 1] = A[ 9];
+		A[ 9] = A[14];
+		A[14] = A[ 2];
+		A[ 2] = A[13];
+		A[13] = A[23];
+		A[23] = A[ 4];
+		A[ 4] = A[21];
+		A[21] = A[16];
+		A[16] = A[ 3];
+		A[ 3] = A[17];
+		A[17] = A[ 7];
+		A[ 7] = t;
+	}
+}
+
+/* see bearssl_kdf.h -- reset sc; rate is 200 - (security_level >> 2) bytes */
+void
+br_shake_init(br_shake_context *sc, int security_level)
+{
+	static const unsigned char inverted[] = { 1, 2, 8, 12, 17, 20 };
+	size_t k;
+
+	sc->dptr = 0;
+	sc->rate = 200 - (size_t)(security_level >> 2);
+	memset(sc->A, 0, sizeof sc->A);
+	for (k = 0; k < sizeof inverted; k ++) { /* these words start as all-ones */
+		sc->A[inverted[k]] = ~(uint64_t)0;
+	}
+}
+
+/* see bearssl_kdf.h -- buffer input in sc->dbuf, processing each full block */
+void
+br_shake_inject(br_shake_context *sc, const void *data, size_t len)
+{
+	const unsigned char *src = data;
+	const size_t rate = sc->rate;
+	size_t fill = sc->dptr;
+
+	/* fill counts the bytes already pending in sc->dbuf. */
+	while (len > 0) {
+		/* Take as much input as still fits in the current block. */
+		size_t room = rate - fill;
+		size_t chunk = (len < room) ? len : room;
+
+		memcpy(sc->dbuf + fill, src, chunk);
+		src += chunk;
+		len -= chunk;
+		fill += chunk;
+		if (fill != rate) {
+			continue;
+		}
+		/* Block complete: mix it into the state and start over. */
+		xor_block(sc->A, sc->dbuf, rate);
+		process_block(sc->A);
+		fill = 0;
+	}
+	/* Keep the fill level for the next call (or br_shake_flip()). */
+	sc->dptr = fill;
+}
+
+/* see bearssl_kdf.h -- pad the pending block and XOR it into the state */
+void
+br_shake_flip(br_shake_context *sc)
+{
+	size_t rate = sc->rate, pos = sc->dptr;
+
+	/*
+	 * dptr is left at rate so the first br_shake_produce() processes A.
+	 */
+	if (pos + 1 == rate) {
+		sc->dbuf[pos] = 0x9F; /* 0x1F and 0x80 land on the same byte */
+	} else {
+		sc->dbuf[pos] = 0x1F;
+		memset(sc->dbuf + pos + 1, 0x00, rate - pos - 2);
+		sc->dbuf[rate - 1] = 0x80;
+	}
+	sc->dptr = rate;
+	xor_block(sc->A, sc->dbuf, rate);
+}
+
+/* see bearssl_kdf.h -- copy len output bytes to out, refilling dbuf as needed */
+void
+br_shake_produce(br_shake_context *sc, void *out, size_t len)
+{
+	unsigned char *buf;
+	size_t dptr, rate;
+
+	buf = out;
+	dptr = sc->dptr;
+	rate = sc->rate;
+	while (len > 0) {
+		size_t clen;
+
+		if (dptr == rate) { /* dbuf used up; also the case right after br_shake_flip() */
+			unsigned char *dbuf;
+			uint64_t *A;
+
+			A = sc->A;
+			dbuf = sc->dbuf;
+			process_block(A);
+			br_enc64le(dbuf +   0,  A[ 0]); /* all 25 words (200 bytes) are written; only the first rate bytes are handed out */
+			br_enc64le(dbuf +   8, ~A[ 1]); /* words 1, 2, 8, 12, 17, 20 are complemented here; br_shake_init() sets those same words to all-ones */
+			br_enc64le(dbuf +  16, ~A[ 2]);
+			br_enc64le(dbuf +  24,  A[ 3]);
+			br_enc64le(dbuf +  32,  A[ 4]);
+			br_enc64le(dbuf +  40,  A[ 5]);
+			br_enc64le(dbuf +  48,  A[ 6]);
+			br_enc64le(dbuf +  56,  A[ 7]);
+			br_enc64le(dbuf +  64, ~A[ 8]);
+			br_enc64le(dbuf +  72,  A[ 9]);
+			br_enc64le(dbuf +  80,  A[10]);
+			br_enc64le(dbuf +  88,  A[11]);
+			br_enc64le(dbuf +  96, ~A[12]);
+			br_enc64le(dbuf + 104,  A[13]);
+			br_enc64le(dbuf + 112,  A[14]);
+			br_enc64le(dbuf + 120,  A[15]);
+			br_enc64le(dbuf + 128,  A[16]);
+			br_enc64le(dbuf + 136, ~A[17]);
+			br_enc64le(dbuf + 144,  A[18]);
+			br_enc64le(dbuf + 152,  A[19]);
+			br_enc64le(dbuf + 160, ~A[20]);
+			br_enc64le(dbuf + 168,  A[21]);
+			br_enc64le(dbuf + 176,  A[22]);
+			br_enc64le(dbuf + 184,  A[23]);
+			br_enc64le(dbuf + 192,  A[24]);
+			dptr = 0;
+		}
+		clen = rate - dptr; /* bytes still available in dbuf */
+		if (clen > len) {
+			clen = len;
+		}
+		memcpy(buf, sc->dbuf + dptr, clen);
+		dptr += clen;
+		buf += clen;
+		len -= clen;
+	}
+	sc->dptr = dptr; /* next call resumes where this one stopped */
+}
diff --git a/third_party/bearssl/src/skey_decoder.c b/third_party/bearssl/src/skey_decoder.c
new file mode 100644
index 0000000..9e285d7
--- /dev/null
+++ b/third_party/bearssl/src/skey_decoder.c
@@ -0,0 +1,650 @@
+/* Automatically generated code; do not modify directly. */
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct {
+	uint32_t *dp;             /* data stack pointer (next free slot) */
+	uint32_t *rp;             /* return stack pointer (next free slot) */
+	const unsigned char *ip;  /* next bytecode byte to execute */
+} t0_context;
+
+static uint32_t
+t0_parse7E_unsigned(const unsigned char **p)
+{
+	uint32_t x;
+	/* Big-endian base-128 integer: 7 payload bits per byte, top bit set = more bytes follow. */
+	x = 0;
+	for (;;) {
+		unsigned y;
+		/* Consume one byte and advance the caller's cursor. */
+		y = *(*p) ++;
+		x = (x << 7) | (uint32_t)(y & 0x7F);
+		if (y < 0x80) {  /* top bit clear: this was the last byte */
+			return x;
+		}
+	}
+}
+
+static int32_t
+t0_parse7E_signed(const unsigned char **p)
+{
+	int neg;
+	uint32_t x;
+	/* Base-128 integer as in t0_parse7E_unsigned(); bit 6 of the first byte is the sign. */
+	neg = ((**p) >> 6) & 1;
+	x = (uint32_t)-neg;  /* start from all-ones when negative so the value sign-extends */
+	for (;;) {
+		unsigned y;
+		/* Consume one byte and shift in its 7 payload bits. */
+		y = *(*p) ++;
+		x = (x << 7) | (uint32_t)(y & 0x7F);
+		if (y < 0x80) {  /* top bit clear: this was the last byte */
+			if (neg) {
+				return -(int32_t)~x - 1;  /* x as a signed value, without casting a value above INT32_MAX */
+			} else {
+				return (int32_t)x;
+			}
+		}
+	}
+}
+
+#define T0_VBYTE(x, n)   (unsigned char)((((uint32_t)(x) >> (n)) & 0x7F) | 0x80)  /* 7 bits of x from bit n, with the "more bytes follow" flag set */
+#define T0_FBYTE(x, n)   (unsigned char)(((uint32_t)(x) >> (n)) & 0x7F)  /* 7 bits of x from bit n, flag clear (final byte) */
+#define T0_SBYTE(x)      (unsigned char)((((uint32_t)(x) >> 28) + 0xF8) ^ 0xF8)  /* top 4 bits of x, with bit 3 extended up to bit 7 */
+#define T0_INT1(x)       T0_FBYTE(x, 0)  /* T0_INTn(x): x spelled as n bytes for use inside t0_codeblock[] */
+#define T0_INT2(x)       T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT3(x)       T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT4(x)       T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT5(x)       T0_SBYTE(x), T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+
+/* static const unsigned char t0_datablock[]; */
+
+
+void br_skey_decoder_init_main(void *t0ctx);  /* sets the bytecode entry point; defined through T0_DEFENTRY */
+
+void br_skey_decoder_run(void *t0ctx);  /* runs the interpreter until it yields ('co') or the top-level word returns */
+
+
+
+#include "inner.h"
+
+
+
+
+
+#include "inner.h"
+
+#define CTX   ((br_skey_decoder_context *)(void *)((unsigned char *)t0ctx - offsetof(br_skey_decoder_context, cpu)))
+#define CONTEXT_NAME   br_skey_decoder_context  /* CTX steps back from t0ctx (the address of the cpu member) to the enclosing context */
+
+/* Zero ctx, rewind both T0 stacks, set the entry point and run (bearssl_x509.h). */
+void
+br_skey_decoder_init(br_skey_decoder_context *ctx)
+{
+	memset(ctx, 0, sizeof *ctx);
+	ctx->cpu.rp = ctx->rp_stack;
+	ctx->cpu.dp = ctx->dp_stack;
+	br_skey_decoder_init_main(&ctx->cpu);
+	br_skey_decoder_run(&ctx->cpu);
+}
+
+/* Hand len bytes at data to the decoder, then run it (bearssl_x509.h). */
+void
+br_skey_decoder_push(br_skey_decoder_context *ctx,
+	const void *data, size_t len)
+{
+	ctx->hlen = len;
+	ctx->hbuf = data;
+	br_skey_decoder_run(&ctx->cpu);
+}
+
+
+
+static const unsigned char t0_datablock[] = {  /* length-prefixed OIDs, compared against CTX->pad by eqOID */
+	0x00, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x07,  /* 1.2.840.113549.1.1.1 (rsaEncryption) */
+	0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x08, 0x2A, 0x86, 0x48, 0xCE,  /* 1.2.840.10045.2.1 (id-ecPublicKey) */
+	0x3D, 0x03, 0x01, 0x07, 0x05, 0x2B, 0x81, 0x04, 0x00, 0x22, 0x05, 0x2B,  /* 1.2.840.10045.3.1.7 (P-256), 1.3.132.0.34 (P-384) */
+	0x81, 0x04, 0x00, 0x23  /* 1.3.132.0.35 (P-521) */
+};
+
+static const unsigned char t0_codeblock[] = {  /* T0 bytecode of every interpreted word; word start offsets are in t0_caddr[] */
+	0x00, 0x01, 0x01, 0x07, 0x00, 0x00, 0x01, 0x01, 0x08, 0x00, 0x00, 0x13,
+	0x13, 0x00, 0x00, 0x01, T0_INT1(BR_ERR_X509_BAD_TAG_CLASS), 0x00, 0x00,
+	0x01, T0_INT1(BR_ERR_X509_BAD_TAG_VALUE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_EXTRA_ELEMENT), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_INDEFINITE_LENGTH), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_INNER_TRUNC), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_INVALID_VALUE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_LIMIT_EXCEEDED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_NOT_CONSTRUCTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_NOT_PRIMITIVE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_OVERFLOW), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_UNEXPECTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_UNSUPPORTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_KEYTYPE_EC), 0x00, 0x00, 0x01, T0_INT1(BR_KEYTYPE_RSA),
+	0x00, 0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, key_data)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, key_type)), 0x00, 0x00,
+	0x33, 0x48, 0x00, 0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, pad)),
+	0x00, 0x00, 0x01, 0x13, 0x00, 0x00, 0x01, 0x1C, 0x00, 0x00, 0x01, 0x22,
+	0x00, 0x00, 0x05, 0x02, 0x2C, 0x16, 0x00, 0x00, 0x06, 0x02, 0x2D, 0x16,
+	0x00, 0x00, 0x01, 0x10, 0x3D, 0x00, 0x00, 0x0D, 0x05, 0x02, 0x2F, 0x16,
+	0x3A, 0x00, 0x00, 0x0D, 0x05, 0x02, 0x2F, 0x16, 0x3B, 0x00, 0x00, 0x06,
+	0x02, 0x27, 0x16, 0x00, 0x01, 0x03, 0x00, 0x54, 0x57, 0x01, 0x02, 0x3E,
+	0x55, 0x23, 0x06, 0x02, 0x30, 0x16, 0x57, 0x01, 0x04, 0x3E, 0x02, 0x00,
+	0x41, 0x3F, 0x00, 0x02, 0x03, 0x00, 0x53, 0x14, 0x14, 0x03, 0x01, 0x48,
+	0x0E, 0x06, 0x02, 0x30, 0x16, 0x33, 0x4C, 0x58, 0x01, 0x7F, 0x19, 0x0D,
+	0x06, 0x04, 0x13, 0x13, 0x04, 0x29, 0x01, 0x20, 0x19, 0x0D, 0x06, 0x16,
+	0x13, 0x3A, 0x53, 0x4D, 0x02, 0x00, 0x06, 0x09, 0x02, 0x00, 0x0C, 0x06,
+	0x02, 0x2A, 0x16, 0x04, 0x02, 0x03, 0x00, 0x3F, 0x04, 0x0D, 0x01, 0x21,
+	0x19, 0x0D, 0x06, 0x04, 0x13, 0x3A, 0x04, 0x03, 0x30, 0x16, 0x13, 0x5D,
+	0x02, 0x00, 0x05, 0x02, 0x30, 0x16, 0x02, 0x00, 0x02, 0x01, 0x1D, 0x00,
+	0x02, 0x53, 0x4B, 0x05, 0x02, 0x30, 0x16, 0x5B, 0x15, 0x06, 0x07, 0x5D,
+	0x01, 0x7F, 0x03, 0x01, 0x04, 0x16, 0x46, 0x15, 0x06, 0x10, 0x01, 0x00,
+	0x03, 0x01, 0x14, 0x06, 0x03, 0x4D, 0x04, 0x02, 0x01, 0x00, 0x03, 0x00,
+	0x04, 0x02, 0x30, 0x16, 0x3F, 0x57, 0x01, 0x04, 0x3E, 0x53, 0x02, 0x01,
+	0x06, 0x03, 0x43, 0x04, 0x03, 0x02, 0x00, 0x40, 0x3F, 0x5D, 0x02, 0x01,
+	0x06, 0x03, 0x32, 0x04, 0x01, 0x31, 0x00, 0x00, 0x54, 0x57, 0x01, 0x02,
+	0x3E, 0x55, 0x06, 0x02, 0x30, 0x16, 0x57, 0x01, 0x02, 0x3E, 0x44, 0x3F,
+	0x00, 0x07, 0x35, 0x50, 0x14, 0x05, 0x02, 0x2F, 0x16, 0x23, 0x01, 0x03,
+	0x0B, 0x33, 0x17, 0x47, 0x07, 0x03, 0x00, 0x4F, 0x4F, 0x35, 0x4E, 0x14,
+	0x14, 0x03, 0x01, 0x03, 0x02, 0x51, 0x14, 0x03, 0x03, 0x02, 0x02, 0x07,
+	0x14, 0x03, 0x02, 0x51, 0x14, 0x03, 0x04, 0x02, 0x02, 0x07, 0x14, 0x03,
+	0x02, 0x51, 0x14, 0x03, 0x05, 0x02, 0x02, 0x07, 0x14, 0x03, 0x02, 0x51,
+	0x03, 0x06, 0x02, 0x00, 0x02, 0x01, 0x02, 0x03, 0x02, 0x04, 0x02, 0x05,
+	0x02, 0x06, 0x1E, 0x00, 0x00, 0x19, 0x19, 0x00, 0x00, 0x01, 0x0B, 0x00,
+	0x00, 0x01, 0x00, 0x20, 0x14, 0x06, 0x08, 0x01, 0x01, 0x21, 0x20, 0x22,
+	0x20, 0x04, 0x75, 0x13, 0x00, 0x00, 0x01,
+	T0_INT2(3 * BR_X509_BUFSIZE_SIG), 0x00, 0x01, 0x01, 0x87, 0xFF, 0xFF,
+	0x7F, 0x54, 0x57, 0x01, 0x02, 0x3E, 0x55, 0x01, 0x01, 0x0E, 0x06, 0x02,
+	0x30, 0x16, 0x57, 0x01, 0x02, 0x19, 0x0D, 0x06, 0x06, 0x13, 0x3B, 0x44,
+	0x32, 0x04, 0x1C, 0x01, 0x04, 0x19, 0x0D, 0x06, 0x08, 0x13, 0x3B, 0x01,
+	0x00, 0x41, 0x31, 0x04, 0x0E, 0x01, 0x10, 0x19, 0x0D, 0x06, 0x05, 0x13,
+	0x3A, 0x42, 0x04, 0x03, 0x30, 0x16, 0x13, 0x03, 0x00, 0x3F, 0x02, 0x00,
+	0x34, 0x1F, 0x5A, 0x27, 0x16, 0x00, 0x01, 0x45, 0x0A, 0x06, 0x02, 0x29,
+	0x16, 0x14, 0x03, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x57, 0x01, 0x06,
+	0x3E, 0x56, 0x00, 0x00, 0x20, 0x14, 0x06, 0x07, 0x1A, 0x14, 0x06, 0x01,
+	0x12, 0x04, 0x76, 0x24, 0x00, 0x00, 0x4B, 0x05, 0x02, 0x30, 0x16, 0x37,
+	0x15, 0x06, 0x04, 0x01, 0x17, 0x04, 0x12, 0x38, 0x15, 0x06, 0x04, 0x01,
+	0x18, 0x04, 0x0A, 0x39, 0x15, 0x06, 0x04, 0x01, 0x19, 0x04, 0x02, 0x30,
+	0x16, 0x00, 0x00, 0x1C, 0x57, 0x01, 0x02, 0x3E, 0x09, 0x50, 0x00, 0x00,
+	0x35, 0x4E, 0x13, 0x00, 0x03, 0x14, 0x03, 0x00, 0x03, 0x01, 0x03, 0x02,
+	0x53, 0x59, 0x14, 0x01, 0x81, 0x00, 0x0F, 0x06, 0x02, 0x2E, 0x16, 0x14,
+	0x01, 0x00, 0x0D, 0x06, 0x0B, 0x13, 0x14, 0x05, 0x04, 0x13, 0x01, 0x00,
+	0x00, 0x59, 0x04, 0x6F, 0x02, 0x01, 0x14, 0x05, 0x02, 0x2B, 0x16, 0x23,
+	0x03, 0x01, 0x02, 0x02, 0x1F, 0x02, 0x02, 0x22, 0x03, 0x02, 0x14, 0x06,
+	0x03, 0x59, 0x04, 0x68, 0x13, 0x02, 0x00, 0x02, 0x01, 0x08, 0x00, 0x00,
+	0x14, 0x35, 0x1C, 0x08, 0x20, 0x1C, 0x07, 0x20, 0x4E, 0x00, 0x01, 0x59,
+	0x14, 0x01, 0x81, 0x00, 0x0A, 0x06, 0x01, 0x00, 0x01, 0x81, 0x00, 0x08,
+	0x14, 0x05, 0x02, 0x28, 0x16, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01,
+	0x00, 0x0E, 0x06, 0x19, 0x02, 0x00, 0x23, 0x03, 0x00, 0x14, 0x01, 0x83,
+	0xFF, 0xFF, 0x7F, 0x0E, 0x06, 0x02, 0x29, 0x16, 0x01, 0x08, 0x0B, 0x20,
+	0x59, 0x1C, 0x07, 0x04, 0x60, 0x00, 0x00, 0x52, 0x4A, 0x00, 0x00, 0x57,
+	0x3C, 0x53, 0x00, 0x01, 0x53, 0x14, 0x05, 0x02, 0x2E, 0x16, 0x59, 0x14,
+	0x01, 0x81, 0x00, 0x0F, 0x06, 0x02, 0x2E, 0x16, 0x03, 0x00, 0x14, 0x06,
+	0x16, 0x59, 0x02, 0x00, 0x14, 0x01, 0x87, 0xFF, 0xFF, 0x7F, 0x0F, 0x06,
+	0x02, 0x2E, 0x16, 0x01, 0x08, 0x0B, 0x07, 0x03, 0x00, 0x04, 0x67, 0x13,
+	0x02, 0x00, 0x00, 0x00, 0x53, 0x14, 0x01, 0x81, 0x7F, 0x0E, 0x06, 0x08,
+	0x5C, 0x01, 0x00, 0x36, 0x1F, 0x01, 0x00, 0x00, 0x14, 0x36, 0x1F, 0x36,
+	0x22, 0x4C, 0x01, 0x7F, 0x00, 0x01, 0x59, 0x03, 0x00, 0x02, 0x00, 0x01,
+	0x05, 0x10, 0x01, 0x01, 0x11, 0x18, 0x02, 0x00, 0x01, 0x06, 0x10, 0x14,
+	0x01, 0x01, 0x11, 0x06, 0x02, 0x25, 0x16, 0x01, 0x04, 0x0B, 0x02, 0x00,
+	0x01, 0x1F, 0x11, 0x14, 0x01, 0x1F, 0x0D, 0x06, 0x02, 0x26, 0x16, 0x07,
+	0x00, 0x00, 0x14, 0x05, 0x05, 0x01, 0x00, 0x01, 0x7F, 0x00, 0x57, 0x00,
+	0x00, 0x14, 0x05, 0x02, 0x29, 0x16, 0x23, 0x5A, 0x00, 0x00, 0x1B, 0x14,
+	0x01, 0x00, 0x0F, 0x06, 0x01, 0x00, 0x13, 0x12, 0x04, 0x74, 0x00, 0x01,
+	0x01, 0x00, 0x00, 0x5D, 0x13, 0x00, 0x00, 0x14, 0x06, 0x07, 0x5E, 0x14,
+	0x06, 0x01, 0x12, 0x04, 0x76, 0x00, 0x00, 0x01, 0x00, 0x19, 0x1A, 0x09,
+	0x24, 0x00
+};
+
+static const uint16_t t0_caddr[] = {  /* start offset in t0_codeblock[] of each interpreted word, indexed by opcode - T0_INTERPRETED */
+	0,
+	5,
+	10,
+	14,
+	18,
+	22,
+	26,
+	30,
+	34,
+	38,
+	42,
+	46,
+	50,
+	54,
+	58,
+	62,
+	66,
+	70,
+	75,
+	80,
+	84,
+	89,
+	93,
+	97,
+	101,
+	107,
+	113,
+	118,
+	126,
+	134,
+	140,
+	163,
+	244,
+	311,
+	329,
+	404,
+	408,
+	412,
+	429,
+	434,
+	505,
+	519,
+	526,
+	540,
+	573,
+	582,
+	587,
+	654,
+	665,
+	721,
+	725,
+	730,
+	778,
+	804,
+	848,
+	859,
+	868,
+	881,
+	885,
+	889,
+	901
+};
+
+#define T0_INTERPRETED   34  /* opcodes below this are native primitives; the others are bytecode words */
+/* Call word 'slot': reserve its locals on rp, push the return frame (caller offset | locals << 16), jump. */
+#define T0_ENTER(ip, rp, slot)   do { \
+		const unsigned char *t0_newip; \
+		uint32_t t0_lnum; \
+		t0_newip = &t0_codeblock[t0_caddr[(slot) - T0_INTERPRETED]]; \
+		t0_lnum = t0_parse7E_unsigned(&t0_newip); \
+		(rp) += t0_lnum; \
+		*((rp) ++) = (uint32_t)((ip) - &t0_codeblock[0]) + (t0_lnum << 16); \
+		(ip) = t0_newip; \
+	} while (0)
+/* Define an entry point that resets ip to the start of the code and enters word 'slot'. */
+#define T0_DEFENTRY(name, slot) \
+void \
+name(void *ctx) \
+{ \
+	t0_context *t0ctx = ctx; \
+	t0ctx->ip = &t0_codeblock[0]; \
+	T0_ENTER(t0ctx->ip, t0ctx->rp, slot); \
+}
+
+T0_DEFENTRY(br_skey_decoder_init_main, 73)
+/* Fetch the next bytecode byte and advance the instruction pointer. */
+#define T0_NEXT(t0ipp)   (*(*(t0ipp)) ++)
+
+void
+br_skey_decoder_run(void *t0ctx)
+{
+	uint32_t *dp, *rp;
+	const unsigned char *ip;
+	/* Bytecode interpreter: resumes at the saved ip, runs until 'co' yields or the top-level word returns, then saves dp/rp/ip. */
+#define T0_LOCAL(x)    (*(rp - 2 - (x)))
+#define T0_POP()       (*-- dp)
+#define T0_POPi()      (*(int32_t *)(-- dp))
+#define T0_PEEK(x)     (*(dp - 1 - (x)))
+#define T0_PEEKi(x)    (*(int32_t *)(dp - 1 - (x)))
+#define T0_PUSH(v)     do { *dp = (v); dp ++; } while (0)
+#define T0_PUSHi(v)    do { *(int32_t *)dp = (v); dp ++; } while (0)
+#define T0_RPOP()      (*-- rp)
+#define T0_RPOPi()     (*(int32_t *)(-- rp))
+#define T0_RPUSH(v)    do { *rp = (v); rp ++; } while (0)
+#define T0_RPUSHi(v)   do { *(int32_t *)rp = (v); rp ++; } while (0)
+#define T0_ROLL(x)     do { \
+	size_t t0len = (size_t)(x); \
+	uint32_t t0tmp = *(dp - 1 - t0len); \
+	memmove(dp - t0len - 1, dp - t0len, t0len * sizeof *dp); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_SWAP()      do { \
+	uint32_t t0tmp = *(dp - 2); \
+	*(dp - 2) = *(dp - 1); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_ROT()       do { \
+	uint32_t t0tmp = *(dp - 3); \
+	*(dp - 3) = *(dp - 2); \
+	*(dp - 2) = *(dp - 1); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_NROT()       do { \
+	uint32_t t0tmp = *(dp - 1); \
+	*(dp - 1) = *(dp - 2); \
+	*(dp - 2) = *(dp - 3); \
+	*(dp - 3) = t0tmp; \
+} while (0)
+#define T0_PICK(x)      do { \
+	uint32_t t0depth = (x); \
+	T0_PUSH(T0_PEEK(t0depth)); \
+} while (0)
+#define T0_CO()         do { \
+	goto t0_exit; \
+} while (0)
+#define T0_RET()        goto t0_next
+	/* Work on local copies of the machine registers; they are written back at t0_exit. */
+	dp = ((t0_context *)t0ctx)->dp;
+	rp = ((t0_context *)t0ctx)->rp;
+	ip = ((t0_context *)t0ctx)->ip;
+	goto t0_next;
+	for (;;) {
+		uint32_t t0x;
+
+	t0_next:
+		t0x = T0_NEXT(&ip);
+		if (t0x < T0_INTERPRETED) {
+			switch (t0x) {
+				int32_t t0off;
+
+			case 0: /* ret */
+				t0x = T0_RPOP();
+				rp -= (t0x >> 16);  /* release the locals reserved by T0_ENTER */
+				t0x &= 0xFFFF;
+				if (t0x == 0) {  /* return offset 0: the top-level word has finished */
+					ip = NULL;
+					goto t0_exit;
+				}
+				ip = &t0_codeblock[t0x];
+				break;
+			case 1: /* literal constant */
+				T0_PUSHi(t0_parse7E_signed(&ip));
+				break;
+			case 2: /* read local */
+				T0_PUSH(T0_LOCAL(t0_parse7E_unsigned(&ip)));
+				break;
+			case 3: /* write local */
+				T0_LOCAL(t0_parse7E_unsigned(&ip)) = T0_POP();
+				break;
+			case 4: /* jump */
+				t0off = t0_parse7E_signed(&ip);
+				ip += t0off;
+				break;
+			case 5: /* jump if */
+				t0off = t0_parse7E_signed(&ip);
+				if (T0_POP()) {
+					ip += t0off;
+				}
+				break;
+			case 6: /* jump if not */
+				t0off = t0_parse7E_signed(&ip);
+				if (!T0_POP()) {
+					ip += t0off;
+				}
+				break;
+			case 7: {
+				/* + */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a + b);
+
+				}
+				break;
+			case 8: {
+				/* - */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a - b);
+
+				}
+				break;
+			case 9: {
+				/* -rot */
+ T0_NROT(); 
+				}
+				break;
+			case 10: {
+				/* < */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a < b));
+
+				}
+				break;
+			case 11: {
+				/* << */
+
+	int c = (int)T0_POPi();
+	uint32_t x = T0_POP();
+	T0_PUSH(x << c);
+
+				}
+				break;
+			case 12: {
+				/* <> */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(-(uint32_t)(a != b));
+
+				}
+				break;
+			case 13: {
+				/* = */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(-(uint32_t)(a == b));
+
+				}
+				break;
+			case 14: {
+				/* > */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a > b));
+
+				}
+				break;
+			case 15: {
+				/* >= */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a >= b));
+
+				}
+				break;
+			case 16: {
+				/* >> */
+
+	int c = (int)T0_POPi();
+	int32_t x = T0_POPi();
+	T0_PUSHi(x >> c);
+
+				}
+				break;
+			case 17: {
+				/* and */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a & b);
+
+				}
+				break;
+			case 18: {
+				/* co */
+ T0_CO(); 
+				}
+				break;
+			case 19: {
+				/* drop */
+ (void)T0_POP(); 
+				}
+				break;
+			case 20: {
+				/* dup */
+ T0_PUSH(T0_PEEK(0)); 
+				}
+				break;
+			case 21: {
+				/* eqOID */
+
+	const unsigned char *a2 = &t0_datablock[T0_POP()];
+	const unsigned char *a1 = &CTX->pad[0];
+	size_t len = a1[0];
+	int x;
+	if (len == a2[0]) {
+		x = -(memcmp(a1 + 1, a2 + 1, len) == 0);
+	} else {
+		x = 0;
+	}
+	T0_PUSH((uint32_t)x);
+
+				}
+				break;
+			case 22: {
+				/* fail */
+
+	CTX->err = T0_POPi();
+	T0_CO();
+
+				}
+				break;
+			case 23: {
+				/* get8 */
+
+	uint32_t addr = T0_POP();
+	T0_PUSH(*((unsigned char *)CTX + addr));
+
+				}
+				break;
+			case 24: {
+				/* neg */
+
+	uint32_t a = T0_POP();
+	T0_PUSH(-a);
+
+				}
+				break;
+			case 25: {
+				/* over */
+ T0_PUSH(T0_PEEK(1)); 
+				}
+				break;
+			case 26: {
+				/* read-blob-inner */
+
+	uint32_t len = T0_POP();
+	uint32_t addr = T0_POP();
+	size_t clen = CTX->hlen;
+	if (clen > len) {
+		clen = (size_t)len;
+	}
+	if (addr != 0) {  /* addr 0: consume the bytes without storing them */
+		memcpy((unsigned char *)CTX + addr, CTX->hbuf, clen);
+	}
+	CTX->hbuf += clen;
+	CTX->hlen -= clen;
+	T0_PUSH(addr + clen);
+	T0_PUSH(len - clen);
+
+				}
+				break;
+			case 27: {
+				/* read8-low */
+
+	if (CTX->hlen == 0) {
+		T0_PUSHi(-1);  /* no input byte available */
+	} else {
+		CTX->hlen --;
+		T0_PUSH(*CTX->hbuf ++);
+	}
+
+				}
+				break;
+			case 28: {
+				/* rot */
+ T0_ROT(); 
+				}
+				break;
+			case 29: {
+				/* set-ec-key */
+
+	size_t xlen = T0_POP();
+	uint32_t curve = T0_POP();
+	CTX->key.ec.curve = curve;
+	CTX->key.ec.x = CTX->key_data;
+	CTX->key.ec.xlen = xlen;
+
+				}
+				break;
+			case 30: {
+				/* set-rsa-key */
+
+	size_t iqlen = T0_POP();
+	size_t dqlen = T0_POP();
+	size_t dplen = T0_POP();
+	size_t qlen = T0_POP();
+	size_t plen = T0_POP();
+	uint32_t n_bitlen = T0_POP();
+	size_t off;
+	/* The five values are stored back to back in key_data, in the order p, q, dp, dq, iq. */
+	CTX->key.rsa.n_bitlen = n_bitlen;
+	CTX->key.rsa.p = CTX->key_data;
+	CTX->key.rsa.plen = plen;
+	off = plen;
+	CTX->key.rsa.q = CTX->key_data + off;
+	CTX->key.rsa.qlen = qlen;
+	off += qlen;
+	CTX->key.rsa.dp = CTX->key_data + off;
+	CTX->key.rsa.dplen = dplen;
+	off += dplen;
+	CTX->key.rsa.dq = CTX->key_data + off;
+	CTX->key.rsa.dqlen = dqlen;
+	off += dqlen;
+	CTX->key.rsa.iq = CTX->key_data + off;
+	CTX->key.rsa.iqlen = iqlen;
+
+				}
+				break;
+			case 31: {
+				/* set8 */
+
+	uint32_t addr = T0_POP();
+	*((unsigned char *)CTX + addr) = (unsigned char)T0_POP();
+
+				}
+				break;
+			case 32: {
+				/* swap */
+ T0_SWAP(); 
+				}
+				break;
+			case 33: {
+				/* u>> */
+
+	int c = (int)T0_POPi();
+	uint32_t x = T0_POP();
+	T0_PUSH(x >> c);
+
+				}
+				break;
+			}
+
+		} else {
+			T0_ENTER(ip, rp, t0x);
+		}
+	}
+t0_exit:
+	((t0_context *)t0ctx)->dp = dp;
+	((t0_context *)t0ctx)->rp = rp;
+	((t0_context *)t0ctx)->ip = ip;
+}
diff --git a/third_party/bearssl/src/ssl_ccert_single_ec.c b/third_party/bearssl/src/ssl_ccert_single_ec.c
new file mode 100644
index 0000000..93ebcde
--- /dev/null
+++ b/third_party/bearssl/src/ssl_ccert_single_ec.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static void
+cc_none0(const br_ssl_client_certificate_class **pctx)  /* no-op callback taking only the context */
+{
+	(void)pctx;  /* unused */
+}
+
+static void
+cc_none1(const br_ssl_client_certificate_class **pctx, size_t len)  /* no-op callback taking a length */
+{
+	(void)pctx;  /* unused */
+	(void)len;   /* unused */
+}
+
+static void
+cc_none2(const br_ssl_client_certificate_class **pctx,  /* no-op callback taking a data chunk */
+	const unsigned char *data, size_t len)
+{
+	(void)pctx;  /* unused */
+	(void)data;  /* unused */
+	(void)len;   /* unused */
+}
+
+/*
+ * Pick the authentication method for the single EC key: static ECDH if
+ * the key allows key exchange, the server curve matches and auth_types
+ * permits it (bit 16 for an RSA-issued chain, else bit 17); otherwise
+ * ECDSA with a hash from auth_types >> 8, or all-zero *choices if none.
+ */
+static void
+cc_choose(const br_ssl_client_certificate_class **pctx,
+	const br_ssl_client_context *cc, uint32_t auth_types,
+	br_ssl_client_certificate *choices)
+{
+	br_ssl_client_certificate_ec_context *zc;
+	int x, scurve;
+
+	zc = (br_ssl_client_certificate_ec_context *)pctx;
+	scurve = br_ssl_client_get_server_curve(cc);
+	if ((zc->allowed_usages & BR_KEYTYPE_KEYX) != 0
+		&& scurve == zc->sk->curve)
+	{
+		x = (zc->issuer_key_type == BR_KEYTYPE_RSA) ? 16 : 17;
+		if (((auth_types >> x) & 1) != 0) {
+			choices->auth_type = BR_AUTH_ECDH;
+			choices->hash_id = -1;
+			choices->chain = zc->chain;
+			choices->chain_len = zc->chain_len;
+			/* Stop here so the ECDSA path cannot overwrite this. */
+			return;
+		}
+	}
+	/* ECDSA needs the signing usage and a hash both sides support. */
+	x = br_ssl_choose_hash((unsigned)(auth_types >> 8));
+	if (x == 0 || (zc->allowed_usages & BR_KEYTYPE_SIGN) == 0) {
+		memset(choices, 0, sizeof *choices);
+		return;
+	}
+	choices->auth_type = BR_AUTH_ECDSA;
+	choices->hash_id = x;
+	choices->chain = zc->chain;
+	choices->chain_len = zc->chain_len;
+}
+
+/* Run iec->mul on data with our key, then keep only the bytes iec->xoff selects. */
+static uint32_t
+cc_do_keyx(const br_ssl_client_certificate_class **pctx,
+	unsigned char *data, size_t *len)
+{
+	br_ssl_client_certificate_ec_context *zc;
+	size_t x_off, x_len;
+	uint32_t ok;
+	zc = (br_ssl_client_certificate_ec_context *)pctx;
+	ok = zc->iec->mul(data, *len, zc->sk->x, zc->sk->xlen, zc->sk->curve);
+	x_off = zc->iec->xoff(zc->sk->curve, &x_len);
+	memmove(data, data + x_off, x_len);
+	*len = x_len;
+	return ok;
+}
+
+static size_t
+cc_do_sign(const br_ssl_client_certificate_class **pctx,
+	int hash_id, size_t hv_len, unsigned char *data, size_t len)
+{
+	br_ssl_client_certificate_ec_context *zc;
+	unsigned char hv[64];
+	const br_hash_class *hc;
+
+	zc = (br_ssl_client_certificate_ec_context *)pctx;
+	memcpy(hv, data, hv_len);
+	hc = br_multihash_getimpl(zc->mhash, hash_id);
+	if (hc == NULL) {
+		return 0;
+	}
+	if (len < 139) {
+		return 0;
+	}
+	return zc->iecdsa(zc->iec, hc, hv, zc->sk, data);
+}
+
+static const br_ssl_client_certificate_class ccert_vtable = {
+	sizeof(br_ssl_client_certificate_ec_context), /* size of the context */
+	cc_none0, /* start_name_list */
+	cc_none1, /* start_name */
+	cc_none2, /* append_name */
+	cc_none0, /* end_name */
+	cc_none0, /* end_name_list */
+	cc_choose, /* selects static ECDH or ECDSA */
+	cc_do_keyx, /* static ECDH computation */
+	cc_do_sign /* ECDSA signature */
+};
+
+/* Install the single-EC-key client certificate handler on cc (bearssl_ssl.h). */
+void
+br_ssl_client_set_single_ec(br_ssl_client_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_ec_private_key *sk, unsigned allowed_usages,
+	unsigned cert_issuer_key_type, const br_ec_impl *iec, br_ecdsa_sign iecdsa)
+{
+	br_ssl_client_certificate_ec_context *zc = &cc->client_auth.single_ec;
+	zc->vtable = &ccert_vtable;
+	zc->chain = chain;
+	zc->chain_len = chain_len;
+	zc->sk = sk;
+	zc->allowed_usages = allowed_usages;
+	zc->issuer_key_type = cert_issuer_key_type;
+	zc->mhash = &cc->eng.mhash;
+	zc->iec = iec;
+	zc->iecdsa = iecdsa;
+	cc->client_auth_vtable = &zc->vtable;
+}
diff --git a/third_party/bearssl/src/ssl_ccert_single_rsa.c b/third_party/bearssl/src/ssl_ccert_single_rsa.c
new file mode 100644
index 0000000..690df20
--- /dev/null
+++ b/third_party/bearssl/src/ssl_ccert_single_rsa.c
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static void
+cc_none0(const br_ssl_client_certificate_class **pctx)  /* no-op callback taking only the context */
+{
+	(void)pctx;  /* unused */
+}
+
+static void
+cc_none1(const br_ssl_client_certificate_class **pctx, size_t len)  /* no-op callback taking a length */
+{
+	(void)pctx;  /* unused */
+	(void)len;   /* unused */
+}
+
+static void
+cc_none2(const br_ssl_client_certificate_class **pctx,  /* no-op callback taking a data chunk */
+	const unsigned char *data, size_t len)
+{
+	(void)pctx;  /* unused */
+	(void)data;  /* unused */
+	(void)len;   /* unused */
+}
+
+/* Offer the RSA chain with a hash taken from auth_types; zero *choices if none fits. */
+static void
+cc_choose(const br_ssl_client_certificate_class **pctx,
+	const br_ssl_client_context *cc, uint32_t auth_types,
+	br_ssl_client_certificate *choices)
+{
+	br_ssl_client_certificate_rsa_context *zc;
+	int x = br_ssl_choose_hash((unsigned)auth_types);
+	(void)cc;
+	zc = (br_ssl_client_certificate_rsa_context *)pctx;
+	if (x == 0 && (auth_types & 1) == 0) {  /* x == 0 is only usable when bit 0 is set */
+		memset(choices, 0, sizeof *choices);
+		return;  /* previously fell through and overwrote the zeroed result */
+	}
+	choices->auth_type = BR_AUTH_RSA;
+	choices->hash_id = x;
+	choices->chain = zc->chain;
+	choices->chain_len = zc->chain_len;
+}
+
+/*
+ * OID for hash functions in RSA signatures. First byte of each is the OID length.
+ */
+static const unsigned char HASH_OID_SHA1[] = {
+	0x05, 0x2B, 0x0E, 0x03, 0x02, 0x1A  /* 1.3.14.3.2.26 */
+};
+
+static const unsigned char HASH_OID_SHA224[] = {
+	0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04  /* 2.16.840.1.101.3.4.2.4 */
+};
+
+static const unsigned char HASH_OID_SHA256[] = {
+	0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01  /* 2.16.840.1.101.3.4.2.1 */
+};
+
+static const unsigned char HASH_OID_SHA384[] = {
+	0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02  /* 2.16.840.1.101.3.4.2.2 */
+};
+
+static const unsigned char HASH_OID_SHA512[] = {
+	0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03  /* 2.16.840.1.101.3.4.2.3 */
+};
+
+static const unsigned char *HASH_OID[] = {  /* indexed by hash_id - 2 in cc_do_sign() */
+	HASH_OID_SHA1,
+	HASH_OID_SHA224,
+	HASH_OID_SHA256,
+	HASH_OID_SHA384,
+	HASH_OID_SHA512
+};
+
+/* Sign the hv_len-byte hash in data via zc->irsasign; returns the signature length or 0. */
+static size_t
+cc_do_sign(const br_ssl_client_certificate_class **pctx,
+	int hash_id, size_t hv_len, unsigned char *data, size_t len)
+{
+	br_ssl_client_certificate_rsa_context *zc;
+	unsigned char hv[64];
+	const unsigned char *hash_oid;
+	size_t sig_len;
+
+	zc = (br_ssl_client_certificate_rsa_context *)pctx;
+	/* Only hash_id 0 (no OID) and 2..6 (HASH_OID[]) are supported. */
+	if (hash_id < 0 || hash_id == 1 || hash_id > 6) {
+		return 0;
+	}
+	hash_oid = (hash_id == 0) ? NULL : HASH_OID[hash_id - 2];
+	sig_len = (zc->sk->n_bitlen + 7) >> 3;
+	/* The hash must fit hv[] and the signature must fit in data. */
+	if (hv_len > sizeof hv || len < sig_len) {
+		return 0;
+	}
+	memcpy(hv, data, hv_len);
+	return zc->irsasign(hash_oid, hv, hv_len, zc->sk, data) ? sig_len : 0;
+}
+
+static const br_ssl_client_certificate_class ccert_vtable = {
+	sizeof(br_ssl_client_certificate_rsa_context), /* size of the context */
+	cc_none0, /* start_name_list */
+	cc_none1, /* start_name */
+	cc_none2, /* append_name */
+	cc_none0, /* end_name */
+	cc_none0, /* end_name_list */
+	cc_choose, /* selects RSA and a hash */
+	0, /* no callback in this slot (presumably key exchange) */
+	cc_do_sign /* RSA signature */
+};
+
+/* Install the single-RSA-key client certificate handler on cc (bearssl_ssl.h). */
+void
+br_ssl_client_set_single_rsa(br_ssl_client_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_rsa_private_key *sk, br_rsa_pkcs1_sign irsasign)
+{
+	cc->client_auth.single_rsa.irsasign = irsasign;
+	cc->client_auth.single_rsa.sk = sk;
+	cc->client_auth.single_rsa.chain_len = chain_len;
+	cc->client_auth.single_rsa.chain = chain;
+	cc->client_auth.single_rsa.vtable = &ccert_vtable;
+	cc->client_auth_vtable = &cc->client_auth.single_rsa.vtable;
+}
diff --git a/third_party/bearssl/src/ssl_client.c b/third_party/bearssl/src/ssl_client.c
new file mode 100644
index 0000000..28c404b
--- /dev/null
+++ b/third_party/bearssl/src/ssl_client.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Clear every byte of the client context (see bearssl_ssl.h). */
+void
+br_ssl_client_zero(br_ssl_client_context *cc)
+{
+	/*
+	 * For really standard C, we should explicitly set to NULL all
+	 * pointers, and 0 all other fields. However, on all our target
+	 * architectures, a direct memset() will work, be faster, and
+	 * use a lot less code.
+	 */
+	memset(cc, 0, sizeof *cc);
+}
+
+/* see bearssl_ssl.h */
+int
+br_ssl_client_reset(br_ssl_client_context *cc,
+	const char *server_name, int resume_session)
+{
+	size_t n;
+
+	br_ssl_engine_set_buffer(&cc->eng, NULL, 0, 0);
+	cc->eng.version_out = cc->eng.version_min;
+	if (!resume_session) {
+		br_ssl_client_forget_session(cc);
+	}
+	if (!br_ssl_engine_init_rand(&cc->eng)) {
+		return 0;
+	}
+
+	/*
+	 * We always set back the "reneg" flag to 0 because we use it
+	 * to distinguish between first handshake and renegotiation.
+	 * Note that "renegotiation" and "session resumption" are two
+	 * different things.
+	 */
+	cc->eng.reneg = 0;
+
+	if (server_name == NULL) {
+		cc->eng.server_name[0] = 0;
+	} else {
+		n = strlen(server_name) + 1;
+		if (n > sizeof cc->eng.server_name) {
+			br_ssl_engine_fail(&cc->eng, BR_ERR_BAD_PARAM);
+			return 0;
+		}
+		memcpy(cc->eng.server_name, server_name, n);
+	}
+
+	br_ssl_engine_hs_reset(&cc->eng,
+		br_ssl_hs_client_init_main, br_ssl_hs_client_run);
+	return br_ssl_engine_last_error(&cc->eng) == BR_ERR_OK;
+}
diff --git a/third_party/bearssl/src/ssl_client_default_rsapub.c b/third_party/bearssl/src/ssl_client_default_rsapub.c
new file mode 100644
index 0000000..2cdaab8
--- /dev/null
+++ b/third_party/bearssl/src/ssl_client_default_rsapub.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* Configure cc with the implementation returned by br_rsa_public_get_default() (see bearssl_ssl.h). */
+void
+br_ssl_client_set_default_rsapub(br_ssl_client_context *cc)
+{
+	br_ssl_client_set_rsapub(cc, br_rsa_public_get_default());
+}
diff --git a/third_party/bearssl/src/ssl_client_full.c b/third_party/bearssl/src/ssl_client_full.c
new file mode 100644
index 0000000..9814349
--- /dev/null
+++ b/third_party/bearssl/src/ssl_client_full.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h */
+void
+br_ssl_client_init_full(br_ssl_client_context *cc,
+	br_x509_minimal_context *xc,
+	const br_x509_trust_anchor *trust_anchors, size_t trust_anchors_num)
+{
+	/*
+	 * The "full" profile supports all implemented cipher suites.
+	 *
+	 * Rationale for suite order, from most important to least
+	 * important rule:
+	 *
+	 * -- Don't use 3DES if AES or ChaCha20 is available.
+	 * -- Try to have Forward Secrecy (ECDHE suite) if possible.
+	 * -- When not using Forward Secrecy, ECDH key exchange is
+	 *    better than RSA key exchange (slightly more expensive on the
+	 *    client, but much cheaper on the server, and it implies smaller
+	 *    messages).
+	 * -- ChaCha20+Poly1305 is better than AES/GCM (faster, smaller code).
+	 * -- GCM is better than CCM and CBC. CCM is better than CBC.
+	 * -- CCM is preferable over CCM_8 (with CCM_8, forgeries may succeed
+	 *    with probability 2^(-64)).
+	 * -- AES-128 is preferred over AES-256 (AES-128 is already
+	 *    strong enough, and AES-256 is 40% more expensive).
+	 */
+	static const uint16_t suites[] = {
+		BR_TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256,
+		BR_TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
+		BR_TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
+		BR_TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_128_CCM,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_256_CCM,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_128_CCM_8,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_256_CCM_8,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256,
+		BR_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384,
+		BR_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA,
+		BR_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,
+		BR_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA,
+		BR_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA,
+		BR_TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256,
+		BR_TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256,
+		BR_TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384,
+		BR_TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384,
+		BR_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256,
+		BR_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256,
+		BR_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384,
+		BR_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384,
+		BR_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA,
+		BR_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA,
+		BR_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA,
+		BR_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA,
+		BR_TLS_RSA_WITH_AES_128_GCM_SHA256,
+		BR_TLS_RSA_WITH_AES_256_GCM_SHA384,
+		BR_TLS_RSA_WITH_AES_128_CCM,
+		BR_TLS_RSA_WITH_AES_256_CCM,
+		BR_TLS_RSA_WITH_AES_128_CCM_8,
+		BR_TLS_RSA_WITH_AES_256_CCM_8,
+		BR_TLS_RSA_WITH_AES_128_CBC_SHA256,
+		BR_TLS_RSA_WITH_AES_256_CBC_SHA256,
+		BR_TLS_RSA_WITH_AES_128_CBC_SHA,
+		BR_TLS_RSA_WITH_AES_256_CBC_SHA,
+		BR_TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA,
+		BR_TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA,
+		BR_TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA,
+		BR_TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA,
+		BR_TLS_RSA_WITH_3DES_EDE_CBC_SHA
+	};
+
+	/*
+	 * All hash functions are activated.
+	 * Note: the X.509 validation engine will nonetheless refuse to
+	 * validate signatures that use MD5 as hash function.
+	 */
+	static const br_hash_class *hashes[] = {
+		&br_md5_vtable,
+		&br_sha1_vtable,
+		&br_sha224_vtable,
+		&br_sha256_vtable,
+		&br_sha384_vtable,
+		&br_sha512_vtable
+	};
+
+	int id;
+
+	/*
+	 * Reset client context and set supported versions from TLS-1.0
+	 * to TLS-1.2 (inclusive).
+	 */
+	br_ssl_client_zero(cc);
+	br_ssl_engine_set_versions(&cc->eng, BR_TLS10, BR_TLS12);
+
+	/*
+	 * X.509 engine uses SHA-256 to hash certificate DN (for
+	 * comparisons).
+	 */
+	br_x509_minimal_init(xc, &br_sha256_vtable,
+		trust_anchors, trust_anchors_num);
+
+	/*
+	 * Set suites and asymmetric crypto implementations. RSA and
+	 * ECDSA use the "default" implementations chosen by the
+	 * br_*_set_default_*() helpers; the X.509 engine is given the
+	 * same RSA/EC/ECDSA implementations as the SSL engine.
+	 */
+	br_ssl_engine_set_suites(&cc->eng, suites,
+		(sizeof suites) / (sizeof suites[0]));
+	br_ssl_client_set_default_rsapub(cc);
+	br_ssl_engine_set_default_rsavrfy(&cc->eng);
+	br_ssl_engine_set_default_ecdsa(&cc->eng);
+	br_x509_minimal_set_rsa(xc, br_ssl_engine_get_rsavrfy(&cc->eng));
+	br_x509_minimal_set_ecdsa(xc,
+		br_ssl_engine_get_ec(&cc->eng),
+		br_ssl_engine_get_ecdsa(&cc->eng));
+
+	/*
+	 * Register every hash function with both the SSL engine and the
+	 * X.509 engine; hashes[] is indexed by (identifier - 1).
+	 */
+	for (id = br_md5_ID; id <= br_sha512_ID; id ++) {
+		const br_hash_class *hc;
+
+		hc = hashes[id - 1];
+		br_ssl_engine_set_hash(&cc->eng, id, hc);
+		br_x509_minimal_set_hash(xc, id, hc);
+	}
+
+	/*
+	 * Link the X.509 engine in the SSL engine.
+	 */
+	br_ssl_engine_set_x509(&cc->eng, &xc->vtable);
+
+	/*
+	 * Set the PRF implementations.
+	 */
+	br_ssl_engine_set_prf10(&cc->eng, &br_tls10_prf);
+	br_ssl_engine_set_prf_sha256(&cc->eng, &br_tls12_sha256_prf);
+	br_ssl_engine_set_prf_sha384(&cc->eng, &br_tls12_sha384_prf);
+
+	/*
+	 * Symmetric encryption. We use the "default" implementations
+	 * (fastest among constant-time implementations).
+	 */
+	br_ssl_engine_set_default_aes_cbc(&cc->eng);
+	br_ssl_engine_set_default_aes_ccm(&cc->eng);
+	br_ssl_engine_set_default_aes_gcm(&cc->eng);
+	br_ssl_engine_set_default_des_cbc(&cc->eng);
+	br_ssl_engine_set_default_chapol(&cc->eng);
+}
diff --git a/third_party/bearssl/src/ssl_engine.c b/third_party/bearssl/src/ssl_engine.c
new file mode 100644
index 0000000..f59fe1a
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine.c
@@ -0,0 +1,1584 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#if 0
+/* obsolete */
+
+/*
+ * If BR_USE_URANDOM is not defined, then try to autodetect its presence
+ * through compiler macros.
+ */
+#ifndef BR_USE_URANDOM
+
+/*
+ * Macro values documented on:
+ *    https://sourceforge.net/p/predef/wiki/OperatingSystems/
+ *
+ * Only the most common systems have been included here for now. This
+ * should be enriched later on.
+ */
+#if defined _AIX \
+	|| defined __ANDROID__ \
+	|| defined __FreeBSD__ \
+	|| defined __NetBSD__ \
+	|| defined __OpenBSD__ \
+	|| defined __DragonFly__ \
+	|| defined __linux__ \
+	|| (defined __sun && (defined __SVR4 || defined __svr4__)) \
+	|| (defined __APPLE__ && defined __MACH__)
+#define BR_USE_URANDOM   1
+#endif
+
+#endif
+
+/*
+ * If BR_USE_WIN32_RAND is not defined, perform autodetection here.
+ */
+#ifndef BR_USE_WIN32_RAND
+
+#if defined _WIN32 || defined _WIN64
+#define BR_USE_WIN32_RAND   1
+#endif
+
+#endif
+
+#if BR_USE_URANDOM
+#include <sys/types.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#endif
+
+#if BR_USE_WIN32_RAND
+#include <windows.h>
+#include <wincrypt.h>
+#pragma comment(lib, "advapi32")
+#endif
+
+#endif
+
+/* ==================================================================== */
+/*
+ * This part of the file does the low-level record management.
+ */
+
+/*
+ * IMPLEMENTATION NOTES
+ * ====================
+ *
+ * In this file, we designate by "input" (and the "i" letter) the "recv"
+ * operations: incoming records from the peer, from which payload data
+ * is obtained, and must be extracted by the application (or the SSL
+ * handshake engine). Similarly, "output" (and the "o" letter) is for
+ * "send": payload data injected by the application (and SSL handshake
+ * engine), to be wrapped into records, that are then conveyed to the
+ * peer over the transport medium.
+ *
+ * The input and output buffers may be distinct or shared. When
+ * shared, input and output cannot occur concurrently; the caller
+ * must make sure that it never needs to output data while input
+ * data has been received. In practice, a shared buffer prevents
+ * pipelining of HTTP requests, or similar protocols; however, a
+ * shared buffer saves RAM.
+ *
+ * The input buffer is pointed to by 'ibuf' and has size 'ibuf_len';
+ * the output buffer is pointed to by 'obuf' and has size 'obuf_len'.
+ * From the size of these buffers is derived the maximum fragment
+ * length, which will be honoured upon sending records; regardless of
+ * that length, incoming records will be processed as long as they
+ * fit in the input buffer, and their length still complies with the
+ * protocol specification (maximum plaintext payload length is 16384
+ * bytes).
+ *
+ * Three registers are used to manage buffering in ibuf, called ixa,
+ * ixb and ixc. Similarly, three registers are used to manage buffering
+ * in obuf, called oxa, oxb and oxc.
+ *
+ *
+ * At any time, the engine is in one of the following modes:
+ * -- Failed mode: an error occurs, no I/O can happen.
+ * -- Input mode: the engine can either receive record bytes from the
+ * transport layer, or it has some buffered payload bytes to yield.
+ * -- Output mode: the engine can either receive payload bytes, or it
+ * has some record bytes to send to the transport layer.
+ * -- Input/Output mode: both input and output modes are active. When
+ * the buffer is shared, this can happen only when the buffer is empty
+ * (no buffered payload bytes or record bytes in either direction).
+ *
+ *
+ * Failed mode:
+ * ------------
+ *
+ * I/O failed for some reason (invalid received data, not enough room
+ * for the next record...). No I/O may ever occur again for this context,
+ * until an explicit reset is performed. This mode, and the error code,
+ * are also used for protocol errors, especially handshake errors.
+ *
+ *
+ * Input mode:
+ * -----------
+ *
+ *  ixa   index within ibuf[] for the currently read data
+ *  ixb   maximum index within ibuf[] for the currently read data
+ *  ixc   number of bytes not yet received for the current record
+ * 
+ * -- When ixa == ixb, there is no available data for readers. When
+ * ixa != ixb, there is available data and it starts at offset ixa.
+ *
+ * -- When waiting for the next record header, ixa and ixb are equal
+ * and contain a value ranging from 0 to 4; ixc is equal to 5-ixa.
+ *
+ * -- When the header has been received, record data is obtained. The
+ * ixc field records how many bytes are still needed to reach the
+ * end of the current record.
+ *
+ *    ** If encryption is active, then ixa and ixb are kept equal, and
+ *    point to the end of the currently received record bytes. When
+ *    ixc reaches 0, decryption/MAC is applied, and ixa and ixb are
+ *    adjusted.
+ *
+ *    ** If encryption is not active, then ixa and ixb are distinct
+ *    and data can be read right away. Additional record data is
+ *    obtained only when ixa == ixb.
+ *
+ * Note: in input mode and no encryption, records larger than the buffer
+ * size are allowed. When encryption is active, the complete record must
+ * fit within the buffer, since it cannot be decrypted/MACed until it
+ * has been completely received.
+ *
+ * -- When receiving the next record header, 'version_in' contains the
+ * expected input version (0 if not expecting a specific version); on
+ * mismatch, the mode switches to 'failed'.
+ *
+ * -- When the header has been received, 'version_in' contains the received
+ * version. It is up to the caller to check and adjust the 'version_in' field
+ * to implement the required semantics.
+ *
+ * -- The 'record_type_in' field is updated with the incoming record type
+ * when the next record header has been received.
+ *
+ *
+ * Output mode:
+ * ------------
+ *
+ *  oxa   index within obuf[] for the currently accumulated data
+ *  oxb   maximum index within obuf[] for record data
+ *  oxc   pointer for start of record data, and for record sending
+ *
+ * -- When oxa != oxb, more data can be accumulated into the current
+ * record; when oxa == oxb, a closed record is being sent.
+ *
+ * -- When accumulating data, oxc points to the start of the data.
+ *
+ * -- During record sending, oxa (and oxb) point to the next record byte
+ * to send, and oxc indicates the end of the current record.
+ *
+ * Note: sent records must fit within the buffer, since the header is
+ * adjusted only when the complete record has been assembled.
+ *
+ * -- The 'version_out' and 'record_type_out' fields are used to build the
+ * record header when the mode is switched to 'sending'.
+ *
+ *
+ * Modes:
+ * ------
+ *
+ * The state register iomode contains one of the following values:
+ *
+ *  BR_IO_FAILED   I/O failed
+ *  BR_IO_IN       input mode
+ *  BR_IO_OUT      output mode
+ *  BR_IO_INOUT    input/output mode
+ *
+ * Whether encryption is active on incoming records is indicated by the
+ * incrypt flag. For outgoing records, there is no such flag; "encryption"
+ * is always considered active, but initially uses functions that do not
+ * encrypt anything. The 'incrypt' flag is needed because when there is
+ * no active encryption, records larger than the I/O buffer are accepted.
+ *
+ * Note: we do not support no-encryption modes (MAC only).
+ *
+ * TODO: implement GCM support
+ *
+ *
+ * Misc:
+ * -----
+ *
+ * 'max_frag_len' is the maximum plaintext size for an outgoing record.
+ * By default, it is set to the maximum value that fits in the provided
+ * buffers, in the following list: 512, 1024, 2048, 4096, 16384. The
+ * caller may change it if needed, but the new value MUST still fit in
+ * the buffers, and it MUST be one of the list above for compatibility
+ * with the Maximum Fragment Length extension.
+ *
+ * For incoming records, only the total buffer length and current
+ * encryption mode impact the maximum length for incoming records. The
+ * 'max_frag_len' value is still adjusted so that records up to that
+ * length can be both received and sent.
+ *
+ *
+ * Offsets and lengths:
+ * --------------------
+ *
+ * When sending fragments with TLS-1.1+, the maximum overhead is:
+ *   5 bytes for the record header
+ *   16 bytes for the explicit IV
+ *   48 bytes for the MAC (HMAC/SHA-384)
+ *   16 bytes for the padding (AES)
+ * so a total of 85 extra bytes. Note that we support block cipher sizes
+ * up to 16 bytes (AES) and HMAC output sizes up to 48 bytes (SHA-384).
+ *
+ * With TLS-1.0 and CBC mode, we apply a 1/n-1 split, for a maximum
+ * overhead of:
+ *   5 bytes for the first record header
+ *   32 bytes for the first record payload (AES-CBC + HMAC/SHA-1)
+ *   5 bytes for the second record header
+ *   20 bytes for the MAC (HMAC/SHA-1)
+ *   16 bytes for the padding (AES)
+ *   -1 byte to account for the payload byte in the first record
+ * so a total of 77 extra bytes at most, less than the 85 bytes above.
+ * Note that with TLS-1.0, the MAC is HMAC with either MD5 or SHA-1, but
+ * no other hash function.
+ *
+ * The implementation does not try to send larger records when the current
+ * encryption mode has less overhead.
+ *
+ * Maximum input record overhead is:
+ *   5 bytes for the record header
+ *   16 bytes for the explicit IV (TLS-1.1+)
+ *   48 bytes for the MAC (HMAC/SHA-384)
+ *   256 bytes for the padding
+ * so a total of 325 extra bytes.
+ *
+ * When receiving the next record header, it is written into the buffer
+ * bytes 0 to 4 (inclusive). Record data is always written into buf[]
+ * starting at offset 5. When encryption is active, the plaintext data
+ * may start at a larger offset (e.g. because of an explicit IV).
+ */
+
+#define MAX_OUT_OVERHEAD    85
+#define MAX_IN_OVERHEAD    325
+
+/* see inner.h: enter failed mode and record 'err' (first failure wins) */
+void
+br_ssl_engine_fail(br_ssl_engine_context *rc, int err)
+{
+	if (rc->iomode != BR_IO_FAILED) {
+		rc->iomode = BR_IO_FAILED;
+		rc->err = err;
+	}
+}
+
+/*
+ * Await a new record header (5 bytes); input-only mode reverts to in/out.
+ */
+static void
+make_ready_in(br_ssl_engine_context *rc)
+{
+	rc->ixa = rc->ixb = 0;
+	rc->ixc = 5;
+	if (rc->iomode == BR_IO_IN) {
+		rc->iomode = BR_IO_INOUT;
+	}
+}
+
+/*
+ * Adjust registers for a new outgoing record; OUT mode reverts to INOUT.
+ */
+static void
+make_ready_out(br_ssl_engine_context *rc)
+{
+	size_t a, b;
+
+	a = 5;	/* payload area starts after the 5-byte record header */
+	b = rc->obuf_len - a;
+	rc->out.vtable->max_plaintext(&rc->out.vtable, &a, &b);
+	if ((b - a) > rc->max_frag_len) {	/* honour max fragment length */
+		b = a + rc->max_frag_len;
+	}
+	rc->oxa = a;
+	rc->oxb = b;
+	rc->oxc = a;
+	if (rc->iomode == BR_IO_OUT) {
+		rc->iomode = BR_IO_INOUT;
+	}
+}
+
+/* see inner.h: set max_frag_len; shrink the open record's limit if it fits */
+void
+br_ssl_engine_new_max_frag_len(br_ssl_engine_context *rc, unsigned max_frag_len)
+{
+	size_t nxb;
+
+	rc->max_frag_len = max_frag_len;
+	nxb = rc->oxc + max_frag_len;	/* candidate new value for oxb */
+	if (rc->oxa < rc->oxb && rc->oxb > nxb && rc->oxa < nxb) {
+		rc->oxb = nxb;
+	}
+}
+
+/* see bearssl_ssl.h: split (bidi) or share one buffer for input and output */
+void
+br_ssl_engine_set_buffer(br_ssl_engine_context *rc,
+	void *buf, size_t buf_len, int bidi)
+{
+	if (buf == NULL) {
+		br_ssl_engine_set_buffers_bidi(rc, NULL, 0, NULL, 0);
+	} else {
+		/*
+		 * In bidirectional mode, we want to maximise input
+		 * buffer size, since we support arbitrary fragmentation
+		 * when sending, but the peer will not necessarily
+		 * comply to any low fragment length (in particular if
+		 * we are the server, because the maximum fragment
+		 * length extension is under client control).
+		 *
+		 * The last 'w' bytes of buf[] become the output buffer
+		 * (at least 512 bytes of plaintext plus overhead); the
+		 * first buf_len - w bytes become the input buffer, so
+		 * that the two areas never overlap.
+		 *
+		 * br_ssl_engine_set_buffers_bidi() then derives the
+		 * maximum fragment length from the smaller of the two.
+		 */
+		if (bidi) {
+			size_t w;
+
+			if (buf_len < (512 + MAX_IN_OVERHEAD
+				+ 512 + MAX_OUT_OVERHEAD))
+			{
+				rc->iomode = BR_IO_FAILED;
+				rc->err = BR_ERR_BAD_PARAM;
+				return;
+			} else if (buf_len < (16384 + MAX_IN_OVERHEAD
+				+ 512 + MAX_OUT_OVERHEAD))
+			{
+				w = 512 + MAX_OUT_OVERHEAD;
+			} else {
+				w = buf_len - (16384 + MAX_IN_OVERHEAD);
+			}
+			br_ssl_engine_set_buffers_bidi(rc,
+				buf, buf_len - w,
+				(unsigned char *)buf + (buf_len - w), w);
+		} else {
+			br_ssl_engine_set_buffers_bidi(rc,
+				buf, buf_len, NULL, 0);
+		}
+	}
+}
+
+/* see bearssl_ssl.h */
+void
+br_ssl_engine_set_buffers_bidi(br_ssl_engine_context *rc,
+	void *ibuf, size_t ibuf_len, void *obuf, size_t obuf_len)
+{
+	rc->iomode = BR_IO_INOUT;
+	rc->incrypt = 0;
+	rc->err = BR_ERR_OK;
+	rc->version_in = 0;
+	rc->record_type_in = 0;
+	rc->version_out = 0;
+	rc->record_type_out = 0;
+	if (ibuf == NULL) {
+		if (rc->ibuf == NULL) {
+			br_ssl_engine_fail(rc, BR_ERR_BAD_PARAM);
+		}
+	} else {
+		unsigned u;
+
+		rc->ibuf = ibuf;
+		rc->ibuf_len = ibuf_len;
+		if (obuf == NULL) {
+			obuf = ibuf;
+			obuf_len = ibuf_len;
+		}
+		rc->obuf = obuf;
+		rc->obuf_len = obuf_len;
+
+		/*
+		 * Compute the maximum fragment length (2^u, u from 14
+		 * down to 9) that fits for both incoming and outgoing
+		 * records. This length will be used in fragment length
+		 * negotiation, so we must honour it both ways.
+		 * Regardless, larger incoming records will be accepted,
+		 * as long as they fit in the actual buffer size.
+		 */
+		for (u = 14; u >= 9; u --) {
+			size_t flen;
+
+			flen = (size_t)1 << u;
+			if (obuf_len >= flen + MAX_OUT_OVERHEAD
+				&& ibuf_len >= flen + MAX_IN_OVERHEAD)
+			{
+				break;
+			}
+		}
+		if (u == 8) {
+			br_ssl_engine_fail(rc, BR_ERR_BAD_PARAM);
+			return;
+		} else if (u == 13) {	/* 8192 is not a standard length */
+			u = 12;
+		}
+		rc->max_frag_len = (size_t)1 << u;
+		rc->log_max_frag_len = u;
+		rc->peer_log_max_frag_len = 0;
+	}
+	rc->out.vtable = &br_sslrec_out_clear_vtable;
+	make_ready_in(rc);
+	make_ready_out(rc);
+}
+
+/*
+ * Clear buffers in both directions by resetting input and output registers.
+ */
+static void
+engine_clearbuf(br_ssl_engine_context *rc)
+{
+	make_ready_in(rc);
+	make_ready_out(rc);
+}
+
+/*
+ * Make sure the internal PRNG is initialised (not necessarily seeded yet).
+ * Returns 1 on success, 0 (engine failed, BR_ERR_BAD_STATE) if no hash fits.
+ */
+static int
+rng_init(br_ssl_engine_context *cc)
+{
+	const br_hash_class *h;
+
+	if (cc->rng_init_done != 0) {
+		return 1;
+	}
+
+	/*
+	 * If using TLS-1.2, then SHA-256 or SHA-384 must be present (or
+	 * both); we prefer SHA-256 which is faster for 32-bit systems.
+	 *
+	 * If using TLS-1.0 or 1.1 then SHA-1 must be present.
+	 *
+	 * Though HMAC_DRBG/SHA-1 is, as far as we know, as safe as
+	 * these things can be, we still prefer the SHA-2 functions over
+	 * SHA-1, if only for public relations (known theoretical
+	 * weaknesses of SHA-1 with regards to collisions are mostly
+	 * irrelevant here, but they still make people nervous).
+	 */
+	h = br_multihash_getimpl(&cc->mhash, br_sha256_ID);
+	if (!h) {
+		h = br_multihash_getimpl(&cc->mhash, br_sha384_ID);
+		if (!h) {
+			h = br_multihash_getimpl(&cc->mhash,
+				br_sha1_ID);
+			if (!h) {
+				br_ssl_engine_fail(cc, BR_ERR_BAD_STATE);
+				return 0;
+			}
+		}
+	}
+	br_hmac_drbg_init(&cc->rng, h, NULL, 0);
+	cc->rng_init_done = 1;
+	return 1;
+}
+
+/* see inner.h: 1 if the PRNG is seeded; 0 (engine failed) otherwise */
+int
+br_ssl_engine_init_rand(br_ssl_engine_context *cc)
+{
+	if (!rng_init(cc)) {
+		return 0;
+	}
+
+	/*
+	 * We always try OS/hardware seeding once. If it works, then
+	 * we assume proper seeding (rng_init_done == 2). If not, then
+	 * external entropy must have been injected; else we fail.
+	 */
+	if (!cc->rng_os_rand_done) {
+		br_prng_seeder sd;
+
+		sd = br_prng_seeder_system(NULL);
+		if (sd != 0 && sd(&cc->rng.vtable)) {
+			cc->rng_init_done = 2;
+		}
+		cc->rng_os_rand_done = 1;
+	}
+	if (cc->rng_init_done < 2) {
+		br_ssl_engine_fail(cc, BR_ERR_NO_RANDOM);
+		return 0;
+	}
+	return 1;
+}
+
+/* see bearssl_ssl.h */
+void
+br_ssl_engine_inject_entropy(br_ssl_engine_context *cc,
+	const void *data, size_t len)
+{
+	/*
+	 * Externally provided entropy is assumed to be "good enough"
+	 * (we cannot really test its quality) so if the RNG structure
+	 * could be initialised at all, then we mark the RNG as
+	 * "properly seeded" (rng_init_done == 2).
+	 */
+	if (!rng_init(cc)) {
+		return;
+	}
+	br_hmac_drbg_update(&cc->rng, data, len);
+	cc->rng_init_done = 2;
+}
+
+/*
+ * We define a few internal functions that implement the low-level engine
+ * API for I/O; the external API (br_ssl_engine_sendapp_buf() and similar
+ * functions) is built upon these functions, with special processing for
+ * records which are not of type "application data".
+ *
+ *   recvrec_buf, recvrec_ack     receives bytes from transport medium
+ *   sendrec_buf, sendrec_ack     send bytes to transport medium
+ *   recvpld_buf, recvpld_ack     receives payload data from engine
+ *   sendpld_buf, sendpld_ack     send payload data to engine
+ */
+
+static unsigned char *
+recvrec_buf(const br_ssl_engine_context *rc, size_t *len)
+{
+	if (rc->shutdown_recv) {
+		*len = 0;
+		return NULL;
+	}
+
+	/*
+	 * Bytes from the transport can be injected only if the mode is
+	 * compatible (in or in/out), and ixa == ixb; ixc then contains
+	 * the number of bytes that are still expected, and the returned
+	 * length is ixc clamped to the room left in ibuf after ixa.
+	 *
+	 * We cannot get "stuck" here (buffer is full, but still more
+	 * data is expected) because oversized records are detected when
+	 * their header is processed.
+	 */
+	switch (rc->iomode) {
+	case BR_IO_IN:
+	case BR_IO_INOUT:
+		if (rc->ixa == rc->ixb) {
+			size_t z;
+
+			z = rc->ixc;
+			if (z > rc->ibuf_len - rc->ixa) {
+				z = rc->ibuf_len - rc->ixa;
+			}
+			*len = z;
+			return rc->ibuf + rc->ixa;
+		}
+		break;
+	}
+	*len = 0;
+	return NULL;
+}
+
+static void
+recvrec_ack(br_ssl_engine_context *rc, size_t len)
+{
+	unsigned char *pbuf;
+	size_t pbuf_len;
+
+	/*
+	 * 'len' bytes from the transport were written into ibuf. With
+	 * a shared input/output buffer, we cannot (temporarily) handle
+	 * outgoing data, hence the switch to input-only mode.
+	 */
+	if (rc->iomode == BR_IO_INOUT && rc->ibuf == rc->obuf) {
+		rc->iomode = BR_IO_IN;
+	}
+
+	/*
+	 * Adjust data pointers.
+	 */
+	rc->ixb = (rc->ixa += len);
+	rc->ixc -= len;
+
+	/*
+	 * If we are receiving a header and did not fully obtain it
+	 * yet, then just wait for the next bytes.
+	 */
+	if (rc->ixa < 5) {
+		return;
+	}
+
+	/*
+	 * If we just obtained a full header, process it.
+	 */
+	if (rc->ixa == 5) {
+		unsigned version;
+		unsigned rlen;
+
+		/*
+		 * Get record type and version. We support only versions
+		 * 3.x (if the version major number does not match, then
+		 * we suppose that the record format is too alien for us
+		 * to process it).
+		 *
+		 * Note: right now, we reject clients that try to send
+		 * a ClientHello in a format compatible with SSL-2.0. It
+		 * is unclear whether this will ever be supported; and
+		 * if we want to support it, then this might be done in
+		 * the server-specific code, not here.
+		 */
+		rc->record_type_in = rc->ibuf[0];
+		version = br_dec16be(rc->ibuf + 1);
+		if ((version >> 8) != 3) {
+			br_ssl_engine_fail(rc, BR_ERR_UNSUPPORTED_VERSION);
+			return;
+		}
+
+		/*
+		 * We ensure that successive records have the same
+		 * version. The handshake code must check and adjust the
+		 * variables when necessary to accommodate the protocol
+		 * negotiation details.
+		 */
+		if (rc->version_in != 0 && rc->version_in != version) {
+			br_ssl_engine_fail(rc, BR_ERR_BAD_VERSION);
+			return;
+		}
+		rc->version_in = version;
+
+		/*
+		 * Decode record length. We must check that the length
+		 * is valid (relatively to the current encryption mode)
+		 * and also (if encryption is active) that the record
+		 * will fit in our buffer.
+		 *
+		 * When no encryption is active, we can process records
+		 * by chunks, and thus accept any record up to the
+		 * maximum allowed plaintext length (16384 bytes).
+		 */
+		rlen = br_dec16be(rc->ibuf + 3);
+		if (rc->incrypt) {
+			if (!rc->in.vtable->check_length(
+				&rc->in.vtable, rlen))
+			{
+				br_ssl_engine_fail(rc, BR_ERR_BAD_LENGTH);
+				return;
+			}
+			if (rlen > (rc->ibuf_len - 5)) {
+				br_ssl_engine_fail(rc, BR_ERR_TOO_LARGE);
+				return;
+			}
+		} else {
+			if (rlen > 16384) {
+				br_ssl_engine_fail(rc, BR_ERR_BAD_LENGTH);
+				return;
+			}
+		}
+
+		/*
+		 * If the record is completely empty then we must switch
+		 * to a new record. Note that, in that case, we
+		 * completely ignore the record type, which is fitting
+		 * since we received no actual data of that type.
+		 *
+		 * A completely empty record is technically allowed as
+		 * long as encryption/MAC is not active, i.e. before
+		 * completion of the first handshake. It is still weird;
+		 * it might conceptually be useful as a heartbeat or
+		 * keep-alive mechanism while some lengthy operation is
+		 * going on, e.g. interaction with a human user.
+		 */
+		if (rlen == 0) {
+			make_ready_in(rc);
+		} else {
+			rc->ixa = rc->ixb = 5;
+			rc->ixc = rlen;
+		}
+		return;
+	}
+
+	/*
+	 * If there is no active encryption, then the data can be read
+	 * right away. Note that we do not receive bytes from the
+	 * transport medium when we still have payload bytes to be
+	 * acknowledged.
+	 */
+	if (!rc->incrypt) {
+		rc->ixa = 5;
+		return;
+	}
+
+	/*
+	 * Since encryption is active, we must wait for a full record
+	 * before processing it.
+	 */
+	if (rc->ixc != 0) {
+		return;
+	}
+
+	/*
+	 * We got the full record. Decrypt it.
+	 */
+	pbuf_len = rc->ixa - 5;
+	pbuf = rc->in.vtable->decrypt(&rc->in.vtable,
+		rc->record_type_in, rc->version_in, rc->ibuf + 5, &pbuf_len);
+	if (pbuf == 0) {
+		br_ssl_engine_fail(rc, BR_ERR_BAD_MAC);
+		return;
+	}
+	rc->ixa = (size_t)(pbuf - rc->ibuf);
+	rc->ixb = rc->ixa + pbuf_len;
+
+	/*
+	 * Decryption may have yielded an empty record, in which case
+	 * we get back to "ready" state immediately.
+	 */
+	if (rc->ixa == rc->ixb) {
+		make_ready_in(rc);
+	}
+}
+
+/* see inner.h */
+int
+br_ssl_engine_recvrec_finished(const br_ssl_engine_context *rc)
+{
+	/* Outside of the input-capable modes, nothing is pending. */
+	if (rc->iomode != BR_IO_IN && rc->iomode != BR_IO_INOUT) {
+		return 1;
+	}
+
+	/* No byte of the record left to get, or header not complete. */
+	return rc->ixc == 0 || rc->ixa < 5;
+}
+
+static unsigned char *
+recvpld_buf(const br_ssl_engine_context *rc, size_t *len)
+{
+	size_t avail;
+
+	/*
+	 * Payload can be handed out only in an input-capable mode
+	 * (in or in/out); its size is the gap between ixa and ixb.
+	 */
+	avail = 0;
+	if (rc->iomode == BR_IO_IN || rc->iomode == BR_IO_INOUT) {
+		avail = rc->ixb - rc->ixa;
+	}
+
+	*len = avail;
+	return (avail != 0) ? (rc->ibuf + rc->ixa) : NULL;
+}
+
+static void
+recvpld_ack(br_ssl_engine_context *rc, size_t len)
+{
+	rc->ixa += len;
+
+	/*
+	 * 'len' payload bytes were consumed. If that was all the
+	 * available data, then we either expect the remainder of the
+	 * current record (ixc != 0; this may happen when encryption is
+	 * not active), or go to "ready" state.
+	 */
+	if (rc->ixa == rc->ixb) {
+		if (rc->ixc == 0) {
+			make_ready_in(rc);
+		} else {
+			rc->ixa = rc->ixb = 5;
+		}
+	}
+}
+
+static unsigned char *
+sendpld_buf(const br_ssl_engine_context *rc, size_t *len)
+{
+	size_t room;
+
+	/*
+	 * Payload can be accepted only in an output-capable mode
+	 * (out or in/out); the room left is the gap from oxa to oxb.
+	 */
+	room = 0;
+	if (rc->iomode == BR_IO_OUT || rc->iomode == BR_IO_INOUT) {
+		room = rc->oxb - rc->oxa;
+	}
+
+	*len = room;
+	return (room != 0) ? (rc->obuf + rc->oxa) : NULL;
+}
+
+/*
+ * If some payload bytes have been accumulated, then wrap them into
+ * an outgoing record. Otherwise, this function does nothing, unless
+ * 'force' is non-zero, in which case an empty record is assembled.
+ * Once built, the record spans obuf[oxa..oxc), with oxa == oxb.
+ * The caller must take care not to invoke this function if the engine
+ * is not currently ready to receive payload bytes to send.
+ */
+static void
+sendpld_flush(br_ssl_engine_context *rc, int force)
+{
+	size_t xlen;
+	unsigned char *buf;
+
+	if (rc->oxa == rc->oxb) {
+		return;
+	}
+	xlen = rc->oxa - rc->oxc;
+	if (xlen == 0 && !force) {
+		return;
+	}
+	buf = rc->out.vtable->encrypt(&rc->out.vtable,
+		rc->record_type_out, rc->version_out,
+		rc->obuf + rc->oxc, &xlen);
+	rc->oxb = rc->oxa = (size_t)(buf - rc->obuf);
+	rc->oxc = rc->oxa + xlen;
+}
+
+static void
+sendpld_ack(br_ssl_engine_context *rc, size_t len)
+{
+	/*
+	 * 'len' payload bytes were added. If using a shared buffer,
+	 * then we may have to switch to output-only mode.
+	 */
+	if (rc->iomode == BR_IO_INOUT && rc->ibuf == rc->obuf) {
+		rc->iomode = BR_IO_OUT;
+	}
+	rc->oxa += len;
+	if (rc->oxa >= rc->oxb) {
+		/*
+		 * Set oxb to one more than oxa so that sendpld_flush()
+		 * does not mistakenly believe that a record is
+		 * already prepared and being sent.
+		 */
+		rc->oxb = rc->oxa + 1;
+		sendpld_flush(rc, 0);
+	}
+}
+
+static unsigned char *	/* returns the remaining bytes of a completed record, or NULL */
+sendrec_buf(const br_ssl_engine_context *rc, size_t *len)
+{
+	/*
+	 * When still gathering payload bytes, oxc points to the start
+	 * of the record data, so oxc <= oxa. However, when a full
+	 * record has been completed, oxc points to the end of the record,
+	 * so oxc > oxa.
+	 */
+	switch (rc->iomode) {
+	case BR_IO_OUT:
+	case BR_IO_INOUT:
+		if (rc->oxc > rc->oxa) {
+			*len = rc->oxc - rc->oxa;	/* bytes from oxa up to the record end */
+			return rc->obuf + rc->oxa;
+		}
+		break;
+	}
+	*len = 0;	/* other mode, or no completed record */
+	return NULL;
+}
+
+static void	/* acknowledges 'len' record bytes taken from the sendrec_buf() location */
+sendrec_ack(br_ssl_engine_context *rc, size_t len)
+{
+	rc->oxb = (rc->oxa += len);	/* advance oxa and keep oxb equal to it */
+	if (rc->oxa == rc->oxc) {	/* oxa reached the record end */
+		make_ready_out(rc);
+	}
+}
+
+/*
+ * Test whether there is some buffered outgoing record that still must
+ * be sent.
+ */
+static inline int
+has_rec_tosend(const br_ssl_engine_context *rc)
+{
+	return rc->oxa == rc->oxb && rc->oxa != rc->oxc;	/* 1 if a record is pending, else 0 */
+}
+
+/*
+ * The "no encryption" mode has no overhead. It limits the payload size
+ * to the maximum size allowed by the standard (16384 bytes); the caller
+ * is responsible for possibly enforcing a smaller fragment length.
+ */
+static void
+clear_max_plaintext(const br_sslrec_out_clear_context *cc,
+	size_t *start, size_t *end)
+{
+	size_t len;
+
+	(void)cc;	/* context is not used */
+	len = *end - *start;	/* size of the range offered by the caller */
+	if (len > 16384) {
+		*end = *start + 16384;	/* shrink the range; *start is never modified */
+	}
+}
+
+/*
+ * In "no encryption" mode, encryption is trivial (a no-operation) so
+ * we just have to encode the header.
+ */
+static unsigned char *
+clear_encrypt(br_sslrec_out_clear_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{
+	unsigned char *buf;
+
+	(void)cc;	/* context is not used */
+	buf = (unsigned char *)data - 5;	/* header goes in the 5 bytes just before the payload */
+	buf[0] = record_type;	/* byte 0: record type */
+	br_enc16be(buf + 1, version);	/* bytes 1-2: protocol version */
+	br_enc16be(buf + 3, *data_len);	/* bytes 3-4: payload length */
+	*data_len += 5;	/* report header + payload length */
+	return buf;	/* start of the assembled record */
+}
+
+/* see bearssl_ssl.h */
+const br_sslrec_out_class br_sslrec_out_clear_vtable = {
+	sizeof(br_sslrec_out_clear_context),	/* size of the matching context type */
+	(void (*)(const br_sslrec_out_class *const *, size_t *, size_t *))	/* cast to generic type */
+		&clear_max_plaintext,
+	(unsigned char *(*)(const br_sslrec_out_class **,	/* cast to generic type as well */
+		int, unsigned, void *, size_t *))
+		&clear_encrypt
+};
+
+/* ==================================================================== */
+/*
+ * In this part of the file, we handle the various record types, and
+ * communications with the handshake processor.
+ */
+
+/*
+ * IMPLEMENTATION NOTES
+ * ====================
+ *
+ * The handshake processor is written in T0 and runs as a coroutine.
+ * It receives the contents of all records except application data, and
+ * is responsible for producing the contents of all records except
+ * application data.
+ *
+ * A state flag is maintained, which specifies whether application data
+ * is acceptable or not. When it is set:
+ *
+ * -- Application data can be injected as payload data (provided that
+ *    the output buffer is ready for that).
+ *
+ * -- Incoming application data records are accepted, and yield data
+ *    that the caller may retrieve.
+ *
+ * When the flag is cleared, application data is not accepted from the
+ * application, and incoming application data records trigger an error.
+ *
+ *
+ * Records of type handshake, alert or change-cipher-spec are handled
+ * by the handshake processor. The handshake processor is written in T0
+ * and runs as a coroutine; it gets invoked whenever one of the following
+ * situations is reached:
+ *
+ * -- An incoming record has type handshake, alert or change-cipher-spec,
+ *    and yields data that can be read (zero-length records are thus
+ *    ignored).
+ *
+ * -- An outgoing record has just finished being sent, and the "application
+ *    data" flag is cleared.
+ *
+ * -- The caller wishes to perform a close (call to br_ssl_engine_close()).
+ *
+ * -- The caller wishes to perform a renegotiation (call to
+ *    br_ssl_engine_renegotiate()).
+ *
+ * Whenever the handshake processor is entered, access to the payload
+ * buffers is provided, along with some information about explicit
+ * closures or renegotiations.
+ */
+
+/* see bearssl_ssl.h */
+void
+br_ssl_engine_set_suites(br_ssl_engine_context *cc,
+	const uint16_t *suites, size_t suites_num)
+{
+	if (suites_num > (sizeof cc->suites_buf) / sizeof *suites) {	/* division cannot wrap */
+		br_ssl_engine_fail(cc, BR_ERR_BAD_PARAM);	/* too many suites for suites_buf */
+		return;
+	}
+	memcpy(cc->suites_buf, suites, suites_num * sizeof *suites);	/* product is bounded here */
+	cc->suites_num = suites_num;
+}
+
+/*
+ * Give control to handshake processor. 'action' is 1 for a close,
+ * 2 for a renegotiation, or 0 for a jump due to I/O completion.
+ */
+static void
+jump_handshake(br_ssl_engine_context *cc, int action)
+{
+	/*
+	 * We use a loop because the handshake processor actions may
+	 * allow for more actions; namely, if the processor reads all
+	 * input data, then it may allow for output data to be produced,
+	 * in case of a shared in/out buffer.
+	 */
+	for (;;) {
+		size_t hlen_in, hlen_out;	/* lengths offered to the processor on this pass */
+
+		/*
+		 * Get input buffer. We do not want to provide
+		 * application data to the handshake processor (we could
+		 * get called with an explicit close or renegotiation
+		 * while there is application data ready to be read).
+		 */
+		cc->hbuf_in = recvpld_buf(cc, &hlen_in);
+		if (cc->hbuf_in != NULL
+			&& cc->record_type_in == BR_SSL_APPLICATION_DATA)
+		{
+			hlen_in = 0;	/* offer no input bytes in that case */
+		}
+
+		/*
+		 * Get output buffer. The handshake processor never
+		 * leaves an unfinished outgoing record, so if there is
+		 * buffered output, then it MUST be some application
+		 * data, so the processor cannot write to it.
+		 */
+		cc->saved_hbuf_out = cc->hbuf_out = sendpld_buf(cc, &hlen_out);
+		if (cc->hbuf_out != NULL && br_ssl_engine_has_pld_to_send(cc)) {
+			hlen_out = 0;	/* offer no output room in that case */
+		}
+
+		/*
+		 * Note: hlen_in and hlen_out can be both non-zero only if
+		 * the input and output buffers are disjoint. Thus, we can
+		 * offer both buffers to the handshake code.
+		 */
+
+		cc->hlen_in = hlen_in;
+		cc->hlen_out = hlen_out;
+		cc->action = action;
+		cc->hsrun(&cc->cpu);	/* run the handshake processor */
+		if (br_ssl_engine_closed(cc)) {	/* engine is closed after the run: stop here */
+			return;
+		}
+		if (cc->hbuf_out != cc->saved_hbuf_out) {
+			sendpld_ack(cc, cc->hbuf_out - cc->saved_hbuf_out);	/* ack the hbuf_out advance */
+		}
+		if (hlen_in != cc->hlen_in) {
+			recvpld_ack(cc, hlen_in - cc->hlen_in);	/* ack the decrease of hlen_in */
+			if (cc->hlen_in == 0) {
+				/*
+				 * We read all data bytes, which may have
+				 * released the output buffer in case it
+				 * is shared with the input buffer, and
+				 * the handshake code might be waiting for
+				 * that.
+				 */
+				action = 0;	/* next pass is a plain I/O-completion jump */
+				continue;
+			}
+		}
+		break;
+	}
+}
+
+/* see inner.h */
+void
+br_ssl_engine_flush_record(br_ssl_engine_context *cc)
+{
+	if (cc->hbuf_out != cc->saved_hbuf_out) {	/* hbuf_out moved since it was saved */
+		sendpld_ack(cc, cc->hbuf_out - cc->saved_hbuf_out);
+	}
+	if (br_ssl_engine_has_pld_to_send(cc)) {
+		sendpld_flush(cc, 0);	/* wrap pending payload into a record (no forced empty one) */
+	}
+	cc->saved_hbuf_out = cc->hbuf_out = sendpld_buf(cc, &cc->hlen_out);	/* refresh pointers */
+}
+
+/* see bearssl_ssl.h */
+unsigned char *
+br_ssl_engine_sendapp_buf(const br_ssl_engine_context *cc, size_t *len)
+{
+	if (!(cc->application_data & 1)) {	/* low bit clear: report no buffer */
+		*len = 0;
+		return NULL;
+	}
+	return sendpld_buf(cc, len);	/* otherwise same result as sendpld_buf() */
+}
+
+/* see bearssl_ssl.h */
+void
+br_ssl_engine_sendapp_ack(br_ssl_engine_context *cc, size_t len)
+{
+	sendpld_ack(cc, len);	/* plain forward: the application_data flag is not re-checked */
+}
+
+/* see bearssl_ssl.h */
+unsigned char *
+br_ssl_engine_recvapp_buf(const br_ssl_engine_context *cc, size_t *len)
+{
+	if (!(cc->application_data & 1)	/* low bit of the flag must be set ... */
+		|| cc->record_type_in != BR_SSL_APPLICATION_DATA)	/* ... and the record type must match */
+	{
+		*len = 0;
+		return NULL;
+	}
+	return recvpld_buf(cc, len);
+}
+
+/* see bearssl_ssl.h */
+void
+br_ssl_engine_recvapp_ack(br_ssl_engine_context *cc, size_t len)
+{
+	recvpld_ack(cc, len);	/* plain forward: no flag or record-type check here */
+}
+
+/* see bearssl_ssl.h */
+unsigned char *
+br_ssl_engine_sendrec_buf(const br_ssl_engine_context *cc, size_t *len)
+{
+	return sendrec_buf(cc, len);	/* public name for the static helper */
+}
+
+/* see bearssl_ssl.h */
+void
+br_ssl_engine_sendrec_ack(br_ssl_engine_context *cc, size_t len)
+{
+	sendrec_ack(cc, len);
+	if (len != 0 && !has_rec_tosend(cc)	/* some bytes acked and no record left pending */
+		&& (cc->record_type_out != BR_SSL_APPLICATION_DATA	/* non-application record, or */
+		|| (cc->application_data & 1) == 0))	/* application data flag is cleared */
+	{
+		jump_handshake(cc, 0);	/* action 0: jump due to I/O completion */
+	}
+}
+
+/* see bearssl_ssl.h */
+unsigned char *
+br_ssl_engine_recvrec_buf(const br_ssl_engine_context *cc, size_t *len)
+{
+	return recvrec_buf(cc, len);	/* public name for the static helper */
+}
+
+/* see bearssl_ssl.h */
+void
+br_ssl_engine_recvrec_ack(br_ssl_engine_context *cc, size_t len)
+{
+	unsigned char *buf;
+
+	recvrec_ack(cc, len);
+	if (br_ssl_engine_closed(cc)) {	/* engine is closed after the ack: nothing more to do */
+		return;
+	}
+
+	/*
+	 * We just received some bytes from the peer. This may have
+	 * yielded some payload bytes, in which case we must process
+	 * them according to the record type.
+	 */
+	buf = recvpld_buf(cc, &len);	/* note: 'len' is reused for the payload length */
+	if (buf != NULL) {
+		switch (cc->record_type_in) {
+		case BR_SSL_CHANGE_CIPHER_SPEC:
+		case BR_SSL_ALERT:
+		case BR_SSL_HANDSHAKE:
+			jump_handshake(cc, 0);
+			break;
+		case BR_SSL_APPLICATION_DATA:
+			if (cc->application_data == 1) {	/* leave the data in place for the caller */
+				break;
+			}
+
+			/*
+			 * If we are currently closing, and waiting for
+			 * a close_notify from the peer, then incoming
+			 * application data should be discarded.
+			 */
+			if (cc->application_data == 2) {
+				recvpld_ack(cc, len);	/* discard by acknowledging all of it */
+				break;
+			}
+
+			/* Fall through */
+		default:
+			br_ssl_engine_fail(cc, BR_ERR_UNEXPECTED);	/* any other type or flag value */
+			break;
+		}
+	}
+}
+
+/* see bearssl_ssl.h */
+void
+br_ssl_engine_close(br_ssl_engine_context *cc)
+{
+	if (!br_ssl_engine_closed(cc)) {	/* no-op when the engine is already closed */
+		/*
+		 * If we are not already closed, then we need to
+		 * initiate the closure. Once closing, any incoming
+		 * application data is discarded; we should also discard
+		 * application data which is already there but has not
+		 * been acknowledged by the application yet (this mimics
+		 * usual semantics on BSD sockets: you cannot read()
+		 * once you called close(), even if there was some
+		 * unread data already buffered).
+		 */
+		size_t len;
+
+		if (br_ssl_engine_recvapp_buf(cc, &len) != NULL && len != 0) {
+			br_ssl_engine_recvapp_ack(cc, len);	/* drop the unread application data */
+		}
+		jump_handshake(cc, 1);	/* action 1: close */
+	}
+}
+
+/* see bearssl_ssl.h */
+int
+br_ssl_engine_renegotiate(br_ssl_engine_context *cc)
+{
+	size_t len;	/* only needed as an output slot for recvapp_buf() */
+
+	if (br_ssl_engine_closed(cc) || cc->reneg == 1	/* refuse: closed, or reneg field is 1 */
+		|| (cc->flags & BR_OPT_NO_RENEGOTIATION) != 0	/* refuse: option flag is set */
+		|| br_ssl_engine_recvapp_buf(cc, &len) != NULL)	/* refuse: unread application data */
+	{
+		return 0;	/* nothing was started */
+	}
+	jump_handshake(cc, 2);	/* action 2: renegotiation */
+	return 1;	/* the handshake processor was invoked */
+}
+
+/* see bearssl_ssl.h */
+unsigned
+br_ssl_engine_current_state(const br_ssl_engine_context *cc)
+{
+	unsigned s;
+	size_t len;	/* scratch output; the lengths themselves are ignored */
+
+	if (br_ssl_engine_closed(cc)) {
+		return BR_SSL_CLOSED;	/* returned alone, never combined with other flags */
+	}
+
+	s = 0;	/* one bit is OR-ed in per buffer accessor that returns non-NULL */
+	if (br_ssl_engine_sendrec_buf(cc, &len) != NULL) {
+		s |= BR_SSL_SENDREC;
+	}
+	if (br_ssl_engine_recvrec_buf(cc, &len) != NULL) {
+		s |= BR_SSL_RECVREC;
+	}
+	if (br_ssl_engine_sendapp_buf(cc, &len) != NULL) {
+		s |= BR_SSL_SENDAPP;
+	}
+	if (br_ssl_engine_recvapp_buf(cc, &len) != NULL) {
+		s |= BR_SSL_RECVAPP;
+	}
+	return s;
+}
+
+/* see bearssl_ssl.h */
+void
+br_ssl_engine_flush(br_ssl_engine_context *cc, int force)
+{
+	if (!br_ssl_engine_closed(cc) && (cc->application_data & 1) != 0) {	/* else: no-op */
+		sendpld_flush(cc, force);	/* non-zero 'force' assembles a record even if empty */
+	}
+}
+
+/* see inner.h */
+void
+br_ssl_engine_hs_reset(br_ssl_engine_context *cc,
+	void (*hsinit)(void *), void (*hsrun)(void *))
+{
+	engine_clearbuf(cc);
+	cc->cpu.dp = cc->dp_stack;	/* point the processor's dp at the start of dp_stack */
+	cc->cpu.rp = cc->rp_stack;	/* point the processor's rp at the start of rp_stack */
+	hsinit(&cc->cpu);	/* caller-supplied initialisation of the processor state */
+	cc->hsrun = hsrun;	/* remembered for later calls from jump_handshake() */
+	cc->shutdown_recv = 0;
+	cc->application_data = 0;	/* flag cleared: application data is not accepted */
+	cc->alert = 0;
+	jump_handshake(cc, 0);	/* give the processor its first run */
+}
+
+/* see inner.h */
+br_tls_prf_impl
+br_ssl_engine_get_PRF(br_ssl_engine_context *cc, int prf_id)
+{
+	if (cc->session.version >= BR_TLS12) {	/* 'prf_id' only matters on this branch */
+		if (prf_id == br_sha384_ID) {
+			return cc->prf_sha384;
+		} else {
+			return cc->prf_sha256;	/* any prf_id other than br_sha384_ID */
+		}
+	} else {
+		return cc->prf10;	/* versions below BR_TLS12: prf_id is ignored */
+	}
+}
+
+/* see inner.h */
+void
+br_ssl_engine_compute_master(br_ssl_engine_context *cc,
+	int prf_id, const void *pms, size_t pms_len)
+{
+	br_tls_prf_impl iprf;
+	br_tls_prf_seed_chunk seed[2] = {	/* seed order: client random, then server random */
+		{ cc->client_random, sizeof cc->client_random },
+		{ cc->server_random, sizeof cc->server_random }
+	};
+
+	iprf = br_ssl_engine_get_PRF(cc, prf_id);
+	iprf(cc->session.master_secret, sizeof cc->session.master_secret,	/* output buffer */
+		pms, pms_len, "master secret", 2, seed);	/* secret = 'pms', label, 2 seed chunks */
+}
+
+/*
+ * Compute key block: writes 2 * half_len bytes into kb[].
+ */
+static void
+compute_key_block(br_ssl_engine_context *cc, int prf_id,
+	size_t half_len, unsigned char *kb)
+{
+	br_tls_prf_impl iprf;
+	br_tls_prf_seed_chunk seed[2] = {	/* seed order: server random, then client random */
+		{ cc->server_random, sizeof cc->server_random },
+		{ cc->client_random, sizeof cc->client_random }
+	};
+
+	iprf = br_ssl_engine_get_PRF(cc, prf_id);
+	iprf(kb, half_len << 1,	/* output length is twice 'half_len' */
+		cc->session.master_secret, sizeof cc->session.master_secret,
+		"key expansion", 2, seed);
+}
+
+/* see inner.h */
+void
+br_ssl_engine_switch_cbc_in(br_ssl_engine_context *cc,
+	int is_client, int prf_id, int mac_id,
+	const br_block_cbcdec_class *bc_impl, size_t cipher_key_len)
+{
+	unsigned char kb[192];	/* key block; not wiped before return */
+	unsigned char *cipher_key, *mac_key, *iv;
+	const br_hash_class *imh;
+	size_t mac_key_len, mac_out_len, iv_len;
+
+	imh = br_ssl_engine_get_hash(cc, mac_id);
+	mac_out_len = (imh->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK;
+	mac_key_len = mac_out_len;	/* MAC key length is set equal to the MAC output length */
+
+	/*
+	 * TLS 1.1+ uses per-record explicit IV, so no IV to generate here.
+	 */
+	if (cc->session.version >= BR_TLS11) {
+		iv_len = 0;
+	} else {
+		iv_len = bc_impl->block_size;
+	}
+	compute_key_block(cc, prf_id,	/* kb = two MAC keys, two cipher keys, two IVs */
+		mac_key_len + cipher_key_len + iv_len, kb);
+	if (is_client) {	/* inbound on a client: second entry of each pair */
+		mac_key = &kb[mac_key_len];
+		cipher_key = &kb[(mac_key_len << 1) + cipher_key_len];
+		iv = &kb[((mac_key_len + cipher_key_len) << 1) + iv_len];
+	} else {	/* inbound on a server: first entry of each pair */
+		mac_key = &kb[0];
+		cipher_key = &kb[mac_key_len << 1];
+		iv = &kb[(mac_key_len + cipher_key_len) << 1];
+	}
+	if (iv_len == 0) {
+		iv = NULL;	/* no IV was generated: pass NULL to init */
+	}
+	cc->icbc_in->init(&cc->in.cbc.vtable,
+		bc_impl, cipher_key, cipher_key_len,
+		imh, mac_key, mac_key_len, mac_out_len, iv);
+	cc->incrypt = 1;	/* set only by the inbound switch functions in this file */
+}
+
+/* see inner.h */
+void
+br_ssl_engine_switch_cbc_out(br_ssl_engine_context *cc,
+	int is_client, int prf_id, int mac_id,
+	const br_block_cbcenc_class *bc_impl, size_t cipher_key_len)
+{
+	unsigned char kb[192];	/* key block; not wiped before return */
+	unsigned char *cipher_key, *mac_key, *iv;
+	const br_hash_class *imh;
+	size_t mac_key_len, mac_out_len, iv_len;
+
+	imh = br_ssl_engine_get_hash(cc, mac_id);
+	mac_out_len = (imh->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK;
+	mac_key_len = mac_out_len;	/* MAC key length is set equal to the MAC output length */
+
+	/*
+	 * TLS 1.1+ uses per-record explicit IV, so no IV to generate here.
+	 */
+	if (cc->session.version >= BR_TLS11) {
+		iv_len = 0;
+	} else {
+		iv_len = bc_impl->block_size;
+	}
+	compute_key_block(cc, prf_id,	/* kb = two MAC keys, two cipher keys, two IVs */
+		mac_key_len + cipher_key_len + iv_len, kb);
+	if (is_client) {	/* outbound on a client: first entry of each pair */
+		mac_key = &kb[0];
+		cipher_key = &kb[mac_key_len << 1];
+		iv = &kb[(mac_key_len + cipher_key_len) << 1];
+	} else {	/* outbound on a server: second entry of each pair */
+		mac_key = &kb[mac_key_len];
+		cipher_key = &kb[(mac_key_len << 1) + cipher_key_len];
+		iv = &kb[((mac_key_len + cipher_key_len) << 1) + iv_len];
+	}
+	if (iv_len == 0) {
+		iv = NULL;	/* no IV was generated: pass NULL to init */
+	}
+	cc->icbc_out->init(&cc->out.cbc.vtable,
+		bc_impl, cipher_key, cipher_key_len,
+		imh, mac_key, mac_key_len, mac_out_len, iv);
+}
+
+/* see inner.h */
+void
+br_ssl_engine_switch_gcm_in(br_ssl_engine_context *cc,
+	int is_client, int prf_id,
+	const br_block_ctr_class *bc_impl, size_t cipher_key_len)
+{
+	unsigned char kb[72];	/* key block; not wiped before return */
+	unsigned char *cipher_key, *iv;
+
+	compute_key_block(cc, prf_id, cipher_key_len + 4, kb);	/* two keys, then two 4-byte IVs */
+	if (is_client) {	/* inbound on a client: second key, second IV */
+		cipher_key = &kb[cipher_key_len];
+		iv = &kb[(cipher_key_len << 1) + 4];
+	} else {	/* inbound on a server: first key, first IV */
+		cipher_key = &kb[0];
+		iv = &kb[cipher_key_len << 1];
+	}
+	cc->igcm_in->init(&cc->in.gcm.vtable.in,
+		bc_impl, cipher_key, cipher_key_len, cc->ighash, iv);
+	cc->incrypt = 1;	/* set only by the inbound switch functions in this file */
+}
+
+/* see inner.h */
+void
+br_ssl_engine_switch_gcm_out(br_ssl_engine_context *cc,
+	int is_client, int prf_id,
+	const br_block_ctr_class *bc_impl, size_t cipher_key_len)
+{
+	unsigned char kb[72];	/* key block; not wiped before return */
+	unsigned char *cipher_key, *iv;
+
+	compute_key_block(cc, prf_id, cipher_key_len + 4, kb);	/* two keys, then two 4-byte IVs */
+	if (is_client) {	/* outbound on a client: first key, first IV */
+		cipher_key = &kb[0];
+		iv = &kb[cipher_key_len << 1];
+	} else {	/* outbound on a server: second key, second IV */
+		cipher_key = &kb[cipher_key_len];
+		iv = &kb[(cipher_key_len << 1) + 4];
+	}
+	cc->igcm_out->init(&cc->out.gcm.vtable.out,
+		bc_impl, cipher_key, cipher_key_len, cc->ighash, iv);
+}
+
+/* see inner.h */
+void
+br_ssl_engine_switch_chapol_in(br_ssl_engine_context *cc,
+	int is_client, int prf_id)
+{
+	unsigned char kb[88];	/* key block (2 * 44 bytes); not wiped before return */
+	unsigned char *cipher_key, *iv;
+
+	compute_key_block(cc, prf_id, 44, kb);	/* keys at offsets 0 and 32, IVs at 64 and 76 */
+	if (is_client) {	/* inbound on a client: second key, second IV */
+		cipher_key = &kb[32];
+		iv = &kb[76];
+	} else {	/* inbound on a server: first key, first IV */
+		cipher_key = &kb[0];
+		iv = &kb[64];
+	}
+	cc->ichapol_in->init(&cc->in.chapol.vtable.in,
+		cc->ichacha, cc->ipoly, cipher_key, iv);
+	cc->incrypt = 1;	/* set only by the inbound switch functions in this file */
+}
+
+/* see inner.h */
+void
+br_ssl_engine_switch_chapol_out(br_ssl_engine_context *cc,
+	int is_client, int prf_id)
+{
+	unsigned char kb[88];	/* key block (2 * 44 bytes); not wiped before return */
+	unsigned char *cipher_key, *iv;
+
+	compute_key_block(cc, prf_id, 44, kb);	/* keys at offsets 0 and 32, IVs at 64 and 76 */
+	if (is_client) {	/* outbound on a client: first key, first IV */
+		cipher_key = &kb[0];
+		iv = &kb[64];
+	} else {	/* outbound on a server: second key, second IV */
+		cipher_key = &kb[32];
+		iv = &kb[76];
+	}
+	cc->ichapol_out->init(&cc->out.chapol.vtable.out,
+		cc->ichacha, cc->ipoly, cipher_key, iv);
+}
+
+/* see inner.h */
+void
+br_ssl_engine_switch_ccm_in(br_ssl_engine_context *cc,
+	int is_client, int prf_id,
+	const br_block_ctrcbc_class *bc_impl,
+	size_t cipher_key_len, size_t tag_len)
+{
+	unsigned char kb[72];	/* key block; not wiped before return */
+	unsigned char *cipher_key, *iv;
+
+	compute_key_block(cc, prf_id, cipher_key_len + 4, kb);	/* two keys, then two 4-byte IVs */
+	if (is_client) {	/* inbound on a client: second key, second IV */
+		cipher_key = &kb[cipher_key_len];
+		iv = &kb[(cipher_key_len << 1) + 4];
+	} else {	/* inbound on a server: first key, first IV */
+		cipher_key = &kb[0];
+		iv = &kb[cipher_key_len << 1];
+	}
+	cc->iccm_in->init(&cc->in.ccm.vtable.in,
+		bc_impl, cipher_key, cipher_key_len, iv, tag_len);
+	cc->incrypt = 1;	/* set only by the inbound switch functions in this file */
+}
+
+/* see inner.h */
+void
+br_ssl_engine_switch_ccm_out(br_ssl_engine_context *cc,
+	int is_client, int prf_id,
+	const br_block_ctrcbc_class *bc_impl,
+	size_t cipher_key_len, size_t tag_len)
+{
+	unsigned char kb[72];	/* key block; not wiped before return */
+	unsigned char *cipher_key, *iv;
+
+	compute_key_block(cc, prf_id, cipher_key_len + 4, kb);	/* two keys, then two 4-byte IVs */
+	if (is_client) {	/* outbound on a client: first key, first IV */
+		cipher_key = &kb[0];
+		iv = &kb[cipher_key_len << 1];
+	} else {	/* outbound on a server: second key, second IV */
+		cipher_key = &kb[cipher_key_len];
+		iv = &kb[(cipher_key_len << 1) + 4];
+	}
+	cc->iccm_out->init(&cc->out.ccm.vtable.out,
+		bc_impl, cipher_key, cipher_key_len, iv, tag_len);
+}
diff --git a/third_party/bearssl/src/ssl_engine_default_aescbc.c b/third_party/bearssl/src/ssl_engine_default_aescbc.c
new file mode 100644
index 0000000..8c5cdb5
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine_default_aescbc.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h */
+void
+br_ssl_engine_set_default_aes_cbc(br_ssl_engine_context *cc)
+{
+#if BR_AES_X86NI || BR_POWER8	/* locals are only used by the two optional backends below */
+	const br_block_cbcenc_class *ienc;
+	const br_block_cbcdec_class *idec;
+#endif
+
+	br_ssl_engine_set_cbc(cc,	/* CBC record handlers are installed unconditionally */
+		&br_sslrec_in_cbc_vtable,
+		&br_sslrec_out_cbc_vtable);
+#if BR_AES_X86NI
+	ienc = br_aes_x86ni_cbcenc_get_vtable();
+	idec = br_aes_x86ni_cbcdec_get_vtable();
+	if (ienc != NULL && idec != NULL) {	/* used only if both getters return non-NULL */
+		br_ssl_engine_set_aes_cbc(cc, ienc, idec);
+		return;
+	}
+#endif
+#if BR_POWER8
+	ienc = br_aes_pwr8_cbcenc_get_vtable();
+	idec = br_aes_pwr8_cbcdec_get_vtable();
+	if (ienc != NULL && idec != NULL) {	/* used only if both getters return non-NULL */
+		br_ssl_engine_set_aes_cbc(cc, ienc, idec);
+		return;
+	}
+#endif
+#if BR_64	/* fallback when no backend above was selected */
+	br_ssl_engine_set_aes_cbc(cc,
+		&br_aes_ct64_cbcenc_vtable,
+		&br_aes_ct64_cbcdec_vtable);
+#else
+	br_ssl_engine_set_aes_cbc(cc,
+		&br_aes_ct_cbcenc_vtable,
+		&br_aes_ct_cbcdec_vtable);
+#endif
+}
diff --git a/third_party/bearssl/src/ssl_engine_default_aesccm.c b/third_party/bearssl/src/ssl_engine_default_aesccm.c
new file mode 100644
index 0000000..15c0a78
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine_default_aesccm.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h */
+void
+br_ssl_engine_set_default_aes_ccm(br_ssl_engine_context *cc)
+{
+#if BR_AES_X86NI || BR_POWER8	/* local is only used by the two optional backends below */
+	const br_block_ctrcbc_class *ictrcbc;
+#endif
+
+	br_ssl_engine_set_ccm(cc,	/* CCM record handlers are installed unconditionally */
+		&br_sslrec_in_ccm_vtable,
+		&br_sslrec_out_ccm_vtable);
+#if BR_AES_X86NI
+	ictrcbc = br_aes_x86ni_ctrcbc_get_vtable();
+	if (ictrcbc != NULL) {
+		br_ssl_engine_set_aes_ctrcbc(cc, ictrcbc);
+	} else {	/* getter returned NULL: use the ct64/ct implementation */
+#if BR_64
+		br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct64_ctrcbc_vtable);
+#else
+		br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct_ctrcbc_vtable);
+#endif
+	}
+#elif BR_POWER8	/* only considered when BR_AES_X86NI is not enabled */
+	ictrcbc = br_aes_pwr8_ctrcbc_get_vtable();
+	if (ictrcbc != NULL) {
+		br_ssl_engine_set_aes_ctrcbc(cc, ictrcbc);
+	} else {	/* getter returned NULL: use the ct64/ct implementation */
+#if BR_64
+		br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct64_ctrcbc_vtable);
+#else
+		br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct_ctrcbc_vtable);
+#endif
+	}
+#else	/* neither optional backend is compiled in */
+#if BR_64
+	br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct64_ctrcbc_vtable);
+#else
+	br_ssl_engine_set_aes_ctrcbc(cc, &br_aes_ct_ctrcbc_vtable);
+#endif
+#endif
+}
diff --git a/third_party/bearssl/src/ssl_engine_default_aesgcm.c b/third_party/bearssl/src/ssl_engine_default_aesgcm.c
new file mode 100644
index 0000000..c44a707
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine_default_aesgcm.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h */
+void
+br_ssl_engine_set_default_aes_gcm(br_ssl_engine_context *cc)
+{
+#if BR_AES_X86NI || BR_POWER8	/* locals are only used by the optional backends below */
+	const br_block_ctr_class *ictr;
+	br_ghash ighash;
+#endif
+
+	br_ssl_engine_set_gcm(cc,	/* GCM record handlers are installed unconditionally */
+		&br_sslrec_in_gcm_vtable,
+		&br_sslrec_out_gcm_vtable);
+#if BR_AES_X86NI	/* step 1: choose the AES-CTR implementation */
+	ictr = br_aes_x86ni_ctr_get_vtable();
+	if (ictr != NULL) {
+		br_ssl_engine_set_aes_ctr(cc, ictr);
+	} else {	/* getter returned NULL: use the ct64/ct implementation */
+#if BR_64
+		br_ssl_engine_set_aes_ctr(cc, &br_aes_ct64_ctr_vtable);
+#else
+		br_ssl_engine_set_aes_ctr(cc, &br_aes_ct_ctr_vtable);
+#endif
+	}
+#elif BR_POWER8	/* only considered when BR_AES_X86NI is not enabled */
+	ictr = br_aes_pwr8_ctr_get_vtable();
+	if (ictr != NULL) {
+		br_ssl_engine_set_aes_ctr(cc, ictr);
+	} else {	/* getter returned NULL: use the ct64/ct implementation */
+#if BR_64
+		br_ssl_engine_set_aes_ctr(cc, &br_aes_ct64_ctr_vtable);
+#else
+		br_ssl_engine_set_aes_ctr(cc, &br_aes_ct_ctr_vtable);
+#endif
+	}
+#else	/* neither optional backend is compiled in */
+#if BR_64
+	br_ssl_engine_set_aes_ctr(cc, &br_aes_ct64_ctr_vtable);
+#else
+	br_ssl_engine_set_aes_ctr(cc, &br_aes_ct_ctr_vtable);
+#endif
+#endif
+#if BR_AES_X86NI	/* step 2: choose the GHASH implementation */
+	ighash = br_ghash_pclmul_get();
+	if (ighash != 0) {	/* a zero return means: try the next candidate */
+		br_ssl_engine_set_ghash(cc, ighash);
+		return;
+	}
+#endif
+#if BR_POWER8
+	ighash = br_ghash_pwr8_get();
+	if (ighash != 0) {	/* a zero return means: try the next candidate */
+		br_ssl_engine_set_ghash(cc, ighash);
+		return;
+	}
+#endif
+#if BR_LOMUL	/* fallback; BR_LOMUL is tested before BR_64 */
+	br_ssl_engine_set_ghash(cc, &br_ghash_ctmul32);
+#elif BR_64
+	br_ssl_engine_set_ghash(cc, &br_ghash_ctmul64);
+#else
+	br_ssl_engine_set_ghash(cc, &br_ghash_ctmul);
+#endif
+}
diff --git a/third_party/bearssl/src/ssl_engine_default_chapol.c b/third_party/bearssl/src/ssl_engine_default_chapol.c
new file mode 100644
index 0000000..47a0c98
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine_default_chapol.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h */
+void
+br_ssl_engine_set_default_chapol(br_ssl_engine_context *cc)
+{
+#if BR_INT128 || BR_UMUL128	/* local for the optional Poly1305 candidate */
+	br_poly1305_run bp;
+#endif
+#if BR_SSE2	/* local for the optional ChaCha20 candidate */
+	br_chacha20_run bc;
+#endif
+
+	br_ssl_engine_set_chapol(cc,	/* record handlers are installed unconditionally */
+		&br_sslrec_in_chapol_vtable,
+		&br_sslrec_out_chapol_vtable);
+#if BR_SSE2
+	bc = br_chacha20_sse2_get();
+	if (bc) {
+		br_ssl_engine_set_chacha20(cc, bc);
+	} else {	/* getter returned a null pointer: fall back to the ct version */
+#endif
+		br_ssl_engine_set_chacha20(cc, &br_chacha20_ct_run);	/* unconditional without BR_SSE2 */
+#if BR_SSE2
+	}
+#endif
+#if BR_INT128 || BR_UMUL128
+	bp = br_poly1305_ctmulq_get();
+	if (bp) {
+		br_ssl_engine_set_poly1305(cc, bp);
+	} else {	/* getter returned a null pointer: fall back below */
+#endif
+#if BR_LOMUL	/* fallback choice; unconditional when the ctmulq candidate is compiled out */
+		br_ssl_engine_set_poly1305(cc, &br_poly1305_ctmul32_run);
+#else
+		br_ssl_engine_set_poly1305(cc, &br_poly1305_ctmul_run);
+#endif
+#if BR_INT128 || BR_UMUL128
+	}
+#endif
+}
diff --git a/third_party/bearssl/src/ssl_engine_default_descbc.c b/third_party/bearssl/src/ssl_engine_default_descbc.c
new file mode 100644
index 0000000..0253cb2
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine_default_descbc.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h */
+void
+br_ssl_engine_set_default_des_cbc(br_ssl_engine_context *cc)
+{
+	br_ssl_engine_set_cbc(cc,	/* CBC record handlers */
+		&br_sslrec_in_cbc_vtable,
+		&br_sslrec_out_cbc_vtable);
+	br_ssl_engine_set_des_cbc(cc,	/* the "ct" DES implementation is the only choice here */
+		&br_des_ct_cbcenc_vtable,
+		&br_des_ct_cbcdec_vtable);
+}
diff --git a/third_party/bearssl/src/ssl_engine_default_ec.c b/third_party/bearssl/src/ssl_engine_default_ec.c
new file mode 100644
index 0000000..0213ae6
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine_default_ec.c
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h */
+void
+br_ssl_engine_set_default_ec(br_ssl_engine_context *cc)
+{
+#if BR_LOMUL	/* compile-time choice between the m15 and m31 EC implementations */
+	br_ssl_engine_set_ec(cc, &br_ec_all_m15);
+#else
+	br_ssl_engine_set_ec(cc, &br_ec_all_m31);
+#endif
+}
diff --git a/third_party/bearssl/src/ssl_engine_default_ecdsa.c b/third_party/bearssl/src/ssl_engine_default_ecdsa.c
new file mode 100644
index 0000000..1304002
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine_default_ecdsa.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h -- EC and ECDSA-verify code are selected by BR_LOMUL */
+void
+br_ssl_engine_set_default_ecdsa(br_ssl_engine_context *cc)
+{
+#if !BR_LOMUL
+	br_ssl_engine_set_ec(cc, &br_ec_all_m31);
+	br_ssl_engine_set_ecdsa(cc, &br_ecdsa_i31_vrfy_asn1);
+#else
+	br_ssl_engine_set_ec(cc, &br_ec_all_m15);
+	br_ssl_engine_set_ecdsa(cc, &br_ecdsa_i15_vrfy_asn1);
+#endif
+}
diff --git a/third_party/bearssl/src/ssl_engine_default_rsavrfy.c b/third_party/bearssl/src/ssl_engine_default_rsavrfy.c
new file mode 100644
index 0000000..ad0628a
--- /dev/null
+++ b/third_party/bearssl/src/ssl_engine_default_rsavrfy.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h -- forwards br_rsa_pkcs1_vrfy_get_default() to the engine */
+void
+br_ssl_engine_set_default_rsavrfy(br_ssl_engine_context *cc)
+{
+	br_ssl_engine_set_rsavrfy(cc, br_rsa_pkcs1_vrfy_get_default());
+}
diff --git a/third_party/bearssl/src/ssl_hashes.c b/third_party/bearssl/src/ssl_hashes.c
new file mode 100644
index 0000000..e10a980
--- /dev/null
+++ b/third_party/bearssl/src/ssl_hashes.c
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h -- returns the preferred hash ID whose bit is set in bf, or 0 */
+int
+br_ssl_choose_hash(unsigned bf)
+{
+	/* Candidate hash function IDs, most preferred first. */
+	static const unsigned char order[] = {
+		br_sha256_ID, br_sha384_ID, br_sha512_ID,
+		br_sha224_ID, br_sha1_ID
+	};
+	const unsigned char *p, *end;
+
+	end = order + sizeof order;
+	for (p = order; p != end; p ++) {
+		/* Bit number 'id' of bf flags hash function 'id'. */
+		if (((bf >> *p) & 1) != 0) {
+			return *p;
+		}
+	}
+	return 0;	/* none of the candidates is flagged in bf */
+}
diff --git a/third_party/bearssl/src/ssl_hs_client.c b/third_party/bearssl/src/ssl_hs_client.c
new file mode 100644
index 0000000..de36165
--- /dev/null
+++ b/third_party/bearssl/src/ssl_hs_client.c
@@ -0,0 +1,1915 @@
+/* Automatically generated code; do not modify directly. */
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct {
+	uint32_t *dp;	/* presumably the data stack pointer -- TODO confirm */
+	uint32_t *rp;	/* presumably the return stack pointer -- TODO confirm */
+	const unsigned char *ip;	/* presumably the bytecode read pointer -- TODO confirm */
+} t0_context;
+
+/* Decode an unsigned integer made of 7-bit groups; *p ends after its last byte. */
+static uint32_t
+t0_parse7E_unsigned(const unsigned char **p)
+{
+	const unsigned char *src = *p;
+	uint32_t acc = 0;
+	unsigned b;
+
+	do {
+		b = *src ++;
+		acc = (acc << 7) | (uint32_t)(b & 0x7F);
+	} while (b >= 0x80);	/* bit 7 set: more bytes follow */
+
+	*p = src;
+	return acc;
+}
+
+/*
+ * Decode a signed integer made of 7-bit groups, most significant group
+ * first; bit 6 of the first byte is the sign. *p ends after the last byte.
+ */
+static int32_t
+t0_parse7E_signed(const unsigned char **p)
+{
+	const unsigned char *src = *p;
+	int negative = (*src >> 6) & 1;
+	uint32_t acc = (uint32_t)-negative;	/* all-ones when negative */
+	unsigned b;
+
+	do {
+		b = *src ++;
+		acc = (acc << 7) | (uint32_t)(b & 0x7F);
+	} while (b >= 0x80);	/* bit 7 set: more bytes follow */
+	*p = src;
+	if (!negative) {
+		return (int32_t)acc;
+	}
+	return -(int32_t)~acc - 1;	/* ~acc is small here, so the cast is in range */
+}
+
+#define T0_VBYTE(x, n)   (unsigned char)((((uint32_t)(x) >> (n)) & 0x7F) | 0x80)	/* bits n..n+6 of x, bit 7 set */
+#define T0_FBYTE(x, n)   (unsigned char)(((uint32_t)(x) >> (n)) & 0x7F)	/* bits n..n+6 of x, bit 7 clear */
+#define T0_SBYTE(x)      (unsigned char)((((uint32_t)(x) >> 28) + 0xF8) ^ 0xF8)	/* built from bits 28..31 of x */
+#define T0_INT1(x)       T0_FBYTE(x, 0)	/* list of 1 byte */
+#define T0_INT2(x)       T0_VBYTE(x, 7), T0_FBYTE(x, 0)	/* list of 2 bytes */
+#define T0_INT3(x)       T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)	/* list of 3 bytes */
+#define T0_INT4(x)       T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)	/* list of 4 bytes */
+#define T0_INT5(x)       T0_SBYTE(x), T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)	/* list of 5 bytes */
+
+/* static const unsigned char t0_datablock[]; */
+
+
+void br_ssl_hs_client_init_main(void *t0ctx);
+
+void br_ssl_hs_client_run(void *t0ctx);
+
+
+
+#include <stddef.h>
+#include <string.h>
+
+#include "inner.h"
+
+/*
+ * Current engine context, i.e. the structure whose 'cpu' field is at t0ctx.
+ */
+#define ENG  ((br_ssl_engine_context *)(void *)((unsigned char *)t0ctx - offsetof(br_ssl_engine_context, cpu)))
+
+
+
+
+
+/*
+ * This macro evaluates to a pointer to the client context, under that
+ * specific name. Note that since the engine context is the first field
+ * of the br_ssl_client_context structure ('eng'), pointer values of
+ * both types are interchangeable, modulo an appropriate cast. This
+ * also means that "addresses" computed as offsets within the structure
+ * work for both kinds of context.
+ */
+#define CTX  ((br_ssl_client_context *)ENG)
+
+/*
+ * Generate the pre-master secret for RSA key exchange, hand it (with
+ * 'prf_id') to br_ssl_engine_compute_master(), then encrypt it in the
+ * pad with the server's public key. Returned value is the encrypted
+ * data length (in bytes), or -x on error, with 'x' being an error code.
+ * This code assumes that the public key has been already verified (it
+ * was properly obtained by the X.509 engine, and it has the right type,
+ * i.e. it is of type RSA and suitable for encryption).
+ */
+static int
+make_pms_rsa(br_ssl_client_context *ctx, int prf_id)
+{
+	const br_x509_class **xc;
+	const br_x509_pkey *pk;
+	const unsigned char *n;
+	unsigned char *pms;
+	size_t nlen, u;
+
+	xc = ctx->eng.x509ctx;
+	pk = (*xc)->get_pkey(xc, NULL);
+
+	/*
+	 * Compute actual RSA key length, in case there are leading zeros.
+	 */
+	n = pk->key.rsa.n;
+	nlen = pk->key.rsa.nlen;
+	while (nlen > 0 && *n == 0) {
+		n ++;
+		nlen --;
+	}
+
+	/*
+	 * We need at least 59 bytes (48 bytes for pre-master secret, and
+	 * 11 bytes for the PKCS#1 type 2 padding). Note that the X.509
+	 * minimal engine normally blocks RSA keys shorter than 128 bytes,
+	 * so this is mostly for public keys provided explicitly by the
+	 * caller. The padded block (nlen bytes) must also fit in the pad.
+	 */
+	if (nlen < 59) {
+		return -BR_ERR_X509_WEAK_PUBLIC_KEY;
+	}
+	if (nlen > sizeof ctx->eng.pad) {
+		return -BR_ERR_LIMIT_EXCEEDED;
+	}
+
+	/*
+	 * Make PMS in pad[nlen-48..nlen-1]: version_max, then 46 random bytes.
+	 */
+	pms = ctx->eng.pad + nlen - 48;
+	br_enc16be(pms, ctx->eng.version_max);
+	br_hmac_drbg_generate(&ctx->eng.rng, pms + 2, 46);
+	br_ssl_engine_compute_master(&ctx->eng, prf_id, pms, 48);
+
+	/*
+	 * Apply PKCS#1 type 2 padding: 00 02, non-zero random bytes, 00.
+	 */
+	ctx->eng.pad[0] = 0x00;
+	ctx->eng.pad[1] = 0x02;
+	ctx->eng.pad[nlen - 49] = 0x00;
+	br_hmac_drbg_generate(&ctx->eng.rng, ctx->eng.pad + 2, nlen - 51);
+	for (u = 2; u < nlen - 49; u ++) {
+		while (ctx->eng.pad[u] == 0) {	/* redraw any zero byte */
+			br_hmac_drbg_generate(&ctx->eng.rng,
+				&ctx->eng.pad[u], 1);
+		}
+	}
+
+	/*
+	 * Compute RSA encryption over the first nlen bytes of the pad.
+	 */
+	if (!ctx->irsapub(ctx->eng.pad, nlen, &pk->key.rsa)) {
+		return -BR_ERR_LIMIT_EXCEEDED;
+	}
+	return (int)nlen;
+}
+
+/*
+ * OID for hash functions in RSA signatures; read-only, index = hash ID - 2.
+ */
+static const unsigned char *const HASH_OID[] = {
+	BR_HASH_OID_SHA1,
+	BR_HASH_OID_SHA224,
+	BR_HASH_OID_SHA256,
+	BR_HASH_OID_SHA384,
+	BR_HASH_OID_SHA512
+};
+
+/*
+ * Check the signature (RSA or ECDSA) on the ServerKeyExchange message.
+ *
+ *   hash      hash function ID (2 to 6), or 0 for MD5+SHA-1 (with RSA only)
+ *   use_rsa   non-zero for RSA signature, zero for ECDSA
+ *   sig_len   signature length (in bytes); signature value is in the pad
+ *
+ * Returned value is 0 on success, or an error code.
+ */
+static int
+verify_SKE_sig(br_ssl_client_context *ctx,
+	int hash, int use_rsa, size_t sig_len)
+{
+	const br_x509_class **xc;
+	const br_x509_pkey *pk;
+	br_multihash_context mhc;
+	unsigned char hv[64], head[4];
+	size_t hv_len;
+
+	/* Valid IDs are 0 and 2..6; 'hash - 2' indexes HASH_OID[] below. */
+	if (hash != 0 && (hash < 2 || hash > 6)) {
+		return BR_ERR_INVALID_ALGORITHM;
+	}
+	xc = ctx->eng.x509ctx;
+	pk = (*xc)->get_pkey(xc, NULL);
+	br_multihash_zero(&mhc);
+	br_multihash_copyimpl(&mhc, &ctx->eng.mhash);
+	br_multihash_init(&mhc);
+	br_multihash_update(&mhc,
+		ctx->eng.client_random, sizeof ctx->eng.client_random);
+	br_multihash_update(&mhc,
+		ctx->eng.server_random, sizeof ctx->eng.server_random);
+	head[0] = 3;
+	head[1] = 0;
+	head[2] = ctx->eng.ecdhe_curve;
+	head[3] = ctx->eng.ecdhe_point_len;
+	br_multihash_update(&mhc, head, sizeof head);
+	br_multihash_update(&mhc,
+		ctx->eng.ecdhe_point, ctx->eng.ecdhe_point_len);
+	if (hash) {
+		hv_len = br_multihash_out(&mhc, hash, hv);
+		if (hv_len == 0) {
+			return BR_ERR_INVALID_ALGORITHM;
+		}
+	} else {
+		if (!br_multihash_out(&mhc, br_md5_ID, hv)
+			|| !br_multihash_out(&mhc, br_sha1_ID, hv + 16))
+		{
+			return BR_ERR_INVALID_ALGORITHM;
+		}
+		hv_len = 36;
+	}
+	if (use_rsa) {
+		unsigned char tmp[64];
+		const unsigned char *hash_oid;
+
+		hash_oid = hash ? HASH_OID[hash - 2] : NULL;
+		if (!ctx->eng.irsavrfy(ctx->eng.pad, sig_len,
+			hash_oid, hv_len, &pk->key.rsa, tmp)
+			|| memcmp(tmp, hv, hv_len) != 0)
+		{
+			return BR_ERR_BAD_SIGNATURE;
+		}
+	} else {
+		if (!ctx->eng.iecdsa(ctx->eng.iec, hv, hv_len, &pk->key.ec,
+			ctx->eng.pad, sig_len))
+		{
+			return BR_ERR_BAD_SIGNATURE;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Perform client-side ECDH (or ECDHE). The point that should be sent to
+ * the server is written in the pad; returned value is either the point
+ * length (in bytes), or -x on error, with 'x' being an error code. The
+ * shared secret is passed to br_ssl_engine_compute_master() with 'prf_id'.
+ *
+ * The point _from_ the server is taken from ecdhe_point[] if 'ecdhe' is
+ * non-zero, or from the X.509 engine context otherwise (static ECDH).
+ */
+static int
+make_pms_ecdh(br_ssl_client_context *ctx, unsigned ecdhe, int prf_id)
+{
+	int curve;
+	unsigned char key[66], point[133];
+	const unsigned char *order, *point_src;
+	size_t glen, olen, point_len, xoff, xlen;
+	unsigned char mask;
+
+	if (ecdhe) {
+		curve = ctx->eng.ecdhe_curve;
+		point_src = ctx->eng.ecdhe_point;
+		point_len = ctx->eng.ecdhe_point_len;
+	} else {
+		const br_x509_class **xc;
+		const br_x509_pkey *pk;
+
+		xc = ctx->eng.x509ctx;
+		pk = (*xc)->get_pkey(xc, NULL);
+		curve = pk->key.ec.curve;
+		point_src = pk->key.ec.q;
+		point_len = pk->key.ec.qlen;
+	}
+	if ((ctx->eng.iec->supported_curves & ((uint32_t)1 << curve)) == 0) {
+		return -BR_ERR_INVALID_ALGORITHM;	/* bit 'curve' is not set */
+	}
+
+	/*
+	 * We need to generate our key, as a non-zero random value which is
+	 * lower than the curve order, in a "large enough" range. We mask the
+	 * top byte to make it lower than the order's top byte, and force the
+	 * bottom bit to 1; this guarantees that the value is in the proper range.
+	 */
+	order = ctx->eng.iec->order(curve, &olen);
+	mask = 0xFF;
+	while (mask >= order[0]) {
+		mask >>= 1;
+	}
+	br_hmac_drbg_generate(&ctx->eng.rng, key, olen);
+	key[0] &= mask;
+	key[olen - 1] |= 0x01;
+
+	/*
+	 * Compute the common ECDH point, whose X coordinate is the
+	 * pre-master secret. The server point must have the generator's length.
+	 */
+	ctx->eng.iec->generator(curve, &glen);	/* only glen is used */
+	if (glen != point_len) {
+		return -BR_ERR_INVALID_ALGORITHM;
+	}
+
+	memcpy(point, point_src, glen);
+	if (!ctx->eng.iec->mul(point, glen, key, olen, curve)) {
+		return -BR_ERR_INVALID_ALGORITHM;
+	}
+
+	/*
+	 * The pre-master secret is the X coordinate.
+	 */
+	xoff = ctx->eng.iec->xoff(curve, &xlen);
+	br_ssl_engine_compute_master(&ctx->eng, prf_id, point + xoff, xlen);
+	/* Compute the point to send to the server, and copy it to the pad. */
+	ctx->eng.iec->mulgen(point, key, olen, curve);
+	memcpy(ctx->eng.pad, point, glen);
+	return (int)glen;
+}
+
+/*
+ * Full static ECDH. This is used only for certificate-based client
+ * authentication, when all of the following hold: the server has an EC
+ * public key, the cipher suite is ECDH (not ECDHE), the server asked
+ * for a client certificate and accepts static ECDH, and the client has
+ * a certificate with an EC public key on the same curve and accepts
+ * static ECDH too.
+ *
+ * Returned value is 0 on success, -1 on error.
+ */
+static int
+make_pms_static_ecdh(br_ssl_client_context *ctx, int prf_id)
+{
+	const br_x509_class **x509;
+	const br_x509_pkey *server_key;
+	unsigned char buf[133];
+	size_t len;
+	int ok;
+
+	x509 = ctx->eng.x509ctx;
+	server_key = (*x509)->get_pkey(x509, NULL);
+	len = server_key->key.ec.qlen;
+	if (len <= sizeof buf) {
+		memcpy(buf, server_key->key.ec.q, len);
+		ok = (*ctx->client_auth_vtable)->do_keyx(
+			ctx->client_auth_vtable, buf, &len) != 0;
+		if (ok) {
+			br_ssl_engine_compute_master(&ctx->eng,
+				prf_id, buf, len);
+			return 0;
+		}
+	}
+	return -1;
+}
+
+/*
+ * Produce the client's signature over the handshake hash. Only called
+ * when a signature-based client authentication was selected. On
+ * success the signature is left in the pad and its length (in bytes)
+ * is returned; on error, 0 is returned.
+ */
+static size_t
+make_client_sign(br_ssl_client_context *ctx)
+{
+	unsigned char *hv = ctx->eng.pad;
+	size_t hv_len;
+
+	/*
+	 * Hash the handshake messages so far, into the pad. This "cannot"
+	 * fail: the hash functions offered to the client certificate
+	 * handler were restricted to those the multi-hasher supports.
+	 */
+	if (ctx->hash_id == 0) {
+		/* MD5 output at offset 0, SHA-1 output at offset 16. */
+		br_multihash_out(&ctx->eng.mhash, br_md5_ID, hv);
+		br_multihash_out(&ctx->eng.mhash, br_sha1_ID, hv + 16);
+		hv_len = 36;
+	} else {
+		hv_len = br_multihash_out(&ctx->eng.mhash,
+			ctx->hash_id, hv);
+	}
+
+	return (*ctx->client_auth_vtable)->do_sign(
+		ctx->client_auth_vtable, ctx->hash_id, hv_len,
+		hv, sizeof ctx->eng.pad);
+}
+
+
+
+static const unsigned char t0_datablock[] = {	/* NOTE(review): presumably constant data read by the T0 bytecode -- confirm */
+	0x00, 0x00, 0x0A, 0x00, 0x24, 0x00, 0x2F, 0x01, 0x24, 0x00, 0x35, 0x02,
+	0x24, 0x00, 0x3C, 0x01, 0x44, 0x00, 0x3D, 0x02, 0x44, 0x00, 0x9C, 0x03,
+	0x04, 0x00, 0x9D, 0x04, 0x05, 0xC0, 0x03, 0x40, 0x24, 0xC0, 0x04, 0x41,
+	0x24, 0xC0, 0x05, 0x42, 0x24, 0xC0, 0x08, 0x20, 0x24, 0xC0, 0x09, 0x21,
+	0x24, 0xC0, 0x0A, 0x22, 0x24, 0xC0, 0x0D, 0x30, 0x24, 0xC0, 0x0E, 0x31,
+	0x24, 0xC0, 0x0F, 0x32, 0x24, 0xC0, 0x12, 0x10, 0x24, 0xC0, 0x13, 0x11,
+	0x24, 0xC0, 0x14, 0x12, 0x24, 0xC0, 0x23, 0x21, 0x44, 0xC0, 0x24, 0x22,
+	0x55, 0xC0, 0x25, 0x41, 0x44, 0xC0, 0x26, 0x42, 0x55, 0xC0, 0x27, 0x11,
+	0x44, 0xC0, 0x28, 0x12, 0x55, 0xC0, 0x29, 0x31, 0x44, 0xC0, 0x2A, 0x32,
+	0x55, 0xC0, 0x2B, 0x23, 0x04, 0xC0, 0x2C, 0x24, 0x05, 0xC0, 0x2D, 0x43,
+	0x04, 0xC0, 0x2E, 0x44, 0x05, 0xC0, 0x2F, 0x13, 0x04, 0xC0, 0x30, 0x14,
+	0x05, 0xC0, 0x31, 0x33, 0x04, 0xC0, 0x32, 0x34, 0x05, 0xC0, 0x9C, 0x06,
+	0x04, 0xC0, 0x9D, 0x07, 0x04, 0xC0, 0xA0, 0x08, 0x04, 0xC0, 0xA1, 0x09,
+	0x04, 0xC0, 0xAC, 0x26, 0x04, 0xC0, 0xAD, 0x27, 0x04, 0xC0, 0xAE, 0x28,
+	0x04, 0xC0, 0xAF, 0x29, 0x04, 0xCC, 0xA8, 0x15, 0x04, 0xCC, 0xA9, 0x25,
+	0x04, 0x00, 0x00
+};
+
+static const unsigned char t0_codeblock[] = {
+	0x00, 0x01, 0x00, 0x0A, 0x00, 0x00, 0x01, 0x00, 0x0D, 0x00, 0x00, 0x01,
+	0x00, 0x0E, 0x00, 0x00, 0x01, 0x00, 0x0F, 0x00, 0x00, 0x01, 0x01, 0x08,
+	0x00, 0x00, 0x01, 0x01, 0x09, 0x00, 0x00, 0x01, 0x02, 0x08, 0x00, 0x00,
+	0x01, 0x02, 0x09, 0x00, 0x00, 0x25, 0x25, 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_CCS), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_CIPHER_SUITE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_COMPRESSION), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_FINISHED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_FRAGLEN), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_HANDSHAKE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_HELLO_DONE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_PARAM), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_SECRENEG), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_BAD_SNI), 0x00, 0x00, 0x01, T0_INT1(BR_ERR_BAD_VERSION),
+	0x00, 0x00, 0x01, T0_INT1(BR_ERR_EXTRA_EXTENSION), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_INVALID_ALGORITHM), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_LIMIT_EXCEEDED), 0x00, 0x00, 0x01, T0_INT1(BR_ERR_OK),
+	0x00, 0x00, 0x01, T0_INT1(BR_ERR_OVERSIZED_ID), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_RESUME_MISMATCH), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_UNEXPECTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_UNSUPPORTED_VERSION), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_WRONG_KEY_USAGE), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, action)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, alert)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, application_data)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(br_ssl_client_context, auth_type)), 0x00, 0x00,
+	0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, session) + offsetof(br_ssl_session_parameters, cipher_suite)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, client_random)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(br_ssl_engine_context, close_received)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, ecdhe_curve)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, ecdhe_point)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(br_ssl_engine_context, ecdhe_point_len)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, flags)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_client_context, hash_id)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_client_context, hashes)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, log_max_frag_len)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_client_context, min_clienthello_len)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, pad)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(br_ssl_engine_context, protocol_names_num)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, record_type_in)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(br_ssl_engine_context, record_type_out)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, reneg)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, saved_finished)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, selected_protocol)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, server_name)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, server_random)), 0x00, 0x00,
+	0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, session) + offsetof(br_ssl_session_parameters, session_id)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, session) + offsetof(br_ssl_session_parameters, session_id_len)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, shutdown_recv)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(br_ssl_engine_context, suites_buf)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(br_ssl_engine_context, suites_num)), 0x00, 0x00,
+	0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, session) + offsetof(br_ssl_session_parameters, version)),
+	0x00, 0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, version_in)),
+	0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_ssl_engine_context, version_max)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(br_ssl_engine_context, version_min)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(br_ssl_engine_context, version_out)),
+	0x00, 0x00, 0x09, 0x26, 0x58, 0x06, 0x02, 0x68, 0x28, 0x00, 0x00, 0x06,
+	0x08, 0x2C, 0x0E, 0x05, 0x02, 0x71, 0x28, 0x04, 0x01, 0x3C, 0x00, 0x00,
+	0x01, 0x01, 0x00, 0x01, 0x03, 0x00, 0x99, 0x26, 0x5E, 0x44, 0x9D, 0x26,
+	0x05, 0x04, 0x60, 0x01, 0x00, 0x00, 0x02, 0x00, 0x0E, 0x06, 0x02, 0x9D,
+	0x00, 0x5E, 0x04, 0x6B, 0x00, 0x06, 0x02, 0x68, 0x28, 0x00, 0x00, 0x26,
+	0x89, 0x44, 0x05, 0x03, 0x01, 0x0C, 0x08, 0x44, 0x79, 0x2C, 0xAB, 0x1C,
+	0x84, 0x01, 0x0C, 0x31, 0x00, 0x00, 0x26, 0x1F, 0x01, 0x08, 0x0B, 0x44,
+	0x5C, 0x1F, 0x08, 0x00, 0x01, 0x03, 0x00, 0x77, 0x2E, 0x02, 0x00, 0x36,
+	0x17, 0x01, 0x01, 0x0B, 0x77, 0x3E, 0x29, 0x1A, 0x36, 0x06, 0x07, 0x02,
+	0x00, 0xCF, 0x03, 0x00, 0x04, 0x75, 0x01, 0x00, 0xC5, 0x02, 0x00, 0x26,
+	0x1A, 0x17, 0x06, 0x02, 0x6F, 0x28, 0xCF, 0x04, 0x76, 0x01, 0x01, 0x00,
+	0x77, 0x3E, 0x01, 0x16, 0x87, 0x3E, 0x01, 0x00, 0x8A, 0x3C, 0x34, 0xD5,
+	0x29, 0xB4, 0x06, 0x09, 0x01, 0x7F, 0xAF, 0x01, 0x7F, 0xD2, 0x04, 0x80,
+	0x53, 0xB1, 0x79, 0x2C, 0xA1, 0x01, T0_INT1(BR_KEYTYPE_SIGN), 0x17,
+	0x06, 0x01, 0xB5, 0xB8, 0x26, 0x01, 0x0D, 0x0E, 0x06, 0x07, 0x25, 0xB7,
+	0xB8, 0x01, 0x7F, 0x04, 0x02, 0x01, 0x00, 0x03, 0x00, 0x01, 0x0E, 0x0E,
+	0x05, 0x02, 0x72, 0x28, 0x06, 0x02, 0x67, 0x28, 0x33, 0x06, 0x02, 0x72,
+	0x28, 0x02, 0x00, 0x06, 0x1C, 0xD3, 0x80, 0x2E, 0x01, 0x81, 0x7F, 0x0E,
+	0x06, 0x0D, 0x25, 0x01, 0x10, 0xDE, 0x01, 0x00, 0xDD, 0x79, 0x2C, 0xAB,
+	0x24, 0x04, 0x04, 0xD6, 0x06, 0x01, 0xD4, 0x04, 0x01, 0xD6, 0x01, 0x7F,
+	0xD2, 0x01, 0x7F, 0xAF, 0x01, 0x01, 0x77, 0x3E, 0x01, 0x17, 0x87, 0x3E,
+	0x00, 0x00, 0x38, 0x38, 0x00, 0x00, 0x9A, 0x01, 0x0C, 0x11, 0x01, 0x00,
+	0x38, 0x0E, 0x06, 0x05, 0x25, 0x01,
+	T0_INT1(BR_KEYTYPE_RSA | BR_KEYTYPE_KEYX), 0x04, 0x30, 0x01, 0x01,
+	0x38, 0x0E, 0x06, 0x05, 0x25, 0x01,
+	T0_INT1(BR_KEYTYPE_RSA | BR_KEYTYPE_SIGN), 0x04, 0x25, 0x01, 0x02,
+	0x38, 0x0E, 0x06, 0x05, 0x25, 0x01,
+	T0_INT1(BR_KEYTYPE_EC  | BR_KEYTYPE_SIGN), 0x04, 0x1A, 0x01, 0x03,
+	0x38, 0x0E, 0x06, 0x05, 0x25, 0x01,
+	T0_INT1(BR_KEYTYPE_EC  | BR_KEYTYPE_KEYX), 0x04, 0x0F, 0x01, 0x04,
+	0x38, 0x0E, 0x06, 0x05, 0x25, 0x01,
+	T0_INT1(BR_KEYTYPE_EC  | BR_KEYTYPE_KEYX), 0x04, 0x04, 0x01, 0x00,
+	0x44, 0x25, 0x00, 0x00, 0x82, 0x2E, 0x01, 0x0E, 0x0E, 0x06, 0x04, 0x01,
+	0x00, 0x04, 0x02, 0x01, 0x05, 0x00, 0x00, 0x40, 0x06, 0x04, 0x01, 0x06,
+	0x04, 0x02, 0x01, 0x00, 0x00, 0x00, 0x88, 0x2E, 0x26, 0x06, 0x08, 0x01,
+	0x01, 0x09, 0x01, 0x11, 0x07, 0x04, 0x03, 0x25, 0x01, 0x05, 0x00, 0x01,
+	0x41, 0x03, 0x00, 0x25, 0x01, 0x00, 0x43, 0x06, 0x03, 0x02, 0x00, 0x08,
+	0x42, 0x06, 0x03, 0x02, 0x00, 0x08, 0x26, 0x06, 0x06, 0x01, 0x01, 0x0B,
+	0x01, 0x06, 0x08, 0x00, 0x00, 0x8B, 0x3F, 0x26, 0x06, 0x03, 0x01, 0x09,
+	0x08, 0x00, 0x01, 0x40, 0x26, 0x06, 0x1E, 0x01, 0x00, 0x03, 0x00, 0x26,
+	0x06, 0x0E, 0x26, 0x01, 0x01, 0x17, 0x02, 0x00, 0x08, 0x03, 0x00, 0x01,
+	0x01, 0x11, 0x04, 0x6F, 0x25, 0x02, 0x00, 0x01, 0x01, 0x0B, 0x01, 0x06,
+	0x08, 0x00, 0x00, 0x7F, 0x2D, 0x44, 0x11, 0x01, 0x01, 0x17, 0x35, 0x00,
+	0x00, 0x9F, 0xCE, 0x26, 0x01, 0x07, 0x17, 0x01, 0x00, 0x38, 0x0E, 0x06,
+	0x09, 0x25, 0x01, 0x10, 0x17, 0x06, 0x01, 0x9F, 0x04, 0x35, 0x01, 0x01,
+	0x38, 0x0E, 0x06, 0x2C, 0x25, 0x25, 0x01, 0x00, 0x77, 0x3E, 0xB3, 0x88,
+	0x2E, 0x01, 0x01, 0x0E, 0x01, 0x01, 0xA8, 0x37, 0x06, 0x17, 0x29, 0x1A,
+	0x36, 0x06, 0x04, 0xCE, 0x25, 0x04, 0x78, 0x01, 0x80, 0x64, 0xC5, 0x01,
+	0x01, 0x77, 0x3E, 0x01, 0x17, 0x87, 0x3E, 0x04, 0x01, 0x9F, 0x04, 0x03,
+	0x72, 0x28, 0x25, 0x04, 0xFF, 0x34, 0x01, 0x26, 0x03, 0x00, 0x09, 0x26,
+	0x58, 0x06, 0x02, 0x68, 0x28, 0x02, 0x00, 0x00, 0x00, 0x9A, 0x01, 0x0F,
+	0x17, 0x00, 0x00, 0x76, 0x2E, 0x01, 0x00, 0x38, 0x0E, 0x06, 0x10, 0x25,
+	0x26, 0x01, 0x01, 0x0D, 0x06, 0x03, 0x25, 0x01, 0x02, 0x76, 0x3E, 0x01,
+	0x00, 0x04, 0x21, 0x01, 0x01, 0x38, 0x0E, 0x06, 0x14, 0x25, 0x01, 0x00,
+	0x76, 0x3E, 0x26, 0x01, 0x80, 0x64, 0x0E, 0x06, 0x05, 0x01, 0x82, 0x00,
+	0x08, 0x28, 0x5A, 0x04, 0x07, 0x25, 0x01, 0x82, 0x00, 0x08, 0x28, 0x25,
+	0x00, 0x00, 0x01, 0x00, 0x2F, 0x06, 0x05, 0x3A, 0xAC, 0x37, 0x04, 0x78,
+	0x26, 0x06, 0x04, 0x01, 0x01, 0x8F, 0x3E, 0x00, 0x01, 0xBF, 0xAA, 0xBF,
+	0xAA, 0xC1, 0x84, 0x44, 0x26, 0x03, 0x00, 0xB6, 0x9B, 0x9B, 0x02, 0x00,
+	0x4D, 0x26, 0x58, 0x06, 0x0A, 0x01, 0x03, 0xA8, 0x06, 0x02, 0x72, 0x28,
+	0x25, 0x04, 0x03, 0x5C, 0x8A, 0x3C, 0x00, 0x00, 0x2F, 0x06, 0x0B, 0x86,
+	0x2E, 0x01, 0x14, 0x0D, 0x06, 0x02, 0x72, 0x28, 0x04, 0x11, 0xCE, 0x01,
+	0x07, 0x17, 0x26, 0x01, 0x02, 0x0D, 0x06, 0x06, 0x06, 0x02, 0x72, 0x28,
+	0x04, 0x70, 0x25, 0xC2, 0x01, 0x01, 0x0D, 0x33, 0x37, 0x06, 0x02, 0x61,
+	0x28, 0x26, 0x01, 0x01, 0xC8, 0x36, 0xB2, 0x00, 0x01, 0xB8, 0x01, 0x0B,
+	0x0E, 0x05, 0x02, 0x72, 0x28, 0x26, 0x01, 0x03, 0x0E, 0x06, 0x08, 0xC0,
+	0x06, 0x02, 0x68, 0x28, 0x44, 0x25, 0x00, 0x44, 0x57, 0xC0, 0xAA, 0x26,
+	0x06, 0x23, 0xC0, 0xAA, 0x26, 0x56, 0x26, 0x06, 0x18, 0x26, 0x01, 0x82,
+	0x00, 0x0F, 0x06, 0x05, 0x01, 0x82, 0x00, 0x04, 0x01, 0x26, 0x03, 0x00,
+	0x84, 0x02, 0x00, 0xB6, 0x02, 0x00, 0x53, 0x04, 0x65, 0x9B, 0x54, 0x04,
+	0x5A, 0x9B, 0x9B, 0x55, 0x26, 0x06, 0x02, 0x35, 0x00, 0x25, 0x2B, 0x00,
+	0x00, 0x79, 0x2C, 0xA1, 0x01, 0x7F, 0xB0, 0x26, 0x58, 0x06, 0x02, 0x35,
+	0x28, 0x26, 0x05, 0x02, 0x72, 0x28, 0x38, 0x17, 0x0D, 0x06, 0x02, 0x74,
+	0x28, 0x3B, 0x00, 0x00, 0x9C, 0xB8, 0x01, 0x14, 0x0D, 0x06, 0x02, 0x72,
+	0x28, 0x84, 0x01, 0x0C, 0x08, 0x01, 0x0C, 0xB6, 0x9B, 0x84, 0x26, 0x01,
+	0x0C, 0x08, 0x01, 0x0C, 0x30, 0x05, 0x02, 0x64, 0x28, 0x00, 0x00, 0xB9,
+	0x06, 0x02, 0x72, 0x28, 0x06, 0x02, 0x66, 0x28, 0x00, 0x0A, 0xB8, 0x01,
+	0x02, 0x0E, 0x05, 0x02, 0x72, 0x28, 0xBF, 0x03, 0x00, 0x02, 0x00, 0x95,
+	0x2C, 0x0A, 0x02, 0x00, 0x94, 0x2C, 0x0F, 0x37, 0x06, 0x02, 0x73, 0x28,
+	0x02, 0x00, 0x93, 0x2C, 0x0D, 0x06, 0x02, 0x6B, 0x28, 0x02, 0x00, 0x96,
+	0x3C, 0x8C, 0x01, 0x20, 0xB6, 0x01, 0x00, 0x03, 0x01, 0xC1, 0x03, 0x02,
+	0x02, 0x02, 0x01, 0x20, 0x0F, 0x06, 0x02, 0x70, 0x28, 0x84, 0x02, 0x02,
+	0xB6, 0x02, 0x02, 0x8E, 0x2E, 0x0E, 0x02, 0x02, 0x01, 0x00, 0x0F, 0x17,
+	0x06, 0x0B, 0x8D, 0x84, 0x02, 0x02, 0x30, 0x06, 0x04, 0x01, 0x7F, 0x03,
+	0x01, 0x8D, 0x84, 0x02, 0x02, 0x31, 0x02, 0x02, 0x8E, 0x3E, 0x02, 0x00,
+	0x92, 0x02, 0x01, 0x98, 0xBF, 0x26, 0xC3, 0x58, 0x06, 0x02, 0x62, 0x28,
+	0x26, 0xCD, 0x02, 0x00, 0x01, 0x86, 0x03, 0x0A, 0x17, 0x06, 0x02, 0x62,
+	0x28, 0x79, 0x02, 0x01, 0x98, 0xC1, 0x06, 0x02, 0x63, 0x28, 0x26, 0x06,
+	0x81, 0x47, 0xBF, 0xAA, 0xA6, 0x03, 0x03, 0xA4, 0x03, 0x04, 0xA2, 0x03,
+	0x05, 0xA5, 0x03, 0x06, 0xA7, 0x03, 0x07, 0xA3, 0x03, 0x08, 0x27, 0x03,
+	0x09, 0x26, 0x06, 0x81, 0x18, 0xBF, 0x01, 0x00, 0x38, 0x0E, 0x06, 0x0F,
+	0x25, 0x02, 0x03, 0x05, 0x02, 0x6C, 0x28, 0x01, 0x00, 0x03, 0x03, 0xBE,
+	0x04, 0x80, 0x7F, 0x01, 0x01, 0x38, 0x0E, 0x06, 0x0F, 0x25, 0x02, 0x05,
+	0x05, 0x02, 0x6C, 0x28, 0x01, 0x00, 0x03, 0x05, 0xBC, 0x04, 0x80, 0x6A,
+	0x01, 0x83, 0xFE, 0x01, 0x38, 0x0E, 0x06, 0x0F, 0x25, 0x02, 0x04, 0x05,
+	0x02, 0x6C, 0x28, 0x01, 0x00, 0x03, 0x04, 0xBD, 0x04, 0x80, 0x53, 0x01,
+	0x0D, 0x38, 0x0E, 0x06, 0x0E, 0x25, 0x02, 0x06, 0x05, 0x02, 0x6C, 0x28,
+	0x01, 0x00, 0x03, 0x06, 0xBA, 0x04, 0x3F, 0x01, 0x0A, 0x38, 0x0E, 0x06,
+	0x0E, 0x25, 0x02, 0x07, 0x05, 0x02, 0x6C, 0x28, 0x01, 0x00, 0x03, 0x07,
+	0xBA, 0x04, 0x2B, 0x01, 0x0B, 0x38, 0x0E, 0x06, 0x0E, 0x25, 0x02, 0x08,
+	0x05, 0x02, 0x6C, 0x28, 0x01, 0x00, 0x03, 0x08, 0xBA, 0x04, 0x17, 0x01,
+	0x10, 0x38, 0x0E, 0x06, 0x0E, 0x25, 0x02, 0x09, 0x05, 0x02, 0x6C, 0x28,
+	0x01, 0x00, 0x03, 0x09, 0xAE, 0x04, 0x03, 0x6C, 0x28, 0x25, 0x04, 0xFE,
+	0x64, 0x02, 0x04, 0x06, 0x0D, 0x02, 0x04, 0x01, 0x05, 0x0F, 0x06, 0x02,
+	0x69, 0x28, 0x01, 0x01, 0x88, 0x3E, 0x9B, 0x04, 0x0C, 0xA4, 0x01, 0x05,
+	0x0F, 0x06, 0x02, 0x69, 0x28, 0x01, 0x01, 0x88, 0x3E, 0x9B, 0x02, 0x01,
+	0x00, 0x04, 0xB8, 0x01, 0x0C, 0x0E, 0x05, 0x02, 0x72, 0x28, 0xC1, 0x01,
+	0x03, 0x0E, 0x05, 0x02, 0x6D, 0x28, 0xBF, 0x26, 0x7C, 0x3E, 0x26, 0x01,
+	0x20, 0x10, 0x06, 0x02, 0x6D, 0x28, 0x40, 0x44, 0x11, 0x01, 0x01, 0x17,
+	0x05, 0x02, 0x6D, 0x28, 0xC1, 0x26, 0x01, 0x81, 0x05, 0x0F, 0x06, 0x02,
+	0x6D, 0x28, 0x26, 0x7E, 0x3E, 0x7D, 0x44, 0xB6, 0x92, 0x2C, 0x01, 0x86,
+	0x03, 0x10, 0x03, 0x00, 0x79, 0x2C, 0xCB, 0x03, 0x01, 0x01, 0x02, 0x03,
+	0x02, 0x02, 0x00, 0x06, 0x21, 0xC1, 0x26, 0x26, 0x01, 0x02, 0x0A, 0x44,
+	0x01, 0x06, 0x0F, 0x37, 0x06, 0x02, 0x6D, 0x28, 0x03, 0x02, 0xC1, 0x02,
+	0x01, 0x01, 0x01, 0x0B, 0x01, 0x03, 0x08, 0x0E, 0x05, 0x02, 0x6D, 0x28,
+	0x04, 0x08, 0x02, 0x01, 0x06, 0x04, 0x01, 0x00, 0x03, 0x02, 0xBF, 0x26,
+	0x03, 0x03, 0x26, 0x01, 0x84, 0x00, 0x0F, 0x06, 0x02, 0x6E, 0x28, 0x84,
+	0x44, 0xB6, 0x02, 0x02, 0x02, 0x01, 0x02, 0x03, 0x50, 0x26, 0x06, 0x01,
+	0x28, 0x25, 0x9B, 0x00, 0x02, 0x03, 0x00, 0x03, 0x01, 0x02, 0x00, 0x97,
+	0x02, 0x01, 0x02, 0x00, 0x39, 0x26, 0x01, 0x00, 0x0E, 0x06, 0x02, 0x60,
+	0x00, 0xD0, 0x04, 0x74, 0x02, 0x01, 0x00, 0x03, 0x00, 0xC1, 0xAA, 0x26,
+	0x06, 0x80, 0x43, 0xC1, 0x01, 0x01, 0x38, 0x0E, 0x06, 0x06, 0x25, 0x01,
+	0x81, 0x7F, 0x04, 0x2E, 0x01, 0x80, 0x40, 0x38, 0x0E, 0x06, 0x07, 0x25,
+	0x01, 0x83, 0xFE, 0x00, 0x04, 0x20, 0x01, 0x80, 0x41, 0x38, 0x0E, 0x06,
+	0x07, 0x25, 0x01, 0x84, 0x80, 0x00, 0x04, 0x12, 0x01, 0x80, 0x42, 0x38,
+	0x0E, 0x06, 0x07, 0x25, 0x01, 0x88, 0x80, 0x00, 0x04, 0x04, 0x01, 0x00,
+	0x44, 0x25, 0x02, 0x00, 0x37, 0x03, 0x00, 0x04, 0xFF, 0x39, 0x9B, 0x79,
+	0x2C, 0xC9, 0x05, 0x09, 0x02, 0x00, 0x01, 0x83, 0xFF, 0x7F, 0x17, 0x03,
+	0x00, 0x92, 0x2C, 0x01, 0x86, 0x03, 0x10, 0x06, 0x3A, 0xBB, 0x26, 0x81,
+	0x3D, 0x41, 0x25, 0x26, 0x01, 0x08, 0x0B, 0x37, 0x01, 0x8C, 0x80, 0x00,
+	0x37, 0x17, 0x02, 0x00, 0x17, 0x02, 0x00, 0x01, 0x8C, 0x80, 0x00, 0x17,
+	0x06, 0x19, 0x26, 0x01, 0x81, 0x7F, 0x17, 0x06, 0x05, 0x01, 0x84, 0x80,
+	0x00, 0x37, 0x26, 0x01, 0x83, 0xFE, 0x00, 0x17, 0x06, 0x05, 0x01, 0x88,
+	0x80, 0x00, 0x37, 0x03, 0x00, 0x04, 0x09, 0x02, 0x00, 0x01, 0x8C, 0x88,
+	0x01, 0x17, 0x03, 0x00, 0x16, 0xBF, 0xAA, 0x26, 0x06, 0x23, 0xBF, 0xAA,
+	0x26, 0x15, 0x26, 0x06, 0x18, 0x26, 0x01, 0x82, 0x00, 0x0F, 0x06, 0x05,
+	0x01, 0x82, 0x00, 0x04, 0x01, 0x26, 0x03, 0x01, 0x84, 0x02, 0x01, 0xB6,
+	0x02, 0x01, 0x12, 0x04, 0x65, 0x9B, 0x13, 0x04, 0x5A, 0x9B, 0x14, 0x9B,
+	0x02, 0x00, 0x2A, 0x00, 0x00, 0xB9, 0x26, 0x5A, 0x06, 0x07, 0x25, 0x06,
+	0x02, 0x66, 0x28, 0x04, 0x74, 0x00, 0x00, 0xC2, 0x01, 0x03, 0xC0, 0x44,
+	0x25, 0x44, 0x00, 0x00, 0xBF, 0xC6, 0x00, 0x03, 0x01, 0x00, 0x03, 0x00,
+	0xBF, 0xAA, 0x26, 0x06, 0x80, 0x50, 0xC1, 0x03, 0x01, 0xC1, 0x03, 0x02,
+	0x02, 0x01, 0x01, 0x08, 0x0E, 0x06, 0x16, 0x02, 0x02, 0x01, 0x0F, 0x0C,
+	0x06, 0x0D, 0x01, 0x01, 0x02, 0x02, 0x01, 0x10, 0x08, 0x0B, 0x02, 0x00,
+	0x37, 0x03, 0x00, 0x04, 0x2A, 0x02, 0x01, 0x01, 0x02, 0x10, 0x02, 0x01,
+	0x01, 0x06, 0x0C, 0x17, 0x02, 0x02, 0x01, 0x01, 0x0E, 0x02, 0x02, 0x01,
+	0x03, 0x0E, 0x37, 0x17, 0x06, 0x11, 0x02, 0x00, 0x01, 0x01, 0x02, 0x02,
+	0x5D, 0x01, 0x02, 0x0B, 0x02, 0x01, 0x08, 0x0B, 0x37, 0x03, 0x00, 0x04,
+	0xFF, 0x2C, 0x9B, 0x02, 0x00, 0x00, 0x00, 0xBF, 0x01, 0x01, 0x0E, 0x05,
+	0x02, 0x65, 0x28, 0xC1, 0x01, 0x08, 0x08, 0x82, 0x2E, 0x0E, 0x05, 0x02,
+	0x65, 0x28, 0x00, 0x00, 0xBF, 0x88, 0x2E, 0x05, 0x15, 0x01, 0x01, 0x0E,
+	0x05, 0x02, 0x69, 0x28, 0xC1, 0x01, 0x00, 0x0E, 0x05, 0x02, 0x69, 0x28,
+	0x01, 0x02, 0x88, 0x3E, 0x04, 0x1C, 0x01, 0x19, 0x0E, 0x05, 0x02, 0x69,
+	0x28, 0xC1, 0x01, 0x18, 0x0E, 0x05, 0x02, 0x69, 0x28, 0x84, 0x01, 0x18,
+	0xB6, 0x89, 0x84, 0x01, 0x18, 0x30, 0x05, 0x02, 0x69, 0x28, 0x00, 0x00,
+	0xBF, 0x06, 0x02, 0x6A, 0x28, 0x00, 0x00, 0x01, 0x02, 0x97, 0xC2, 0x01,
+	0x08, 0x0B, 0xC2, 0x08, 0x00, 0x00, 0x01, 0x03, 0x97, 0xC2, 0x01, 0x08,
+	0x0B, 0xC2, 0x08, 0x01, 0x08, 0x0B, 0xC2, 0x08, 0x00, 0x00, 0x01, 0x01,
+	0x97, 0xC2, 0x00, 0x00, 0x3A, 0x26, 0x58, 0x05, 0x01, 0x00, 0x25, 0xD0,
+	0x04, 0x76, 0x02, 0x03, 0x00, 0x91, 0x2E, 0x03, 0x01, 0x01, 0x00, 0x26,
+	0x02, 0x01, 0x0A, 0x06, 0x10, 0x26, 0x01, 0x01, 0x0B, 0x90, 0x08, 0x2C,
+	0x02, 0x00, 0x0E, 0x06, 0x01, 0x00, 0x5C, 0x04, 0x6A, 0x25, 0x01, 0x7F,
+	0x00, 0x00, 0x01, 0x15, 0x87, 0x3E, 0x44, 0x52, 0x25, 0x52, 0x25, 0x29,
+	0x00, 0x00, 0x01, 0x01, 0x44, 0xC4, 0x00, 0x00, 0x44, 0x38, 0x97, 0x44,
+	0x26, 0x06, 0x05, 0xC2, 0x25, 0x5D, 0x04, 0x78, 0x25, 0x00, 0x00, 0x26,
+	0x01, 0x81, 0xAC, 0x00, 0x0E, 0x06, 0x04, 0x25, 0x01, 0x7F, 0x00, 0x9A,
+	0x59, 0x00, 0x02, 0x03, 0x00, 0x79, 0x2C, 0x9A, 0x03, 0x01, 0x02, 0x01,
+	0x01, 0x0F, 0x17, 0x02, 0x01, 0x01, 0x04, 0x11, 0x01, 0x0F, 0x17, 0x02,
+	0x01, 0x01, 0x08, 0x11, 0x01, 0x0F, 0x17, 0x01, 0x00, 0x38, 0x0E, 0x06,
+	0x10, 0x25, 0x01, 0x00, 0x01, 0x18, 0x02, 0x00, 0x06, 0x03, 0x49, 0x04,
+	0x01, 0x4A, 0x04, 0x81, 0x0D, 0x01, 0x01, 0x38, 0x0E, 0x06, 0x10, 0x25,
+	0x01, 0x01, 0x01, 0x10, 0x02, 0x00, 0x06, 0x03, 0x49, 0x04, 0x01, 0x4A,
+	0x04, 0x80, 0x77, 0x01, 0x02, 0x38, 0x0E, 0x06, 0x10, 0x25, 0x01, 0x01,
+	0x01, 0x20, 0x02, 0x00, 0x06, 0x03, 0x49, 0x04, 0x01, 0x4A, 0x04, 0x80,
+	0x61, 0x01, 0x03, 0x38, 0x0E, 0x06, 0x0F, 0x25, 0x25, 0x01, 0x10, 0x02,
+	0x00, 0x06, 0x03, 0x47, 0x04, 0x01, 0x48, 0x04, 0x80, 0x4C, 0x01, 0x04,
+	0x38, 0x0E, 0x06, 0x0E, 0x25, 0x25, 0x01, 0x20, 0x02, 0x00, 0x06, 0x03,
+	0x47, 0x04, 0x01, 0x48, 0x04, 0x38, 0x01, 0x05, 0x38, 0x0E, 0x06, 0x0C,
+	0x25, 0x25, 0x02, 0x00, 0x06, 0x03, 0x4B, 0x04, 0x01, 0x4C, 0x04, 0x26,
+	0x26, 0x01, 0x09, 0x0F, 0x06, 0x02, 0x68, 0x28, 0x44, 0x25, 0x26, 0x01,
+	0x01, 0x17, 0x01, 0x04, 0x0B, 0x01, 0x10, 0x08, 0x44, 0x01, 0x08, 0x17,
+	0x01, 0x10, 0x44, 0x09, 0x02, 0x00, 0x06, 0x03, 0x45, 0x04, 0x01, 0x46,
+	0x00, 0x25, 0x00, 0x00, 0x9A, 0x01, 0x0C, 0x11, 0x01, 0x02, 0x0F, 0x00,
+	0x00, 0x9A, 0x01, 0x0C, 0x11, 0x26, 0x5B, 0x44, 0x01, 0x03, 0x0A, 0x17,
+	0x00, 0x00, 0x9A, 0x01, 0x0C, 0x11, 0x01, 0x01, 0x0E, 0x00, 0x00, 0x9A,
+	0x01, 0x0C, 0x11, 0x5A, 0x00, 0x00, 0x9A, 0x01, 0x81, 0x70, 0x17, 0x01,
+	0x20, 0x0D, 0x00, 0x00, 0x1B, 0x01, 0x00, 0x75, 0x2E, 0x26, 0x06, 0x22,
+	0x01, 0x01, 0x38, 0x0E, 0x06, 0x06, 0x25, 0x01, 0x00, 0x9E, 0x04, 0x14,
+	0x01, 0x02, 0x38, 0x0E, 0x06, 0x0D, 0x25, 0x77, 0x2E, 0x01, 0x01, 0x0E,
+	0x06, 0x03, 0x01, 0x10, 0x37, 0x04, 0x01, 0x25, 0x04, 0x01, 0x25, 0x7B,
+	0x2E, 0x05, 0x33, 0x2F, 0x06, 0x30, 0x86, 0x2E, 0x01, 0x14, 0x38, 0x0E,
+	0x06, 0x06, 0x25, 0x01, 0x02, 0x37, 0x04, 0x22, 0x01, 0x15, 0x38, 0x0E,
+	0x06, 0x09, 0x25, 0xAD, 0x06, 0x03, 0x01, 0x7F, 0x9E, 0x04, 0x13, 0x01,
+	0x16, 0x38, 0x0E, 0x06, 0x06, 0x25, 0x01, 0x01, 0x37, 0x04, 0x07, 0x25,
+	0x01, 0x04, 0x37, 0x01, 0x00, 0x25, 0x1A, 0x06, 0x03, 0x01, 0x08, 0x37,
+	0x00, 0x00, 0x1B, 0x26, 0x05, 0x13, 0x2F, 0x06, 0x10, 0x86, 0x2E, 0x01,
+	0x15, 0x0E, 0x06, 0x08, 0x25, 0xAD, 0x01, 0x00, 0x77, 0x3E, 0x04, 0x01,
+	0x20, 0x00, 0x00, 0xCE, 0x01, 0x07, 0x17, 0x01, 0x01, 0x0F, 0x06, 0x02,
+	0x72, 0x28, 0x00, 0x01, 0x03, 0x00, 0x29, 0x1A, 0x06, 0x05, 0x02, 0x00,
+	0x87, 0x3E, 0x00, 0xCE, 0x25, 0x04, 0x74, 0x00, 0x01, 0x14, 0xD1, 0x01,
+	0x01, 0xDE, 0x29, 0x26, 0x01, 0x00, 0xC8, 0x01, 0x16, 0xD1, 0xD7, 0x29,
+	0x00, 0x00, 0x01, 0x0B, 0xDE, 0x4E, 0x26, 0x26, 0x01, 0x03, 0x08, 0xDD,
+	0xDD, 0x18, 0x26, 0x58, 0x06, 0x02, 0x25, 0x00, 0xDD, 0x1D, 0x26, 0x06,
+	0x05, 0x84, 0x44, 0xD8, 0x04, 0x77, 0x25, 0x04, 0x6C, 0x00, 0x21, 0x01,
+	0x0F, 0xDE, 0x26, 0x92, 0x2C, 0x01, 0x86, 0x03, 0x10, 0x06, 0x0C, 0x01,
+	0x04, 0x08, 0xDD, 0x80, 0x2E, 0xDE, 0x78, 0x2E, 0xDE, 0x04, 0x02, 0x5E,
+	0xDD, 0x26, 0xDC, 0x84, 0x44, 0xD8, 0x00, 0x02, 0xA4, 0xA6, 0x08, 0xA2,
+	0x08, 0xA5, 0x08, 0xA7, 0x08, 0xA3, 0x08, 0x27, 0x08, 0x03, 0x00, 0x01,
+	0x01, 0xDE, 0x01, 0x27, 0x8E, 0x2E, 0x08, 0x91, 0x2E, 0x01, 0x01, 0x0B,
+	0x08, 0x02, 0x00, 0x06, 0x04, 0x5E, 0x02, 0x00, 0x08, 0x83, 0x2C, 0x38,
+	0x09, 0x26, 0x5B, 0x06, 0x24, 0x02, 0x00, 0x05, 0x04, 0x44, 0x5E, 0x44,
+	0x5F, 0x01, 0x04, 0x09, 0x26, 0x58, 0x06, 0x03, 0x25, 0x01, 0x00, 0x26,
+	0x01, 0x04, 0x08, 0x02, 0x00, 0x08, 0x03, 0x00, 0x44, 0x01, 0x04, 0x08,
+	0x38, 0x08, 0x44, 0x04, 0x03, 0x25, 0x01, 0x7F, 0x03, 0x01, 0xDD, 0x94,
+	0x2C, 0xDC, 0x7A, 0x01, 0x04, 0x19, 0x7A, 0x01, 0x04, 0x08, 0x01, 0x1C,
+	0x32, 0x7A, 0x01, 0x20, 0xD8, 0x8D, 0x8E, 0x2E, 0xDA, 0x91, 0x2E, 0x26,
+	0x01, 0x01, 0x0B, 0xDC, 0x90, 0x44, 0x26, 0x06, 0x0F, 0x5D, 0x38, 0x2C,
+	0x26, 0xC7, 0x05, 0x02, 0x62, 0x28, 0xDC, 0x44, 0x5E, 0x44, 0x04, 0x6E,
+	0x60, 0x01, 0x01, 0xDE, 0x01, 0x00, 0xDE, 0x02, 0x00, 0x06, 0x81, 0x5A,
+	0x02, 0x00, 0xDC, 0xA4, 0x06, 0x0E, 0x01, 0x83, 0xFE, 0x01, 0xDC, 0x89,
+	0xA4, 0x01, 0x04, 0x09, 0x26, 0xDC, 0x5D, 0xDA, 0xA6, 0x06, 0x16, 0x01,
+	0x00, 0xDC, 0x8B, 0xA6, 0x01, 0x04, 0x09, 0x26, 0xDC, 0x01, 0x02, 0x09,
+	0x26, 0xDC, 0x01, 0x00, 0xDE, 0x01, 0x03, 0x09, 0xD9, 0xA2, 0x06, 0x0C,
+	0x01, 0x01, 0xDC, 0x01, 0x01, 0xDC, 0x82, 0x2E, 0x01, 0x08, 0x09, 0xDE,
+	0xA5, 0x06, 0x19, 0x01, 0x0D, 0xDC, 0xA5, 0x01, 0x04, 0x09, 0x26, 0xDC,
+	0x01, 0x02, 0x09, 0xDC, 0x42, 0x06, 0x03, 0x01, 0x03, 0xDB, 0x43, 0x06,
+	0x03, 0x01, 0x01, 0xDB, 0xA7, 0x26, 0x06, 0x36, 0x01, 0x0A, 0xDC, 0x01,
+	0x04, 0x09, 0x26, 0xDC, 0x5F, 0xDC, 0x40, 0x01, 0x00, 0x26, 0x01, 0x82,
+	0x80, 0x80, 0x80, 0x00, 0x17, 0x06, 0x0A, 0x01, 0xFD, 0xFF, 0xFF, 0xFF,
+	0x7F, 0x17, 0x01, 0x1D, 0xDC, 0x26, 0x01, 0x20, 0x0A, 0x06, 0x0C, 0xA0,
+	0x11, 0x01, 0x01, 0x17, 0x06, 0x02, 0x26, 0xDC, 0x5C, 0x04, 0x6E, 0x60,
+	0x04, 0x01, 0x25, 0xA3, 0x06, 0x0A, 0x01, 0x0B, 0xDC, 0x01, 0x02, 0xDC,
+	0x01, 0x82, 0x00, 0xDC, 0x27, 0x26, 0x06, 0x1F, 0x01, 0x10, 0xDC, 0x01,
+	0x04, 0x09, 0x26, 0xDC, 0x5F, 0xDC, 0x85, 0x2C, 0x01, 0x00, 0xA0, 0x0F,
+	0x06, 0x0A, 0x26, 0x1E, 0x26, 0xDE, 0x84, 0x44, 0xD8, 0x5C, 0x04, 0x72,
+	0x60, 0x04, 0x01, 0x25, 0x02, 0x01, 0x58, 0x05, 0x11, 0x01, 0x15, 0xDC,
+	0x02, 0x01, 0x26, 0xDC, 0x26, 0x06, 0x06, 0x5D, 0x01, 0x00, 0xDE, 0x04,
+	0x77, 0x25, 0x00, 0x00, 0x01, 0x10, 0xDE, 0x79, 0x2C, 0x26, 0xCC, 0x06,
+	0x0C, 0xAB, 0x23, 0x26, 0x5E, 0xDD, 0x26, 0xDC, 0x84, 0x44, 0xD8, 0x04,
+	0x0D, 0x26, 0xCA, 0x44, 0xAB, 0x22, 0x26, 0x5C, 0xDD, 0x26, 0xDE, 0x84,
+	0x44, 0xD8, 0x00, 0x00, 0x9C, 0x01, 0x14, 0xDE, 0x01, 0x0C, 0xDD, 0x84,
+	0x01, 0x0C, 0xD8, 0x00, 0x00, 0x51, 0x26, 0x01, 0x00, 0x0E, 0x06, 0x02,
+	0x60, 0x00, 0xCE, 0x25, 0x04, 0x73, 0x00, 0x26, 0xDC, 0xD8, 0x00, 0x00,
+	0x26, 0xDE, 0xD8, 0x00, 0x01, 0x03, 0x00, 0x41, 0x25, 0x26, 0x01, 0x10,
+	0x17, 0x06, 0x06, 0x01, 0x04, 0xDE, 0x02, 0x00, 0xDE, 0x26, 0x01, 0x08,
+	0x17, 0x06, 0x06, 0x01, 0x03, 0xDE, 0x02, 0x00, 0xDE, 0x26, 0x01, 0x20,
+	0x17, 0x06, 0x06, 0x01, 0x05, 0xDE, 0x02, 0x00, 0xDE, 0x26, 0x01, 0x80,
+	0x40, 0x17, 0x06, 0x06, 0x01, 0x06, 0xDE, 0x02, 0x00, 0xDE, 0x01, 0x04,
+	0x17, 0x06, 0x06, 0x01, 0x02, 0xDE, 0x02, 0x00, 0xDE, 0x00, 0x00, 0x26,
+	0x01, 0x08, 0x4F, 0xDE, 0xDE, 0x00, 0x00, 0x26, 0x01, 0x10, 0x4F, 0xDE,
+	0xDC, 0x00, 0x00, 0x26, 0x52, 0x06, 0x02, 0x25, 0x00, 0xCE, 0x25, 0x04,
+	0x76
+};
+
+static const uint16_t t0_caddr[] = { /* byte offsets into t0_codeblock, indexed by (slot - T0_INTERPRETED); see T0_ENTER */
+	0, /* entry for slot == T0_INTERPRETED */
+	5,
+	10,
+	15,
+	20,
+	25,
+	30,
+	35,
+	40,
+	44,
+	48,
+	52,
+	56,
+	60,
+	64,
+	68,
+	72,
+	76,
+	80,
+	84,
+	88,
+	92,
+	96,
+	100,
+	104,
+	108,
+	112,
+	116,
+	120,
+	124,
+	129,
+	134,
+	139,
+	144,
+	149,
+	154,
+	159,
+	164,
+	169,
+	174,
+	179,
+	184,
+	189,
+	194,
+	199,
+	204,
+	209,
+	214,
+	219,
+	224,
+	229,
+	234,
+	239,
+	244,
+	249,
+	254,
+	259,
+	264,
+	269,
+	274,
+	279,
+	284,
+	289,
+	294,
+	303,
+	316,
+	320,
+	345,
+	351,
+	370,
+	381,
+	422,
+	542,
+	546,
+	611,
+	626,
+	637,
+	655,
+	684,
+	694,
+	730,
+	740,
+	818,
+	832,
+	838,
+	897,
+	916,
+	951,
+	1000,
+	1076,
+	1103,
+	1134,
+	1145,
+	1497,
+	1644,
+	1668,
+	1884,
+	1898,
+	1907,
+	1911,
+	2006,
+	2027,
+	2083,
+	2090,
+	2101,
+	2117,
+	2123,
+	2134,
+	2169,
+	2181,
+	2187,
+	2202,
+	2218,
+	2411,
+	2420,
+	2433,
+	2442,
+	2449,
+	2459,
+	2565,
+	2590,
+	2603,
+	2619,
+	2637,
+	2669,
+	2703,
+	3071,
+	3107,
+	3120,
+	3134,
+	3139,
+	3144,
+	3210,
+	3218,
+	3226
+};
+
+#define T0_INTERPRETED   88 /* opcodes below this are native primitives (switch cases in the run loop); the rest index t0_caddr */
+
+#define T0_ENTER(ip, rp, slot)   do { /* call interpreted word `slot`; ip and rp are evaluated more than once */ \
+		const unsigned char *t0_newip; \
+		uint32_t t0_lnum; \
+		t0_newip = &t0_codeblock[t0_caddr[(slot) - T0_INTERPRETED]]; /* start of the word's code */ \
+		t0_lnum = t0_parse7E_unsigned(&t0_newip); /* leading value: number of local slots */ \
+		(rp) += t0_lnum; /* reserve the locals on the return stack */ \
+		*((rp) ++) = (uint32_t)((ip) - &t0_codeblock[0]) + (t0_lnum << 16); /* return record: offset (low 16 bits) + local count (high 16 bits) */ \
+		(ip) = t0_newip; \
+	} while (0)
+
+#define T0_DEFENTRY(name, slot) /* defines `void name(void *ctx)`, which arms ctx to start at word `slot` */ \
+void \
+name(void *ctx) \
+{ \
+	t0_context *t0ctx = ctx; \
+	t0ctx->ip = &t0_codeblock[0]; /* makes the pushed return offset 0, which `ret` treats as end of run */ \
+	T0_ENTER(t0ctx->ip, t0ctx->rp, slot); /* only ip and rp are touched; dp is not set here */ \
+}
+
+T0_DEFENTRY(br_ssl_hs_client_init_main, 169) /* entry point: starts execution at interpreted word slot 169 */
+
+#define T0_NEXT(t0ipp)   (*(*(t0ipp)) ++) /* read the byte at *t0ipp, then advance *t0ipp by one */
+/* Resume the T0 bytecode interpreter from the state saved in t0ctx; runs until a primitive yields (T0_CO) or the outermost word returns. */
+void
+br_ssl_hs_client_run(void *t0ctx)
+{
+	uint32_t *dp, *rp; /* tops of the data stack and return stack; both grow upward */
+	const unsigned char *ip; /* next bytecode byte to execute */
+	/* Stack helpers. Locals sit just below the return record, so T0_LOCAL(0) is rp[-2]. */
+#define T0_LOCAL(x)    (*(rp - 2 - (x)))
+#define T0_POP()       (*-- dp)
+#define T0_POPi()      (*(int32_t *)(-- dp))
+#define T0_PEEK(x)     (*(dp - 1 - (x)))
+#define T0_PEEKi(x)    (*(int32_t *)(dp - 1 - (x)))
+#define T0_PUSH(v)     do { *dp = (v); dp ++; } while (0)
+#define T0_PUSHi(v)    do { *(int32_t *)dp = (v); dp ++; } while (0)
+#define T0_RPOP()      (*-- rp)
+#define T0_RPOPi()     (*(int32_t *)(-- rp))
+#define T0_RPUSH(v)    do { *rp = (v); rp ++; } while (0)
+#define T0_RPUSHi(v)   do { *(int32_t *)rp = (v); rp ++; } while (0)
+#define T0_ROLL(x)     do { /* move the entry at depth x to the top, shifting the ones above it down */ \
+	size_t t0len = (size_t)(x); \
+	uint32_t t0tmp = *(dp - 1 - t0len); \
+	memmove(dp - t0len - 1, dp - t0len, t0len * sizeof *dp); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_SWAP()      do { \
+	uint32_t t0tmp = *(dp - 2); \
+	*(dp - 2) = *(dp - 1); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_ROT()       do { /* ( a b c -- b c a ) */ \
+	uint32_t t0tmp = *(dp - 3); \
+	*(dp - 3) = *(dp - 2); \
+	*(dp - 2) = *(dp - 1); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_NROT()       do { /* ( a b c -- c a b ) */ \
+	uint32_t t0tmp = *(dp - 1); \
+	*(dp - 1) = *(dp - 2); \
+	*(dp - 2) = *(dp - 3); \
+	*(dp - 3) = t0tmp; \
+} while (0)
+#define T0_PICK(x)      do { \
+	uint32_t t0depth = (x); \
+	T0_PUSH(T0_PEEK(t0depth)); \
+} while (0)
+#define T0_CO()         do { /* yield: leave the loop; state is saved at t0_exit */ \
+	goto t0_exit; \
+} while (0)
+#define T0_RET()        goto t0_next /* finish the current primitive early and dispatch the next opcode */
+	/* Load the saved interpreter state; it is written back at t0_exit. */
+	dp = ((t0_context *)t0ctx)->dp;
+	rp = ((t0_context *)t0ctx)->rp;
+	ip = ((t0_context *)t0ctx)->ip;
+	goto t0_next; /* enter the loop body directly at the dispatch label */
+	for (;;) {
+		uint32_t t0x;
+
+	t0_next:
+		t0x = T0_NEXT(&ip); /* fetch one opcode byte */
+		if (t0x < T0_INTERPRETED) {
+			switch (t0x) {
+				int32_t t0off;
+				/* NOTE(review): CTX and ENG are defined outside this view; "addr" values popped below are byte offsets added to ENG. */
+			case 0: /* ret: pop return record, drop locals (high 16 bits), resume at offset (low 16 bits); offset 0 ends the run */
+				t0x = T0_RPOP();
+				rp -= (t0x >> 16);
+				t0x &= 0xFFFF;
+				if (t0x == 0) {
+					ip = NULL;
+					goto t0_exit;
+				}
+				ip = &t0_codeblock[t0x];
+				break;
+			case 1: /* literal constant: push the immediate decoded by t0_parse7E_signed */
+				T0_PUSHi(t0_parse7E_signed(&ip));
+				break;
+			case 2: /* read local: push the local whose index is the immediate */
+				T0_PUSH(T0_LOCAL(t0_parse7E_unsigned(&ip)));
+				break;
+			case 3: /* write local: pop into the local whose index is the immediate */
+				T0_LOCAL(t0_parse7E_unsigned(&ip)) = T0_POP();
+				break;
+			case 4: /* jump: offset is relative to the byte following the immediate */
+				t0off = t0_parse7E_signed(&ip);
+				ip += t0off;
+				break;
+			case 5: /* jump if: pops a flag, branches when it is non-zero */
+				t0off = t0_parse7E_signed(&ip);
+				if (T0_POP()) {
+					ip += t0off;
+				}
+				break;
+			case 6: /* jump if not: pops a flag, branches when it is zero */
+				t0off = t0_parse7E_signed(&ip);
+				if (!T0_POP()) {
+					ip += t0off;
+				}
+				break;
+			case 7: {
+				/* * ( a b -- a*b ) on uint32_t */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a * b);
+
+				}
+				break;
+			case 8: {
+				/* + ( a b -- a+b ) on uint32_t */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a + b);
+
+				}
+				break;
+			case 9: {
+				/* - ( a b -- a-b ) on uint32_t */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a - b);
+
+				}
+				break;
+			case 10: {
+				/* < ( a b -- flag ) signed compare; flag is all-ones when true, 0 when false */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a < b));
+
+				}
+				break;
+			case 11: {
+				/* << ( x c -- x<<c ); NOTE(review): c is not range-checked here */
+
+	int c = (int)T0_POPi();
+	uint32_t x = T0_POP();
+	T0_PUSH(x << c);
+
+				}
+				break;
+			case 12: {
+				/* <= ( a b -- flag ) signed compare; flag is all-ones or 0 */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a <= b));
+
+				}
+				break;
+			case 13: {
+				/* <> ( a b -- flag ) flag is all-ones when a != b, else 0 */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(-(uint32_t)(a != b));
+
+				}
+				break;
+			case 14: {
+				/* = ( a b -- flag ) flag is all-ones when a == b, else 0 */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(-(uint32_t)(a == b));
+
+				}
+				break;
+			case 15: {
+				/* > ( a b -- flag ) signed compare; flag is all-ones or 0 */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a > b));
+
+				}
+				break;
+			case 16: {
+				/* >= ( a b -- flag ) signed compare; flag is all-ones or 0 */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a >= b));
+
+				}
+				break;
+			case 17: {
+				/* >> ( x c -- x>>c ) shift performed on int32_t; see u>> for the unsigned form */
+
+	int c = (int)T0_POPi();
+	int32_t x = T0_POPi();
+	T0_PUSHi(x >> c);
+
+				}
+				break;
+			case 18: {
+				/* anchor-dn-append-name ( len -- ) forwards ENG->pad and len to the client-auth handler, if one is set */
+
+	size_t len;
+
+	len = T0_POP();
+	if (CTX->client_auth_vtable != NULL) {
+		(*CTX->client_auth_vtable)->append_name(
+			CTX->client_auth_vtable, ENG->pad, len);
+	}
+
+				}
+				break;
+			case 19: {
+				/* anchor-dn-end-name ( -- ) no-op when no client-auth handler is set */
+
+	if (CTX->client_auth_vtable != NULL) {
+		(*CTX->client_auth_vtable)->end_name(
+			CTX->client_auth_vtable);
+	}
+
+				}
+				break;
+			case 20: {
+				/* anchor-dn-end-name-list ( -- ) no-op when no client-auth handler is set */
+
+	if (CTX->client_auth_vtable != NULL) {
+		(*CTX->client_auth_vtable)->end_name_list(
+			CTX->client_auth_vtable);
+	}
+
+				}
+				break;
+			case 21: {
+				/* anchor-dn-start-name ( len -- ) len is popped even when no handler is set */
+
+	size_t len;
+
+	len = T0_POP();
+	if (CTX->client_auth_vtable != NULL) {
+		(*CTX->client_auth_vtable)->start_name(
+			CTX->client_auth_vtable, len);
+	}
+
+				}
+				break;
+			case 22: {
+				/* anchor-dn-start-name-list ( -- ) no-op when no client-auth handler is set */
+
+	if (CTX->client_auth_vtable != NULL) {
+		(*CTX->client_auth_vtable)->start_name_list(
+			CTX->client_auth_vtable);
+	}
+
+				}
+				break;
+			case 23: {
+				/* and ( a b -- a&b ) bitwise */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a & b);
+
+				}
+				break;
+			case 24: {
+				/* begin-cert ( -- len | -1 ) selects the next chain entry as current certificate; -1 when the chain is exhausted */
+
+	if (ENG->chain_len == 0) {
+		T0_PUSHi(-1);
+	} else {
+		ENG->cert_cur = ENG->chain->data;
+		ENG->cert_len = ENG->chain->data_len;
+		ENG->chain ++;
+		ENG->chain_len --;
+		T0_PUSH(ENG->cert_len);
+	}
+
+				}
+				break;
+			case 25: {
+				/* bzero ( addr len -- ) zeroes len bytes at ENG + addr */
+
+	size_t len = (size_t)T0_POP();
+	void *addr = (unsigned char *)ENG + (size_t)T0_POP();
+	memset(addr, 0, len);
+
+				}
+				break;
+			case 26: {
+				/* can-output? ( -- flag ) -1 when ENG->hlen_out > 0, else 0 */
+
+	T0_PUSHi(-(ENG->hlen_out > 0));
+
+				}
+				break;
+			case 27: {
+				/* co ( -- ) yield to the caller; execution resumes after this opcode on the next run */
+ T0_CO(); 
+				}
+				break;
+			case 28: {
+				/* compute-Finished-inner ( from_client prf_id -- ) calls the PRF with ENG->pad and 12 (presumably output buffer and length) */
+
+	int prf_id = T0_POP();
+	int from_client = T0_POPi();
+	unsigned char tmp[48];
+	br_tls_prf_seed_chunk seed;
+
+	br_tls_prf_impl prf = br_ssl_engine_get_PRF(ENG, prf_id);
+	seed.data = tmp;
+	if (ENG->session.version >= BR_TLS12) {
+		seed.len = br_multihash_out(&ENG->mhash, prf_id, tmp);
+	} else {
+		br_multihash_out(&ENG->mhash, br_md5_ID, tmp); /* MD5 output at tmp[0..] */
+		br_multihash_out(&ENG->mhash, br_sha1_ID, tmp + 16); /* SHA-1 output right after it */
+		seed.len = 36;
+	}
+	prf(ENG->pad, 12, ENG->session.master_secret,
+		sizeof ENG->session.master_secret,
+		from_client ? "client finished" : "server finished",
+		1, &seed);
+
+				}
+				break;
+			case 29: {
+				/* copy-cert-chunk ( -- clen ) copies up to sizeof ENG->pad bytes of the current certificate into ENG->pad */
+
+	size_t clen;
+
+	clen = ENG->cert_len;
+	if (clen > sizeof ENG->pad) {
+		clen = sizeof ENG->pad;
+	}
+	memcpy(ENG->pad, ENG->cert_cur, clen);
+	ENG->cert_cur += clen;
+	ENG->cert_len -= clen;
+	T0_PUSH(clen);
+
+				}
+				break;
+			case 30: {
+				/* copy-protocol-name ( idx -- len ) NOTE(review): strlen is not checked against sizeof ENG->pad here; confirm names are bounded elsewhere */
+
+	size_t idx = T0_POP();
+	size_t len = strlen(ENG->protocol_names[idx]);
+	memcpy(ENG->pad, ENG->protocol_names[idx], len);
+	T0_PUSH(len);
+
+				}
+				break;
+			case 31: {
+				/* data-get8 ( addr -- byte ) reads t0_datablock[addr] */
+
+	size_t addr = T0_POP();
+	T0_PUSH(t0_datablock[addr]);
+
+				}
+				break;
+			case 32: {
+				/* discard-input ( -- ) sets ENG->hlen_in to 0 */
+
+	ENG->hlen_in = 0;
+
+				}
+				break;
+			case 33: {
+				/* do-client-sign ( -- sig_len ) a zero length fails the engine with BR_ERR_INVALID_ALGORITHM and yields */
+
+	size_t sig_len;
+
+	sig_len = make_client_sign(CTX);
+	if (sig_len == 0) {
+		br_ssl_engine_fail(ENG, BR_ERR_INVALID_ALGORITHM);
+		T0_CO();
+	}
+	T0_PUSH(sig_len);
+
+				}
+				break;
+			case 34: {
+				/* do-ecdh ( ecdhe prf_id -- x ) a negative result fails the engine with -x and yields */
+
+	unsigned prf_id = T0_POP();
+	unsigned ecdhe = T0_POP();
+	int x;
+
+	x = make_pms_ecdh(CTX, ecdhe, prf_id);
+	if (x < 0) {
+		br_ssl_engine_fail(ENG, -x);
+		T0_CO();
+	} else {
+		T0_PUSH(x);
+	}
+
+				}
+				break;
+			case 35: {
+				/* do-rsa-encrypt ( n -- x ) n is passed to make_pms_rsa; a negative result fails the engine with -x and yields */
+
+	int x;
+
+	x = make_pms_rsa(CTX, T0_POP());
+	if (x < 0) {
+		br_ssl_engine_fail(ENG, -x);
+		T0_CO();
+	} else {
+		T0_PUSH(x);
+	}
+
+				}
+				break;
+			case 36: {
+				/* do-static-ecdh ( prf_id -- ) a negative result fails the engine with BR_ERR_INVALID_ALGORITHM and yields */
+
+	unsigned prf_id = T0_POP();
+
+	if (make_pms_static_ecdh(CTX, prf_id) < 0) {
+		br_ssl_engine_fail(ENG, BR_ERR_INVALID_ALGORITHM);
+		T0_CO();
+	}
+
+				}
+				break;
+			case 37: {
+				/* drop ( a -- ) */
+ (void)T0_POP(); 
+				}
+				break;
+			case 38: {
+				/* dup ( a -- a a ) */
+ T0_PUSH(T0_PEEK(0)); 
+				}
+				break;
+			case 39: {
+				/* ext-ALPN-length ( -- len ) 0 when no protocol names are set; otherwise 6 plus (1 + strlen) per name */
+
+	size_t u, len;
+
+	if (ENG->protocol_names_num == 0) {
+		T0_PUSH(0);
+		T0_RET();
+	}
+	len = 6;
+	for (u = 0; u < ENG->protocol_names_num; u ++) {
+		len += 1 + strlen(ENG->protocol_names[u]);
+	}
+	T0_PUSH(len);
+
+				}
+				break;
+			case 40: {
+				/* fail ( err -- ) reports err through br_ssl_engine_fail, then yields */
+
+	br_ssl_engine_fail(ENG, (int)T0_POPi());
+	T0_CO();
+
+				}
+				break;
+			case 41: {
+				/* flush-record ( -- ) calls br_ssl_engine_flush_record */
+
+	br_ssl_engine_flush_record(ENG);
+
+				}
+				break;
+			case 42: {
+				/* get-client-chain ( auth_types -- ) asks the client-auth handler to choose; with no handler, hash_id and chain_len are set to 0 */
+
+	uint32_t auth_types;
+
+	auth_types = T0_POP();
+	if (CTX->client_auth_vtable != NULL) {
+		br_ssl_client_certificate ux;
+
+		(*CTX->client_auth_vtable)->choose(CTX->client_auth_vtable,
+			CTX, auth_types, &ux);
+		CTX->auth_type = (unsigned char)ux.auth_type;
+		CTX->hash_id = (unsigned char)ux.hash_id;
+		ENG->chain = ux.chain;
+		ENG->chain_len = ux.chain_len;
+	} else {
+		CTX->hash_id = 0;
+		ENG->chain_len = 0;
+	}
+
+				}
+				break;
+			case 43: {
+				/* get-key-type-usages ( -- n ) 0 when get_pkey returns NULL, else key_type | usages */
+
+	const br_x509_class *xc;
+	const br_x509_pkey *pk;
+	unsigned usages;
+
+	xc = *(ENG->x509ctx);
+	pk = xc->get_pkey(ENG->x509ctx, &usages);
+	if (pk == NULL) {
+		T0_PUSH(0);
+	} else {
+		T0_PUSH(pk->key_type | usages);
+	}
+
+				}
+				break;
+			case 44: {
+				/* get16 ( addr -- x ) reads a uint16_t at ENG + addr */
+
+	size_t addr = (size_t)T0_POP();
+	T0_PUSH(*(uint16_t *)(void *)((unsigned char *)ENG + addr));
+
+				}
+				break;
+			case 45: {
+				/* get32 ( addr -- x ) reads a uint32_t at ENG + addr */
+
+	size_t addr = (size_t)T0_POP();
+	T0_PUSH(*(uint32_t *)(void *)((unsigned char *)ENG + addr));
+
+				}
+				break;
+			case 46: {
+				/* get8 ( addr -- x ) reads one byte at ENG + addr */
+
+	size_t addr = (size_t)T0_POP();
+	T0_PUSH(*((unsigned char *)ENG + addr));
+
+				}
+				break;
+			case 47: {
+				/* has-input? ( -- flag ) -1 when ENG->hlen_in is non-zero, else 0 */
+
+	T0_PUSHi(-(ENG->hlen_in != 0));
+
+				}
+				break;
+			case 48: {
+				/* memcmp ( addr1 addr2 len -- flag ) flag is all-ones when the two regions are equal, else 0 */
+
+	size_t len = (size_t)T0_POP();
+	void *addr2 = (unsigned char *)ENG + (size_t)T0_POP();
+	void *addr1 = (unsigned char *)ENG + (size_t)T0_POP();
+	int x = memcmp(addr1, addr2, len);
+	T0_PUSH((uint32_t)-(x == 0));
+
+				}
+				break;
+			case 49: {
+				/* memcpy ( dst src len -- ) both addresses are offsets from ENG */
+
+	size_t len = (size_t)T0_POP();
+	void *src = (unsigned char *)ENG + (size_t)T0_POP();
+	void *dst = (unsigned char *)ENG + (size_t)T0_POP();
+	memcpy(dst, src, len);
+
+				}
+				break;
+			case 50: {
+				/* mkrand ( addr len -- ) passes ENG + addr and len to br_hmac_drbg_generate on ENG->rng */
+
+	size_t len = (size_t)T0_POP();
+	void *addr = (unsigned char *)ENG + (size_t)T0_POP();
+	br_hmac_drbg_generate(&ENG->rng, addr, len);
+
+				}
+				break;
+			case 51: {
+				/* more-incoming-bytes? ( -- flag ) NOTE(review): pushes 1 (not -1) for true, unlike the other predicates */
+
+	T0_PUSHi(ENG->hlen_in != 0 || !br_ssl_engine_recvrec_finished(ENG));
+
+				}
+				break;
+			case 52: {
+				/* multihash-init ( -- ) calls br_multihash_init on ENG->mhash */
+
+	br_multihash_init(&ENG->mhash);
+
+				}
+				break;
+			case 53: {
+				/* neg ( a -- -a ) on uint32_t */
+
+	uint32_t a = T0_POP();
+	T0_PUSH(-a);
+
+				}
+				break;
+			case 54: {
+				/* not ( a -- ~a ) bitwise complement */
+
+	uint32_t a = T0_POP();
+	T0_PUSH(~a);
+
+				}
+				break;
+			case 55: {
+				/* or ( a b -- a|b ) bitwise */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a | b);
+
+				}
+				break;
+			case 56: {
+				/* over ( a b -- a b a ) */
+ T0_PUSH(T0_PEEK(1)); 
+				}
+				break;
+			case 57: {
+				/* read-chunk-native ( addr len -- addr' len' ) copies up to len input bytes to ENG + addr; the stack is untouched when no input is available */
+
+	size_t clen = ENG->hlen_in;
+	if (clen > 0) {
+		uint32_t addr, len;
+
+		len = T0_POP();
+		addr = T0_POP();
+		if ((size_t)len < clen) {
+			clen = (size_t)len;
+		}
+		memcpy((unsigned char *)ENG + addr, ENG->hbuf_in, clen);
+		if (ENG->record_type_in == BR_SSL_HANDSHAKE) {
+			br_multihash_update(&ENG->mhash, ENG->hbuf_in, clen); /* handshake-record bytes also feed the running hash */
+		}
+		T0_PUSH(addr + (uint32_t)clen);
+		T0_PUSH(len - (uint32_t)clen);
+		ENG->hbuf_in += clen;
+		ENG->hlen_in -= clen;
+	}
+
+				}
+				break;
+			case 58: {
+				/* read8-native ( -- byte | -1 ) consumes one input byte; -1 when no input is available */
+
+	if (ENG->hlen_in > 0) {
+		unsigned char x;
+
+		x = *ENG->hbuf_in ++;
+		if (ENG->record_type_in == BR_SSL_HANDSHAKE) {
+			br_multihash_update(&ENG->mhash, &x, 1);
+		}
+		T0_PUSH(x);
+		ENG->hlen_in --;
+	} else {
+		T0_PUSHi(-1);
+	}
+
+				}
+				break;
+			case 59: {
+				/* set-server-curve ( -- ) NOTE(review): pk is dereferenced without a NULL check, unlike get-key-type-usages */
+
+	const br_x509_class *xc;
+	const br_x509_pkey *pk;
+
+	xc = *(ENG->x509ctx);
+	pk = xc->get_pkey(ENG->x509ctx, NULL);
+	CTX->server_curve =
+		(pk->key_type == BR_KEYTYPE_EC) ? pk->key.ec.curve : 0;
+
+				}
+				break;
+			case 60: {
+				/* set16 ( x addr -- ) stores x as uint16_t at ENG + addr */
+
+	size_t addr = (size_t)T0_POP();
+	*(uint16_t *)(void *)((unsigned char *)ENG + addr) = (uint16_t)T0_POP();
+
+				}
+				break;
+			case 61: {
+				/* set32 ( x addr -- ) stores x as uint32_t at ENG + addr */
+
+	size_t addr = (size_t)T0_POP();
+	*(uint32_t *)(void *)((unsigned char *)ENG + addr) = (uint32_t)T0_POP();
+
+				}
+				break;
+			case 62: {
+				/* set8 ( x addr -- ) stores the low byte of x at ENG + addr */
+
+	size_t addr = (size_t)T0_POP();
+	*((unsigned char *)ENG + addr) = (unsigned char)T0_POP();
+
+				}
+				break;
+			case 63: {
+				/* strlen ( addr -- len ) length of the C string at ENG + addr */
+
+	void *str = (unsigned char *)ENG + (size_t)T0_POP();
+	T0_PUSH((uint32_t)strlen(str));
+
+				}
+				break;
+			case 64: {
+				/* supported-curves ( -- mask ) 0 when ENG->iec is NULL */
+
+	uint32_t x = ENG->iec == NULL ? 0 : ENG->iec->supported_curves;
+	T0_PUSH(x);
+
+				}
+				break;
+			case 65: {
+				/* supported-hash-functions ( -- mask num ) bit i set for each id in br_sha1_ID..br_sha512_ID that has an implementation; num counts them */
+
+	int i;
+	unsigned x, num;
+
+	x = 0;
+	num = 0;
+	for (i = br_sha1_ID; i <= br_sha512_ID; i ++) {
+		if (br_multihash_getimpl(&ENG->mhash, i)) {
+			x |= 1U << i;
+			num ++;
+		}
+	}
+	T0_PUSH(x);
+	T0_PUSH(num);
+
+				}
+				break;
+			case 66: {
+				/* supports-ecdsa? ( -- flag ) -1 when ENG->iecdsa is set, else 0 */
+
+	T0_PUSHi(-(ENG->iecdsa != 0));
+
+				}
+				break;
+			case 67: {
+				/* supports-rsa-sign? ( -- flag ) -1 when ENG->irsavrfy is set, else 0 */
+
+	T0_PUSHi(-(ENG->irsavrfy != 0));
+
+				}
+				break;
+			case 68: {
+				/* swap ( a b -- b a ) */
+ T0_SWAP(); 
+				}
+				break;
+			case 69: {
+				/* switch-aesccm-in ( is_client prf_id cipher_key_len tag_len -- ) */
+
+	int is_client, prf_id;
+	unsigned cipher_key_len, tag_len;
+
+	tag_len = T0_POP();
+	cipher_key_len = T0_POP();
+	prf_id = T0_POP();
+	is_client = T0_POP();
+	br_ssl_engine_switch_ccm_in(ENG, is_client, prf_id,
+		ENG->iaes_ctrcbc, cipher_key_len, tag_len);
+
+				}
+				break;
+			case 70: {
+				/* switch-aesccm-out ( is_client prf_id cipher_key_len tag_len -- ) */
+
+	int is_client, prf_id;
+	unsigned cipher_key_len, tag_len;
+
+	tag_len = T0_POP();
+	cipher_key_len = T0_POP();
+	prf_id = T0_POP();
+	is_client = T0_POP();
+	br_ssl_engine_switch_ccm_out(ENG, is_client, prf_id,
+		ENG->iaes_ctrcbc, cipher_key_len, tag_len);
+
+				}
+				break;
+			case 71: {
+				/* switch-aesgcm-in ( is_client prf_id cipher_key_len -- ) */
+
+	int is_client, prf_id;
+	unsigned cipher_key_len;
+
+	cipher_key_len = T0_POP();
+	prf_id = T0_POP();
+	is_client = T0_POP();
+	br_ssl_engine_switch_gcm_in(ENG, is_client, prf_id,
+		ENG->iaes_ctr, cipher_key_len);
+
+				}
+				break;
+			case 72: {
+				/* switch-aesgcm-out ( is_client prf_id cipher_key_len -- ) */
+
+	int is_client, prf_id;
+	unsigned cipher_key_len;
+
+	cipher_key_len = T0_POP();
+	prf_id = T0_POP();
+	is_client = T0_POP();
+	br_ssl_engine_switch_gcm_out(ENG, is_client, prf_id,
+		ENG->iaes_ctr, cipher_key_len);
+
+				}
+				break;
+			case 73: {
+				/* switch-cbc-in ( is_client prf_id mac_id aes cipher_key_len -- ) aes != 0 selects the AES decryptor, else the DES one */
+
+	int is_client, prf_id, mac_id, aes;
+	unsigned cipher_key_len;
+
+	cipher_key_len = T0_POP();
+	aes = T0_POP();
+	mac_id = T0_POP();
+	prf_id = T0_POP();
+	is_client = T0_POP();
+	br_ssl_engine_switch_cbc_in(ENG, is_client, prf_id, mac_id,
+		aes ? ENG->iaes_cbcdec : ENG->ides_cbcdec, cipher_key_len);
+
+				}
+				break;
+			case 74: {
+				/* switch-cbc-out ( is_client prf_id mac_id aes cipher_key_len -- ) aes != 0 selects the AES encryptor, else the DES one */
+
+	int is_client, prf_id, mac_id, aes;
+	unsigned cipher_key_len;
+
+	cipher_key_len = T0_POP();
+	aes = T0_POP();
+	mac_id = T0_POP();
+	prf_id = T0_POP();
+	is_client = T0_POP();
+	br_ssl_engine_switch_cbc_out(ENG, is_client, prf_id, mac_id,
+		aes ? ENG->iaes_cbcenc : ENG->ides_cbcenc, cipher_key_len);
+
+				}
+				break;
+			case 75: {
+				/* switch-chapol-in ( is_client prf_id -- ) */
+
+	int is_client, prf_id;
+
+	prf_id = T0_POP();
+	is_client = T0_POP();
+	br_ssl_engine_switch_chapol_in(ENG, is_client, prf_id);
+
+				}
+				break;
+			case 76: {
+				/* switch-chapol-out ( is_client prf_id -- ) */
+
+	int is_client, prf_id;
+
+	prf_id = T0_POP();
+	is_client = T0_POP();
+	br_ssl_engine_switch_chapol_out(ENG, is_client, prf_id);
+
+				}
+				break;
+			case 77: {
+				/* test-protocol-name ( len -- idx | -1 ) index of the configured name equal to the first len bytes of ENG->pad; -1 when none matches */
+
+	size_t len = T0_POP();
+	size_t u;
+
+	for (u = 0; u < ENG->protocol_names_num; u ++) {
+		const char *name;
+
+		name = ENG->protocol_names[u];
+		if (len == strlen(name) && memcmp(ENG->pad, name, len) == 0) {
+			T0_PUSH(u);
+			T0_RET();
+		}
+	}
+	T0_PUSHi(-1);
+
+				}
+				break;
+			case 78: {
+				/* total-chain-length ( -- total ) sum over the remaining chain entries of 3 + data_len */
+
+	size_t u;
+	uint32_t total;
+
+	total = 0;
+	for (u = 0; u < ENG->chain_len; u ++) {
+		total += 3 + (uint32_t)ENG->chain[u].data_len;
+	}
+	T0_PUSH(total);
+
+				}
+				break;
+			case 79: {
+				/* u>> ( x c -- x>>c ) shift performed on uint32_t; c is not range-checked here */
+
+	int c = (int)T0_POPi();
+	uint32_t x = T0_POP();
+	T0_PUSH(x >> c);
+
+				}
+				break;
+			case 80: {
+				/* verify-SKE-sig ( hash use_rsa sig_len -- result ) pushes the value returned by verify_SKE_sig */
+
+	size_t sig_len = T0_POP();
+	int use_rsa = T0_POPi();
+	int hash = T0_POPi();
+
+	T0_PUSH(verify_SKE_sig(CTX, hash, use_rsa, sig_len));
+
+				}
+				break;
+			case 81: {
+				/* write-blob-chunk ( addr len -- addr' len' ) copies up to len bytes from ENG + addr to the output; the stack is untouched when there is no room */
+
+	size_t clen = ENG->hlen_out;
+	if (clen > 0) {
+		uint32_t addr, len;
+
+		len = T0_POP();
+		addr = T0_POP();
+		if ((size_t)len < clen) {
+			clen = (size_t)len;
+		}
+		memcpy(ENG->hbuf_out, (unsigned char *)ENG + addr, clen);
+		if (ENG->record_type_out == BR_SSL_HANDSHAKE) {
+			br_multihash_update(&ENG->mhash, ENG->hbuf_out, clen); /* handshake-record bytes also feed the running hash */
+		}
+		T0_PUSH(addr + (uint32_t)clen);
+		T0_PUSH(len - (uint32_t)clen);
+		ENG->hbuf_out += clen;
+		ENG->hlen_out -= clen;
+	}
+
+				}
+				break;
+			case 82: {
+				/* write8-native ( x -- flag ) -1 when the byte was written, 0 when there was no room (the byte is dropped) */
+
+	unsigned char x;
+
+	x = (unsigned char)T0_POP();
+	if (ENG->hlen_out > 0) {
+		if (ENG->record_type_out == BR_SSL_HANDSHAKE) {
+			br_multihash_update(&ENG->mhash, &x, 1);
+		}
+		*ENG->hbuf_out ++ = x;
+		ENG->hlen_out --;
+		T0_PUSHi(-1);
+	} else {
+		T0_PUSHi(0);
+	}
+
+				}
+				break;
+			case 83: {
+				/* x509-append ( len -- ) passes ENG->pad and len to the X.509 engine's append */
+
+	const br_x509_class *xc;
+	size_t len;
+
+	xc = *(ENG->x509ctx);
+	len = T0_POP();
+	xc->append(ENG->x509ctx, ENG->pad, len);
+
+				}
+				break;
+			case 84: {
+				/* x509-end-cert ( -- ) calls the X.509 engine's end_cert */
+
+	const br_x509_class *xc;
+
+	xc = *(ENG->x509ctx);
+	xc->end_cert(ENG->x509ctx);
+
+				}
+				break;
+			case 85: {
+				/* x509-end-chain ( -- result ) pushes the value returned by the X.509 engine's end_chain */
+
+	const br_x509_class *xc;
+
+	xc = *(ENG->x509ctx);
+	T0_PUSH(xc->end_chain(ENG->x509ctx));
+
+				}
+				break;
+			case 86: {
+				/* x509-start-cert ( n -- ) passes the popped value to the X.509 engine's start_cert */
+
+	const br_x509_class *xc;
+
+	xc = *(ENG->x509ctx);
+	xc->start_cert(ENG->x509ctx, T0_POP());
+
+				}
+				break;
+			case 87: {
+				/* x509-start-chain ( bc -- ) passes ENG->server_name when bc is non-zero, NULL otherwise */
+
+	const br_x509_class *xc;
+	uint32_t bc;
+
+	bc = T0_POP();
+	xc = *(ENG->x509ctx);
+	xc->start_chain(ENG->x509ctx, bc ? ENG->server_name : NULL);
+
+				}
+				break;
+			}
+
+		} else {
+			T0_ENTER(ip, rp, t0x); /* opcode >= T0_INTERPRETED: call the interpreted word */
+		}
+	}
+t0_exit: /* save the interpreter state so a later call can resume (ip is NULL after the outermost ret) */
+	((t0_context *)t0ctx)->dp = dp;
+	((t0_context *)t0ctx)->rp = rp;
+	((t0_context *)t0ctx)->ip = ip;
+}
diff --git a/third_party/bearssl/src/ssl_io.c b/third_party/bearssl/src/ssl_io.c
new file mode 100644
index 0000000..1952615
--- /dev/null
+++ b/third_party/bearssl/src/ssl_io.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_ssl.h -- records the engine and the I/O callbacks in ctx */
+void
+br_sslio_init(br_sslio_context *ctx,
+	br_ssl_engine_context *engine,
+	int (*low_read)(void *read_context,
+		unsigned char *data, size_t len),
+	void *read_context,
+	int (*low_write)(void *write_context,
+		const unsigned char *data, size_t len),
+	void *write_context)
+{
+	ctx->engine = engine;
+	ctx->low_read = low_read;	/* invoked as low_read(read_context, buf, len) */
+	ctx->read_context = read_context;
+	ctx->low_write = low_write;	/* invoked as low_write(write_context, buf, len) */
+	ctx->write_context = write_context;
+}
+
+/*
+ * Run the engine until the specified target state is reached or an
+ * error occurs. The target is SENDAPP, RECVAPP, or both OR'ed together
+ * (the combination matches either). Returns 0 when the target is
+ * reached, and -1 on error (which includes a closed engine).
+ */
+static int
+run_until(br_sslio_context *ctx, unsigned target)
+{
+	for (;;) {
+		unsigned state;
+
+		state = br_ssl_engine_current_state(ctx->engine);
+		if (state & BR_SSL_CLOSED) {
+			return -1;
+		}
+
+		/*
+		 * If there is some record data to send, do it. This takes
+		 * precedence over everything else.
+		 */
+		if (state & BR_SSL_SENDREC) {
+			unsigned char *buf;
+			size_t len;
+			int wlen;
+
+			buf = br_ssl_engine_sendrec_buf(ctx->engine, &len);
+			wlen = ctx->low_write(ctx->write_context, buf, len);
+			if (wlen < 0) {
+				/*
+				 * A write failure is an I/O error, unless
+				 * a close_notify was already received: what
+				 * we are sending is then our own response
+				 * close_notify, and RFC 5246 allows the
+				 * peer not to wait for it.
+				 */
+				if (!ctx->engine->shutdown_recv) {
+					br_ssl_engine_fail(
+						ctx->engine, BR_ERR_IO);
+				}
+				return -1;
+			}
+			if (wlen > 0) {	/* a zero-byte write acknowledges nothing */
+				br_ssl_engine_sendrec_ack(ctx->engine, wlen);
+			}
+			continue;	/* re-read the engine state */
+		}
+
+		/*
+		 * If we reached our target, then we are finished.
+		 */
+		if (state & target) {
+			return 0;
+		}
+
+		/*
+		 * If some application data must be read, and we did not
+		 * exit, then this means that we are trying to write data,
+		 * and that's not possible until the application data is
+		 * read. This may happen if using a shared in/out buffer,
+		 * and the underlying protocol is not strictly half-duplex.
+		 * This is unrecoverable here, so we report an error.
+		 */
+		if (state & BR_SSL_RECVAPP) {
+			return -1;
+		}
+
+		/*
+		 * If we reached that point, then either we are trying
+		 * to read data and there is some, or the engine is stuck
+		 * until a new record is obtained.
+		 */
+		if (state & BR_SSL_RECVREC) {
+			unsigned char *buf;
+			size_t len;
+			int rlen;
+
+			buf = br_ssl_engine_recvrec_buf(ctx->engine, &len);
+			rlen = ctx->low_read(ctx->read_context, buf, len);
+			if (rlen < 0) {
+				br_ssl_engine_fail(ctx->engine, BR_ERR_IO);
+				return -1;
+			}
+			if (rlen > 0) {	/* a zero-byte read acknowledges nothing */
+				br_ssl_engine_recvrec_ack(ctx->engine, rlen);
+			}
+			continue;	/* re-read the engine state */
+		}
+
+		/*
+		 * We can reach that point if the target is RECVAPP, and
+		 * the state contains SENDAPP only. This may happen with
+		 * a shared in/out buffer. In that case, we must flush
+		 * the buffered data to "make room" for a new incoming
+		 * record.
+		 */
+		br_ssl_engine_flush(ctx->engine, 0);
+	}
+}
+
+/* see bearssl_ssl.h -- reads up to len bytes of application data */
+int
+br_sslio_read(br_sslio_context *ctx, void *dst, size_t len)
+{
+	const unsigned char *avail;
+	size_t avail_len, take;
+
+	if (len == 0) {
+		return 0;
+	}
+	if (run_until(ctx, BR_SSL_RECVAPP) < 0) {
+		return -1;
+	}
+
+	/* Hand over at most len bytes of what the engine reports. */
+	avail = br_ssl_engine_recvapp_buf(ctx->engine, &avail_len);
+	take = (avail_len > len) ? len : avail_len;
+	memcpy(dst, avail, take);
+	br_ssl_engine_recvapp_ack(ctx->engine, take);
+	return (int)take;
+}
+
+/* see bearssl_ssl.h */
+int
+br_sslio_read_all(br_sslio_context *ctx, void *dst, size_t len)
+{
+	unsigned char *buf;
+
+	buf = dst;
+	while (len > 0) {
+		int rlen;
+
+		rlen = br_sslio_read(ctx, buf, len);
+		if (rlen < 0) {
+			return -1;
+		}
+		buf += rlen;
+		len -= (size_t)rlen;
+	}
+	return 0;
+}
+
+/* see bearssl_ssl.h -- hands up to len bytes of application data to the engine */
+int
+br_sslio_write(br_sslio_context *ctx, const void *src, size_t len)
+{
+	unsigned char *room;
+	size_t room_len, take;
+
+	if (len == 0) {
+		return 0;
+	}
+	if (run_until(ctx, BR_SSL_SENDAPP) < 0) {
+		return -1;
+	}
+
+	/* Copy as much as fits into the buffer offered by the engine. */
+	room = br_ssl_engine_sendapp_buf(ctx->engine, &room_len);
+	take = (room_len > len) ? len : room_len;
+	memcpy(room, src, take);
+	br_ssl_engine_sendapp_ack(ctx->engine, take);
+	return (int)take;
+}
+
+/* see bearssl_ssl.h */
+int
+br_sslio_write_all(br_sslio_context *ctx, const void *src, size_t len)
+{
+	const unsigned char *buf;
+
+	buf = src;
+	while (len > 0) {
+		int wlen;
+
+		wlen = br_sslio_write(ctx, buf, len);
+		if (wlen < 0) {
+			return -1;
+		}
+		buf += wlen;
+		len -= (size_t)wlen;
+	}
+	return 0;
+}
+
+/* see bearssl_ssl.h -- flushes buffered data; returns run_until()'s 0 / -1 */
+int
+br_sslio_flush(br_sslio_context *ctx)
+{
+	const unsigned done = BR_SSL_SENDAPP | BR_SSL_RECVAPP;
+
+	/*
+	 * Ask the engine to flush, then pump it: run_until() always sends
+	 * pending record data before it tests for the target states.
+	 */
+	br_ssl_engine_flush(ctx->engine, 0);
+	return run_until(ctx, done);
+}
+
+/* see bearssl_ssl.h */
+int
+br_sslio_close(br_sslio_context *ctx)
+{
+	br_ssl_engine_close(ctx->engine);
+	while (br_ssl_engine_current_state(ctx->engine) != BR_SSL_CLOSED) {
+		/*
+		 * Discard any incoming application data.
+		 */
+		size_t len;
+
+		run_until(ctx, BR_SSL_RECVAPP);
+		if (br_ssl_engine_recvapp_buf(ctx->engine, &len) != NULL) {
+			br_ssl_engine_recvapp_ack(ctx->engine, len);
+		}
+	}
+	return br_ssl_engine_last_error(ctx->engine) == BR_ERR_OK;
+}
diff --git a/third_party/bearssl/src/ssl_keyexport.c b/third_party/bearssl/src/ssl_keyexport.c
new file mode 100644
index 0000000..58e6dc3
--- /dev/null
+++ b/third_party/bearssl/src/ssl_keyexport.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Cipher suites whose TLS 1.2 PRF uses SHA-384. br_ssl_key_export()
+ * scans this table; any suite not listed here gets the SHA-256 PRF.
+ */
+static const uint16_t suites_sha384[] = {
+	BR_TLS_RSA_WITH_AES_256_GCM_SHA384,
+	BR_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384,
+	BR_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384,
+	BR_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384,
+	BR_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384,
+	BR_TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
+	BR_TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384,
+	BR_TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,
+	BR_TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384
+};
+
+/* see bearssl_ssl.h -- writes len bytes of PRF output to dst; 1 on success */
+int
+br_ssl_key_export(br_ssl_engine_context *cc,
+	void *dst, size_t len, const char *label,
+	const void *context, size_t context_len)
+{
+	br_tls_prf_seed_chunk seed[4];
+	unsigned char ctx_len_be[2];
+	size_t seed_count, k;
+	int prf_id;
+
+	if (cc->application_data != 1) {
+		return 0;
+	}
+	/* Seed: client random, server random, then the optional context. */
+	seed[0].data = cc->client_random;
+	seed[0].len = sizeof cc->client_random;
+	seed[1].data = cc->server_random;
+	seed[1].len = sizeof cc->server_random;
+	seed_count = 2;
+	if (context != NULL) {
+		/* The context is preceded by its 16-bit big-endian length. */
+		br_enc16be(ctx_len_be, (unsigned)context_len);
+		seed[2].data = ctx_len_be;
+		seed[2].len = sizeof ctx_len_be;
+		seed[3].data = context;
+		seed[3].len = context_len;
+		seed_count = 4;
+	}
+	/* SHA-384 PRF for the suites in suites_sha384[], SHA-256 otherwise. */
+	prf_id = BR_SSLPRF_SHA256;
+	for (k = 0; k < (sizeof suites_sha384) / sizeof(uint16_t); k ++) {
+		if (cc->session.cipher_suite == suites_sha384[k]) {
+			prf_id = BR_SSLPRF_SHA384;
+		}
+	}
+	br_ssl_engine_get_PRF(cc, prf_id)(dst, len,
+		cc->session.master_secret, sizeof cc->session.master_secret,
+		label, seed_count, seed);
+	return 1;
+}
diff --git a/third_party/bearssl/src/ssl_lru.c b/third_party/bearssl/src/ssl_lru.c
new file mode 100644
index 0000000..4c71011
--- /dev/null
+++ b/third_party/bearssl/src/ssl_lru.c
@@ -0,0 +1,537 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Each entry consists of a fixed number of bytes. Entries are concatenated
+ * in the store block. "Addresses" are really offsets in the block,
+ * expressed over 32 bits (so the cache may have size at most 4 GB, which
+ * "ought to be enough for everyone"). The "null address" is 0xFFFFFFFF.
+ * Note that since the storage block alignment is in no way guaranteed, we
+ * perform only accesses that can handle unaligned data.
+ *
+ * Two concurrent data structures are maintained:
+ *
+ * -- Entries are organised in a doubly-linked list; saved entries are added
+ * at the head, and loaded entries are moved to the head. Eviction uses
+ * the list tail (this is the LRU algorithm).
+ *
+ * -- Entries are indexed with a binary tree: all left descendants of a
+ * node have a lower session ID (in lexicographic order), while all
+ * right descendants have a higher session ID. The tree is heuristically
+ * balanced.
+ *
+ * Entry format:
+ *
+ *   session ID          32 bytes
+ *   master secret       48 bytes
+ *   protocol version    2 bytes (big endian)
+ *   cipher suite        2 bytes (big endian)
+ *   list prev           4 bytes (big endian)
+ *   list next           4 bytes (big endian)
+ *   tree left child     4 bytes (big endian)
+ *   tree right child    4 bytes (big endian)
+ *
+ * If an entry has a protocol version set to 0, then it is "disabled":
+ * it was a session pushed to the cache at some point, but it has
+ * been explicitly removed.
+ *
+ * We need to keep the tree balanced because an attacker could make
+ * handshakes, selecting some specific sessions (by reusing them) to
+ * try to make us make an imbalanced tree that makes lookups expensive
+ * (a denial-of-service attack that would persist as long as the cache
+ * remains, i.e. even after the attacker made all his connections).
+ * To do that, we replace the session ID (or the start of the session ID)
+ * with a HMAC value computed over the replaced part; the hash function
+ * implementation and the key are obtained from the server context upon
+ * first save() call.
+ *
+ * Theoretically, an attacker could use the exact timing of the lookup
+ * to infer the current tree topology, and try to revive entries to make
+ * it as unbalanced as possible. However, since the session IDs are
+ * chosen randomly by the server, and the attacker cannot see the
+ * indexing values and must thus rely on blind selection, it should be
+ * exponentially difficult for the attacker to maintain a large
+ * imbalance.
+ */
+#define SESSION_ID_LEN       32	/* size of a session ID, in bytes */
+#define MASTER_SECRET_LEN    48	/* size of a master secret, in bytes */
+
+#define SESSION_ID_OFF        0	/* field offsets within one entry */
+#define MASTER_SECRET_OFF    32
+#define VERSION_OFF          80	/* 16-bit big endian; 0 = disabled entry */
+#define CIPHER_SUITE_OFF     82	/* 16-bit big endian */
+#define LIST_PREV_OFF        84	/* 32-bit big endian entry address */
+#define LIST_NEXT_OFF        88	/* 32-bit big endian entry address */
+#define TREE_LEFT_OFF        92	/* 32-bit big endian entry address */
+#define TREE_RIGHT_OFF       96	/* 32-bit big endian entry address */
+
+#define LRU_ENTRY_LEN       100	/* total size of one entry, in bytes */
+
+#define ADDR_NULL   ((uint32_t)-1)	/* the "null address" (0xFFFFFFFF) */
+
+#define GETSET(name, off) \
+static inline uint32_t get_ ## name(br_ssl_session_cache_lru *cc, uint32_t x) \
+{ \
+	return br_dec32be(cc->store + x + (off)); \
+} \
+static inline void set_ ## name(br_ssl_session_cache_lru *cc, \
+	uint32_t x, uint32_t val) \
+{ \
+	br_enc32be(cc->store + x + (off), val); \
+}
+
+GETSET(prev, LIST_PREV_OFF)	/* defines get_prev() and set_prev() */
+GETSET(next, LIST_NEXT_OFF)	/* defines get_next() and set_next() */
+GETSET(left, TREE_LEFT_OFF)	/* defines get_left() and set_left() */
+GETSET(right, TREE_RIGHT_OFF)	/* defines get_right() and set_right() */
+
+/*
+ * Compute the masked form of a session ID: 'dst' receives a copy of
+ * 'src' whose leading bytes are overwritten with HMAC(K, src), where K
+ * is the cache's random index key. The HMAC output is truncated to
+ * the session ID length if needed. The hash function is the one
+ * recorded in the cache (taken from the server context's DRBG on the
+ * first save), so SHA-256, SHA-384 or SHA-1 depending on availability.
+ *
+ * Collisions are considered too unlikely to matter, and their impact
+ * is low (the handshake will not succeed); a transmission error,
+ * which has the same consequence, is far more probable.
+ *
+ * Source and destination arrays must be disjoint.
+ */
+static void
+mask_id(br_ssl_session_cache_lru *cc,
+	const unsigned char *src, unsigned char *dst)
+{
+	br_hmac_key_context key_ctx;
+	br_hmac_context mac;
+
+	br_hmac_key_init(&key_ctx, cc->hash, cc->index_key, sizeof cc->index_key);
+	br_hmac_init(&mac, &key_ctx, SESSION_ID_LEN);
+	br_hmac_update(&mac, src, SESSION_ID_LEN);
+	memcpy(dst, src, SESSION_ID_LEN);
+	br_hmac_out(&mac, dst);
+}
+
+/*
+ * Look up the (masked) session ID 'id' in the search tree. Returns the
+ * address of the matching entry, or ADDR_NULL when there is none.
+ *
+ * When 'addr_link' is not NULL, '*addr_link' receives the address of the
+ * child field that was followed last, i.e. the field that points (or
+ * would point) to the entry; it is ADDR_NULL if no link was followed.
+ */
+static uint32_t
+find_node(br_ssl_session_cache_lru *cc, const unsigned char *id,
+	uint32_t *addr_link)
+{
+	uint32_t node, link;
+
+	node = cc->root;
+	link = ADDR_NULL;
+	while (node != ADDR_NULL) {
+		int cmp;
+
+		cmp = memcmp(id, cc->store + node + SESSION_ID_OFF,
+			SESSION_ID_LEN);
+		if (cmp == 0) {
+			break;
+		}
+		if (cmp < 0) {
+			link = node + TREE_LEFT_OFF;
+			node = get_left(cc, node);
+		} else {
+			link = node + TREE_RIGHT_OFF;
+			node = get_right(cc, node);
+		}
+	}
+	/* 'node' is the match, or ADDR_NULL if the search fell off the tree. */
+	if (addr_link != NULL) {
+		*addr_link = link;
+	}
+	return node;
+}
+
+/*
+ * For node x, find its replacement upon removal.
+ *
+ *  -- If node x has no child, then this returns ADDR_NULL.
+ *  -- Otherwise, if node x has a left child, then the replacement is the
+ *     rightmost node of the left subtree.
+ *  -- Otherwise, the replacement is the leftmost node of the right subtree.
+ *
+ * If a node is returned, then '*al' is set to the address of the field
+ * that points to that node. Otherwise (node x has no child), '*al' is
+ * set to ADDR_NULL.
+ *
+ * Note that the replacement node, when found, is always a descendant
+ * of node 'x', so it cannot be the tree root. Thus, '*al' can be set
+ * to ADDR_NULL only when no node is found and ADDR_NULL is returned.
+ */
+static uint32_t
+find_replacement_node(br_ssl_session_cache_lru *cc, uint32_t x, uint32_t *al)
+{
+	uint32_t y1, y2;	/* y1: candidate node; y2: address of the link to y1 */
+
+	y1 = get_left(cc, x);
+	if (y1 != ADDR_NULL) {
+		y2 = x + TREE_LEFT_OFF;
+		for (;;) {	/* follow right links down the left subtree */
+			uint32_t z;
+
+			z = get_right(cc, y1);
+			if (z == ADDR_NULL) {
+				*al = y2;
+				return y1;
+			}
+			y2 = y1 + TREE_RIGHT_OFF;
+			y1 = z;
+		}
+	}
+	y1 = get_right(cc, x);
+	if (y1 != ADDR_NULL) {
+		y2 = x + TREE_RIGHT_OFF;
+		for (;;) {	/* follow left links down the right subtree */
+			uint32_t z;
+
+			z = get_left(cc, y1);
+			if (z == ADDR_NULL) {
+				*al = y2;
+				return y1;
+			}
+			y2 = y1 + TREE_LEFT_OFF;
+			y1 = z;
+		}
+	}
+	*al = ADDR_NULL;	/* x has no child at all */
+	return ADDR_NULL;
+}
+
+/*
+ * Store node address 'x' into the link field located at offset 'alx'
+ * of the store. An 'alx' of ADDR_NULL designates the tree root.
+ */
+static inline void
+set_link(br_ssl_session_cache_lru *cc, uint32_t alx, uint32_t x)
+{
+	if (alx != ADDR_NULL) {
+		br_enc32be(cc->store + alx, x);
+		return;
+	}
+	cc->root = x;
+}
+
+/*
+ * Remove node 'x' from the tree; 'x' MUST currently be part of the
+ * tree. Only tree links are modified: the LRU list is left untouched.
+ */
+static void
+remove_node(br_ssl_session_cache_lru *cc, uint32_t x)
+{
+	uint32_t alx, y, aly;
+
+	/*
+	 * Removal algorithm:
+	 * ------------------
+	 *
+	 * - If the removed node is the root, cc->root is what gets updated.
+	 *
+	 * - If the removed node has no child, then we can simply remove
+	 *   it, with nothing else to do.
+	 *
+	 * - Otherwise, the removed node must be replaced by either its
+	 *   rightmost left-descendant, or its leftmost right-descendant.
+	 *   The replacement node itself must be removed from its current
+	 *   place. By definition, that replacement node has either no
+	 *   child, or at most a single child that will replace it in the
+	 *   tree.
+	 */
+
+	/*
+	 * Search for the node again to learn the address of the link
+	 * pointing to it. If the node is the root, alx is ADDR_NULL.
+	 */
+	find_node(cc, cc->store + x + SESSION_ID_OFF, &alx);
+
+	/*
+	 * Find replacement node 'y', and 'aly' is set to the address of
+	 * the link to that replacement node. If the removed node has no
+	 * child, then both 'y' and 'aly' are set to ADDR_NULL.
+	 */
+	y = find_replacement_node(cc, x, &aly);
+
+	if (y != ADDR_NULL) {
+		uint32_t z;
+
+		/*
+		 * The unlinked replacement node may have one child (but
+		 * not two) that takes its place.
+		 */
+		z = get_left(cc, y);
+		if (z == ADDR_NULL) {
+			z = get_right(cc, y);
+		}
+		set_link(cc, aly, z);
+
+		/*
+		 * Link the replacement node in its new place, overwriting
+		 * the current link to the node 'x' (which removes 'x').
+		 */
+		set_link(cc, alx, y);
+
+		/*
+		 * The replacement node adopts the left and right children
+		 * of the removed node. Note that this also works even if
+		 * the replacement node was a direct descendant of the
+		 * removed node, since we unlinked it previously.
+		 */
+		set_left(cc, y, get_left(cc, x));
+		set_right(cc, y, get_right(cc, x));
+	} else {
+		/*
+		 * No replacement, we simply unlink the node 'x'.
+		 */
+		set_link(cc, alx, ADDR_NULL);
+	}
+}
+
+static void
+lru_save(const br_ssl_session_cache_class **ctx,
+	br_ssl_server_context *server_ctx,
+	const br_ssl_session_parameters *params)
+{
+	br_ssl_session_cache_lru *cc;
+	unsigned char id[SESSION_ID_LEN];	/* masked session ID */
+	uint32_t x, alx;	/* x: entry address; alx: address of the link to it */
+
+	cc = (br_ssl_session_cache_lru *)ctx;
+
+	/*
+	 * Cache "save" method (see lru_class). If the store cannot hold
+	 * even one entry, nothing is recorded; this protects the code below.
+	 */
+	if (cc->store_len < LRU_ENTRY_LEN) {
+		return;
+	}
+
+	/*
+	 * Upon the first save in a session cache instance, we obtain a
+	 * random indexing key and note the hash function used by the DRBG.
+	 */
+	if (!cc->init_done) {
+		br_hmac_drbg_generate(&server_ctx->eng.rng,
+			cc->index_key, sizeof cc->index_key);
+		cc->hash = br_hmac_drbg_get_hash(&server_ctx->eng.rng);
+		cc->init_done = 1;
+	}
+	mask_id(cc, params->session_id, id);
+
+	/*
+	 * Look for the node in the tree. If the same ID is already used,
+	 * then reject it. This is a collision event, which should be
+	 * exceedingly rare.
+	 * Note: we do NOT record the insertion point here, because the
+	 * removal of an entry may change the tree topology.
+	 */
+	if (find_node(cc, id, NULL) != ADDR_NULL) {
+		return;
+	}
+
+	/*
+	 * Find some room for the new parameters. If the cache is not
+	 * full yet, add it to the end of the area and bump the pointer up.
+	 * Otherwise, evict the list tail entry. Note that we already
+	 * filtered out the case of a ridiculously small buffer that
+	 * cannot hold any entry at all; thus, if there is no room for an
+	 * extra entry, then the cache cannot be empty.
+	 */
+	if (cc->store_ptr > (cc->store_len - LRU_ENTRY_LEN)) {
+		/*
+		 * Evict tail. If the buffer has room for a single entry,
+		 * then this may also be the head.
+		 */
+		x = cc->tail;
+		cc->tail = get_prev(cc, x);
+		if (cc->tail == ADDR_NULL) {
+			cc->head = ADDR_NULL;
+		} else {
+			set_next(cc, cc->tail, ADDR_NULL);
+		}
+
+		/*
+		 * Remove the node from the tree.
+		 */
+		remove_node(cc, x);
+	} else {
+		/*
+		 * Allocate room for new node.
+		 */
+		x = cc->store_ptr;
+		cc->store_ptr += LRU_ENTRY_LEN;
+	}
+
+	/*
+	 * Find the insertion point for the new node, and link it.
+	 */
+	find_node(cc, id, &alx);
+	set_link(cc, alx, x);
+	set_left(cc, x, ADDR_NULL);
+	set_right(cc, x, ADDR_NULL);
+
+	/*
+	 * New entry becomes new list head. It may also become the list
+	 * tail if the cache was empty at that point.
+	 */
+	if (cc->head == ADDR_NULL) {
+		cc->tail = x;
+	} else {
+		set_prev(cc, cc->head, x);
+	}
+	set_prev(cc, x, ADDR_NULL);
+	set_next(cc, x, cc->head);
+	cc->head = x;
+
+	/*
+	 * Fill data in the entry (the stored ID is the masked one).
+	 */
+	memcpy(cc->store + x + SESSION_ID_OFF, id, SESSION_ID_LEN);
+	memcpy(cc->store + x + MASTER_SECRET_OFF,
+		params->master_secret, MASTER_SECRET_LEN);
+	br_enc16be(cc->store + x + VERSION_OFF, params->version);
+	br_enc16be(cc->store + x + CIPHER_SUITE_OFF, params->cipher_suite);
+}
+
+static int
+lru_load(const br_ssl_session_cache_class **ctx,
+	br_ssl_server_context *server_ctx,
+	br_ssl_session_parameters *params)
+{
+	br_ssl_session_cache_lru *cc;
+	unsigned char id[SESSION_ID_LEN];	/* masked session ID */
+	uint32_t x;
+
+	(void)server_ctx;	/* unused by this method */
+	cc = (br_ssl_session_cache_lru *)ctx;
+	if (!cc->init_done) {	/* lru_save() never ran: no key, no entry */
+		return 0;
+	}
+	mask_id(cc, params->session_id, id);
+	x = find_node(cc, id, NULL);
+	if (x != ADDR_NULL) {
+		unsigned version;
+
+		version = br_dec16be(cc->store + x + VERSION_OFF);
+		if (version == 0) {
+			/*
+			 * Entry is disabled, we pretend we did not find it.
+			 * Notably, we don't move it to the front of the
+			 * LRU list.
+			 */
+			return 0;
+		}
+		params->version = version;
+		params->cipher_suite = br_dec16be(
+			cc->store + x + CIPHER_SUITE_OFF);
+		memcpy(params->master_secret,
+			cc->store + x + MASTER_SECRET_OFF,
+			MASTER_SECRET_LEN);
+		if (x != cc->head) {
+			/*
+			 * Found node is not at list head, so move
+			 * it to the head.
+			 */
+			uint32_t p, n;	/* list neighbours of x */
+
+			p = get_prev(cc, x);
+			n = get_next(cc, x);
+			set_next(cc, p, n);	/* unlink x; p is used as a valid entry */
+			if (n == ADDR_NULL) {
+				cc->tail = p;	/* x was the tail */
+			} else {
+				set_prev(cc, n, p);
+			}
+			set_prev(cc, cc->head, x);	/* relink x before the old head */
+			set_next(cc, x, cc->head);
+			set_prev(cc, x, ADDR_NULL);
+			cc->head = x;
+		}
+		return 1;	/* params now holds the cached session */
+	}
+	return 0;	/* no entry for this session ID */
+}
+
+static const br_ssl_session_cache_class lru_class = {	/* installed as cc->vtable */
+	sizeof(br_ssl_session_cache_lru),	/* size of the LRU cache structure */
+	&lru_save,
+	&lru_load
+};
+
+/* see inner.h -- sets up an empty LRU cache over the caller's buffer */
+void
+br_ssl_session_cache_lru_init(br_ssl_session_cache_lru *cc,
+	unsigned char *store, size_t store_len)
+{
+	cc->vtable = &lru_class;
+	/* No entry allocated yet, and no index key drawn yet. */
+	cc->store_ptr = 0;
+	cc->init_done = 0;
+	cc->store_len = store_len;
+	cc->store = store;
+	/* Empty list and empty tree. */
+	cc->root = cc->tail = cc->head = ADDR_NULL;
+}
+
+/* see bearssl_ssl.h -- marks the entry for 'id', if any, as disabled */
+void br_ssl_session_cache_lru_forget(
+	br_ssl_session_cache_lru *cc, const unsigned char *id)
+{
+	unsigned char masked[SESSION_ID_LEN];
+	uint32_t entry;
+
+	/*
+	 * Before the first save there is no index key and no entry,
+	 * hence nothing to forget.
+	 */
+	if (!cc->init_done) {
+		return;
+	}
+
+	/*
+	 * Entries are indexed by masked ID. A matching entry is not
+	 * unlinked; its protocol version is merely zeroed, which makes
+	 * lru_load() ignore it. The slot is recycled once it reaches
+	 * the tail of the LRU list.
+	 *
+	 * Releasing the entry at once is not worth the extra code:
+	 * explicit forgetting should be rare (mostly for tests).
+	 */
+	mask_id(cc, id, masked);
+	entry = find_node(cc, masked, NULL);
+	if (entry == ADDR_NULL) {
+		return;
+	}
+	br_enc16be(cc->store + entry + VERSION_OFF, 0);
+}
diff --git a/third_party/bearssl/src/ssl_rec_cbc.c b/third_party/bearssl/src/ssl_rec_cbc.c
new file mode 100644
index 0000000..c38cbfd
--- /dev/null
+++ b/third_party/bearssl/src/ssl_rec_cbc.c
@@ -0,0 +1,440 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static void
+in_cbc_init(br_sslrec_in_cbc_context *cc,
+	const br_block_cbcdec_class *bc_impl,
+	const void *bc_key, size_t bc_key_len,
+	const br_hash_class *dig_impl,
+	const void *mac_key, size_t mac_key_len, size_t mac_out_len,
+	const void *iv)
+{ /* Set up an incoming (decryption) CBC record context. */
+	cc->vtable = &br_sslrec_in_cbc_vtable;
+	cc->seq = 0; /* sequence number of the next record */
+	bc_impl->init(&cc->bc.vtable, bc_key, bc_key_len);
+	br_hmac_key_init(&cc->mac, dig_impl, mac_key, mac_key_len);
+	cc->mac_len = mac_out_len; /* MAC length as it appears in records */
+	if (iv == NULL) { /* no IV supplied: records carry an explicit IV */
+		memset(cc->iv, 0, sizeof cc->iv);
+		cc->explicit_IV = 1;
+	} else { /* IV supplied: it is the starting implicit IV */
+		memcpy(cc->iv, iv, bc_impl->block_size);
+		cc->explicit_IV = 0;
+	}
+}
+
+static int
+cbc_check_length(const br_sslrec_in_cbc_context *cc, size_t rlen)
+{
+	/*
+	 * Returns 1 if rlen is a valid encrypted record length, else 0.
+	 * Plaintext size: at most 16384 bytes; padding: at most 256 bytes
+	 * MAC: mac_len extra bytes
+	 * TLS 1.1+: each record has an explicit IV
+	 *
+	 * Minimum length includes at least one byte of padding, and the
+	 * MAC.
+	 * Total length must be a multiple of the block size: cbc_decrypt()
+	 * passes the whole record to the cipher without rechecking it.
+	 */
+	size_t blen;
+	size_t min_len, max_len;
+
+	blen = cc->bc.vtable->block_size;
+	min_len = (blen + cc->mac_len) & ~(blen - 1);
+	max_len = (16384 + 256 + cc->mac_len) & ~(blen - 1);
+	if (cc->explicit_IV) {
+		min_len += blen;
+		max_len += blen;
+	}
+	return min_len <= rlen && rlen <= max_len && (rlen & (blen - 1)) == 0;
+}
+
+/*
+ * Rotate array buf[] of length 'len' to the left (towards low indices)
+ * by 'num' bytes if ctl is 1; otherwise, leave it unchanged. This is
+ * constant-time. 'num' MUST be lower than 'len'. 'len' MUST be lower
+ * than or equal to 64.
+ */
+static void
+cond_rotate(uint32_t ctl, unsigned char *buf, size_t len, size_t num)
+{
+	unsigned char tmp[64]; /* scratch copy; hence the limit on 'len' */
+	size_t u, v;
+
+	for (u = 0, v = num; u < len; u ++) { /* v tracks (u + num) mod len */
+		tmp[u] = MUX(ctl, buf[v], buf[u]); /* both bytes are always read */
+		if (++ v == len) {
+			v = 0;
+		}
+	}
+	memcpy(buf, tmp, len);
+}
+
+static unsigned char *
+cbc_decrypt(br_sslrec_in_cbc_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{
+	/*
+	 * Decrypts in place; returns the plaintext address, or 0 on a bad
+	 * pad/MAC/length. Lengths use 32-bit integers: record lengths fit,
+	 * and our constant-time primitives operate on 32-bit integers.
+	 */
+	unsigned char *buf;
+	uint32_t u, v, len, blen, min_len, max_len;
+	uint32_t good, pad_len, rot_count, len_withmac, len_nomac;
+	unsigned char tmp1[64], tmp2[64];
+	int i;
+	br_hmac_context hc;
+
+	buf = data;
+	len = *data_len;
+	blen = cc->bc.vtable->block_size;
+
+	/*
+	 * Decrypt data, and skip the explicit IV (if applicable). Note
+	 * that the total length is supposed to have been verified by
+	 * the caller. If there is an explicit IV, then we actually
+	 * "decrypt" it using the implicit IV (from previous record),
+	 * which is useless but harmless.
+	 */
+	cc->bc.vtable->run(&cc->bc.vtable, cc->iv, data, len);
+	if (cc->explicit_IV) {
+		buf += blen;
+		len -= blen;
+	}
+
+	/*
+	 * Compute minimum and maximum length of plaintext + MAC. These
+	 * lengths can be inferred from the outside: they are not secret.
+	 */
+	min_len = (cc->mac_len + 256 < len) ? len - 256 : cc->mac_len;
+	max_len = len - 1;
+
+	/*
+	 * Use the last decrypted byte to compute the actual payload
+	 * length. Take care not to overflow (we use unsigned types).
+	 */
+	pad_len = buf[max_len];
+	good = LE(pad_len, (uint32_t)(max_len - min_len));
+	len = MUX(good, (uint32_t)(max_len - pad_len), min_len);
+
+	/*
+	 * Check padding contents: all padding bytes must be equal to
+	 * the value of pad_len.
+	 */
+	for (u = min_len; u < max_len; u ++) {
+		good &= LT(u, len) | EQ(buf[u], pad_len);
+	}
+
+	/*
+	 * Extract the MAC value. This is done in one pass, but results
+	 * in a "rotated" MAC value depending on where it actually
+	 * occurs. The 'rot_count' value is set to the offset of the
+	 * first MAC byte within tmp1[].
+	 *
+	 * min_len and max_len are also adjusted to the minimum and
+	 * maximum lengths of the plaintext alone (without the MAC).
+	 */
+	len_withmac = (uint32_t)len;
+	len_nomac = len_withmac - cc->mac_len;
+	min_len -= cc->mac_len;
+	rot_count = 0;
+	memset(tmp1, 0, cc->mac_len);
+	v = 0;
+	for (u = min_len; u < max_len; u ++) {
+		tmp1[v] |= MUX(GE(u, len_nomac) & LT(u, len_withmac),
+			buf[u], 0x00);
+		rot_count = MUX(EQ(u, len_nomac), v, rot_count);
+		if (++ v == cc->mac_len) {
+			v = 0;
+		}
+	}
+	max_len -= cc->mac_len;
+
+	/*
+	 * Rotate back the MAC value. The loop below does the constant-time
+	 * rotation in time n*log n for a MAC output of length n. We assume
+	 * that the MAC output length is no more than 64 bytes, so the
+	 * rotation count fits on 6 bits.
+	 */
+	for (i = 5; i >= 0; i --) {
+		uint32_t rc;
+
+		rc = (uint32_t)1 << i;
+		cond_rotate(rot_count >> i, tmp1, cc->mac_len, rc);
+		rot_count &= ~rc; /* clear bit i so the next shift yields 0 or 1 */
+	}
+
+	/*
+	 * Recompute the HMAC value. The input is the concatenation of
+	 * the sequence number (8 bytes), the record header (5 bytes),
+	 * and the payload.
+	 *
+	 * At that point, min_len and max_len have both been reduced by
+	 * the MAC length: they bound the plaintext length (without MAC).
+	 */
+	br_enc64be(tmp2, cc->seq ++);
+	tmp2[8] = (unsigned char)record_type;
+	br_enc16be(tmp2 + 9, version);
+	br_enc16be(tmp2 + 11, len_nomac);
+	br_hmac_init(&hc, &cc->mac, cc->mac_len);
+	br_hmac_update(&hc, tmp2, 13);
+	br_hmac_outCT(&hc, buf, len_nomac, min_len, max_len, tmp2);
+
+	/*
+	 * Compare the extracted and recomputed MAC values.
+	 */
+	for (u = 0; u < cc->mac_len; u ++) {
+		good &= EQ0(tmp1[u] ^ tmp2[u]);
+	}
+
+	/*
+	 * Check that the plaintext length is valid. The previous
+	 * check was on the encrypted length, but the padding may have
+	 * turned shorter than expected.
+	 *
+	 * Once this final test is done, the critical "constant-time"
+	 * section ends and we can make conditional jumps again.
+	 */
+	good &= LE(len_nomac, 16384);
+
+	if (!good) {
+		return 0; /* i.e. a null pointer: the record is rejected */
+	}
+	*data_len = len_nomac;
+	return buf;
+}
+
+/* see bearssl_ssl.h (casts map typed functions to class signatures) */
+const br_sslrec_in_cbc_class br_sslrec_in_cbc_vtable = {
+	{
+		sizeof(br_sslrec_in_cbc_context), /* context size */
+		(int (*)(const br_sslrec_in_class *const *, size_t))
+			&cbc_check_length, /* validates the encrypted record length */
+		(unsigned char *(*)(const br_sslrec_in_class **,
+			int, unsigned, void *, size_t *))
+			&cbc_decrypt /* decrypts and verifies one record */
+	},
+	(void (*)(const br_sslrec_in_cbc_class **,
+		const br_block_cbcdec_class *, const void *, size_t,
+		const br_hash_class *, const void *, size_t, size_t,
+		const void *))
+		&in_cbc_init /* context initialisation */
+};
+
+/*
+ * For CBC output:
+ *
+ * -- With TLS 1.1+, there is an explicit IV. Generation method uses
+ * HMAC, computed over the current sequence number, and the current MAC
+ * key. The resulting value is truncated to the size of a block, and
+ * added at the head of the plaintext; it will get encrypted along with
+ * the data. This custom generation mechanism is "safe" under the
+ * assumption that HMAC behaves like a random oracle; since the MAC for
+ * a record is computed over the concatenation of the sequence number,
+ * the record header and the plaintext, the HMAC-for-IV will not collide
+ * with the normal HMAC.
+ *
+ * -- With TLS 1.0, for application data, we want to enforce a 1/n-1
+ * split, as a countermeasure against chosen-plaintext attacks. We thus
+ * need to leave some room in the buffer for that extra record.
+ */
+
+static void
+out_cbc_init(br_sslrec_out_cbc_context *cc,
+	const br_block_cbcenc_class *bc_impl,
+	const void *bc_key, size_t bc_key_len,
+	const br_hash_class *dig_impl,
+	const void *mac_key, size_t mac_key_len, size_t mac_out_len,
+	const void *iv)
+{ /* Set up an outgoing (encryption) CBC record context. */
+	cc->vtable = &br_sslrec_out_cbc_vtable;
+	cc->seq = 0; /* sequence number of the next record */
+	bc_impl->init(&cc->bc.vtable, bc_key, bc_key_len);
+	br_hmac_key_init(&cc->mac, dig_impl, mac_key, mac_key_len);
+	cc->mac_len = mac_out_len; /* MAC length appended to each record */
+	if (iv == NULL) { /* no IV supplied: each record gets an explicit IV */
+		memset(cc->iv, 0, sizeof cc->iv);
+		cc->explicit_IV = 1;
+	} else { /* IV supplied: it is the starting implicit IV */
+		memcpy(cc->iv, iv, bc_impl->block_size);
+		cc->explicit_IV = 0;
+	}
+}
+
+static void
+cbc_max_plaintext(const br_sslrec_out_cbc_context *cc,
+	size_t *start, size_t *end)
+{ /* Shrinks [*start, *end) to the span usable for plaintext. */
+	size_t blen, len;
+
+	blen = cc->bc.vtable->block_size;
+	if (cc->explicit_IV) {
+		*start += blen; /* room for the explicit IV block */
+	} else {
+		*start += 4 + ((cc->mac_len + blen + 1) & ~(blen - 1)); /* room for the split record, see cbc_encrypt() */
+	}
+	len = (*end - *start) & ~(blen - 1); /* whole blocks only */
+	len -= 1 + cc->mac_len; /* reserve the MAC and one padding byte */
+	if (len > 16384) { /* cap at the maximum plaintext size */
+		len = 16384;
+	}
+	*end = *start + len;
+}
+
+static unsigned char *
+cbc_encrypt(br_sslrec_out_cbc_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{ /* Builds the record(s) in place; returns the start, *data_len = size. */
+	unsigned char *buf, *rbuf;
+	size_t len, blen, plen;
+	unsigned char tmp[13];
+	br_hmac_context hc;
+
+	buf = data;
+	len = *data_len;
+	blen = cc->bc.vtable->block_size;
+
+	/*
+	 * If using TLS 1.0, with more than one byte of plaintext, and
+	 * the record is application data, then we need to compute
+	 * a "split". We do not perform the split on other record types
+	 * because it turned out that some existing, deployed
+	 * implementations of SSL/TLS do not tolerate the splitting of
+	 * some message types (in particular the Finished message).
+	 *
+	 * If using TLS 1.1+, then there is an explicit IV. We produce
+	 * that IV by adding an extra initial plaintext block, whose
+	 * value is computed with HMAC over the record sequence number.
+	 */
+	if (cc->explicit_IV) {
+		/*
+		 * We use here the fact that all the HMAC variants we
+		 * support can produce at least 16 bytes, while all the
+		 * block ciphers we support have blocks of no more than
+		 * 16 bytes. Thus, we can always truncate the HMAC output
+		 * down to the block size.
+		 */
+		br_enc64be(tmp, cc->seq);
+		br_hmac_init(&hc, &cc->mac, blen);
+		br_hmac_update(&hc, tmp, 8);
+		br_hmac_out(&hc, buf - blen);
+		rbuf = buf - blen - 5; /* header (5 bytes) precedes the IV block */
+	} else {
+		if (len > 1 && record_type == BR_SSL_APPLICATION_DATA) {
+			/*
+			 * To do the split, we use a recursive invocation;
+			 * since we only give one byte to the inner call,
+			 * the recursion stops there.
+			 *
+			 * We need to compute the exact size of the extra
+			 * record, so that the two resulting records end up
+			 * being sequential in RAM.
+			 *
+			 * We use here the fact that cbc_max_plaintext()
+			 * adjusted the start offset to leave room for the
+			 * initial fragment.
+			 */
+			size_t xlen;
+
+			rbuf = buf - 4
+				- ((cc->mac_len + blen + 1) & ~(blen - 1));
+			rbuf[0] = buf[0]; /* first plaintext byte goes in the extra record */
+			xlen = 1;
+			rbuf = cbc_encrypt(cc, record_type,
+				version, rbuf, &xlen);
+			buf ++;
+			len --;
+		} else {
+			rbuf = buf - 5;
+		}
+	}
+
+	/*
+	 * Compute MAC.
+	 */
+	br_enc64be(tmp, cc->seq ++);
+	tmp[8] = record_type;
+	br_enc16be(tmp + 9, version);
+	br_enc16be(tmp + 11, len);
+	br_hmac_init(&hc, &cc->mac, cc->mac_len);
+	br_hmac_update(&hc, tmp, 13);
+	br_hmac_update(&hc, buf, len);
+	br_hmac_out(&hc, buf + len);
+	len += cc->mac_len;
+
+	/*
+	 * Add padding.
+	 */
+	plen = blen - (len & (blen - 1)); /* between 1 and blen bytes */
+	memset(buf + len, (unsigned)plen - 1, plen);
+	len += plen;
+
+	/*
+	 * If an explicit IV is used, the corresponding extra block was
+	 * already put in place earlier; we just have to account for it
+	 * here.
+	 */
+	if (cc->explicit_IV) {
+		buf -= blen;
+		len += blen;
+	}
+
+	/*
+	 * Encrypt the whole thing. If there is an explicit IV, we also
+	 * encrypt it, which is fine (encryption of a uniformly random
+	 * block is still a uniformly random block).
+	 */
+	cc->bc.vtable->run(&cc->bc.vtable, cc->iv, buf, len);
+
+	/*
+	 * Add the header and return.
+	 */
+	buf[-5] = record_type;
+	br_enc16be(buf - 4, version);
+	br_enc16be(buf - 2, len);
+	*data_len = (size_t)((buf + len) - rbuf); /* includes the split record, if any */
+	return rbuf;
+}
+
+/* see bearssl_ssl.h (casts map typed functions to class signatures) */
+const br_sslrec_out_cbc_class br_sslrec_out_cbc_vtable = {
+	{
+		sizeof(br_sslrec_out_cbc_context), /* context size */
+		(void (*)(const br_sslrec_out_class *const *,
+			size_t *, size_t *))
+			&cbc_max_plaintext, /* computes the usable plaintext span */
+		(unsigned char *(*)(const br_sslrec_out_class **,
+			int, unsigned, void *, size_t *))
+			&cbc_encrypt /* builds one record (or two, when split) */
+	},
+	(void (*)(const br_sslrec_out_cbc_class **,
+		const br_block_cbcenc_class *, const void *, size_t,
+		const br_hash_class *, const void *, size_t, size_t,
+		const void *))
+		&out_cbc_init /* context initialisation */
+};
diff --git a/third_party/bearssl/src/ssl_rec_ccm.c b/third_party/bearssl/src/ssl_rec_ccm.c
new file mode 100644
index 0000000..92c3295
--- /dev/null
+++ b/third_party/bearssl/src/ssl_rec_ccm.c
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2018 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * CCM initialisation. This does everything except setting the vtable,
+ * which depends on whether this is a context for encrypting or for
+ * decrypting.
+ */
+static void
+gen_ccm_init(br_sslrec_ccm_context *cc,
+	const br_block_ctrcbc_class *bc_impl,
+	const void *key, size_t key_len,
+	const void *iv, size_t tag_len)
+{
+	cc->seq = 0; /* sequence number of the next record */
+	bc_impl->init(&cc->bc.vtable, key, key_len);
+	memcpy(cc->iv, iv, sizeof cc->iv); /* copied to the head of each record nonce */
+	cc->tag_len = tag_len; /* tag length used for every record */
+}
+
+static void
+in_ccm_init(br_sslrec_ccm_context *cc,
+	const br_block_ctrcbc_class *bc_impl,
+	const void *key, size_t key_len,
+	const void *iv, size_t tag_len)
+{ /* Initialise a CCM context for incoming records (decryption). */
+	cc->vtable.in = &br_sslrec_in_ccm_vtable;
+	gen_ccm_init(cc, bc_impl, key, key_len, iv, tag_len);
+}
+
+static int
+ccm_check_length(const br_sslrec_ccm_context *cc, size_t rlen)
+{
+	/*
+	 * CCM overhead is 8 bytes for nonce_explicit, and the tag
+	 * (normally 8 or 16 bytes, depending on cipher suite).
+	 */
+	size_t over;
+
+	over = 8 + cc->tag_len;
+	return rlen >= over && rlen <= (16384 + over); /* plaintext may be 0 to 16384 bytes */
+}
+
+static unsigned char *
+ccm_decrypt(br_sslrec_ccm_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{ /* Decrypts in place; returns the plaintext address, or NULL on a bad tag. */
+	br_ccm_context zc;
+	unsigned char *buf;
+	unsigned char nonce[12], header[13];
+	size_t len;
+
+	buf = (unsigned char *)data + 8; /* skip the 8-byte explicit nonce */
+	len = *data_len - (8 + cc->tag_len); /* ciphertext length alone */
+
+	/*
+	 * Make nonce (implicit + explicit parts).
+	 */
+	memcpy(nonce, cc->iv, sizeof cc->iv);
+	memcpy(nonce + 4, data, 8);
+
+	/*
+	 * Assemble synthetic header for the AAD.
+	 */
+	br_enc64be(header, cc->seq ++);
+	header[8] = (unsigned char)record_type;
+	br_enc16be(header + 9, version);
+	br_enc16be(header + 11, len);
+
+	/*
+	 * Perform CCM decryption.
+	 */
+	br_ccm_init(&zc, &cc->bc.vtable);
+	br_ccm_reset(&zc, nonce, sizeof nonce, sizeof header, len, cc->tag_len);
+	br_ccm_aad_inject(&zc, header, sizeof header);
+	br_ccm_flip(&zc);
+	br_ccm_run(&zc, 0, buf, len);
+	if (!br_ccm_check_tag(&zc, buf + len)) { /* the tag follows the ciphertext */
+		return NULL;
+	}
+	*data_len = len;
+	return buf;
+}
+
+/* see bearssl_ssl.h (casts map typed functions to class signatures) */
+const br_sslrec_in_ccm_class br_sslrec_in_ccm_vtable = {
+	{
+		sizeof(br_sslrec_ccm_context), /* context size */
+		(int (*)(const br_sslrec_in_class *const *, size_t))
+			&ccm_check_length, /* validates the encrypted record length */
+		(unsigned char *(*)(const br_sslrec_in_class **,
+			int, unsigned, void *, size_t *))
+			&ccm_decrypt /* decrypts and verifies one record */
+	},
+	(void (*)(const br_sslrec_in_ccm_class **,
+		const br_block_ctrcbc_class *, const void *, size_t,
+		const void *, size_t))
+		&in_ccm_init /* context initialisation */
+};
+
+static void
+out_ccm_init(br_sslrec_ccm_context *cc,
+	const br_block_ctrcbc_class *bc_impl,
+	const void *key, size_t key_len,
+	const void *iv, size_t tag_len)
+{ /* Initialise a CCM context for outgoing records (encryption). */
+	cc->vtable.out = &br_sslrec_out_ccm_vtable;
+	gen_ccm_init(cc, bc_impl, key, key_len, iv, tag_len);
+}
+
+static void
+ccm_max_plaintext(const br_sslrec_ccm_context *cc,
+	size_t *start, size_t *end)
+{ /* Shrinks [*start, *end) to the span usable for plaintext. */
+	size_t len;
+
+	*start += 8; /* room for the 8-byte explicit nonce */
+	len = *end - *start - cc->tag_len; /* room for the tag at the end */
+	if (len > 16384) { /* cap at the maximum plaintext size */
+		len = 16384;
+	}
+	*end = *start + len;
+}
+
+static unsigned char *
+ccm_encrypt(br_sslrec_ccm_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{ /* Builds the record in place; returns its start, *data_len = its size. */
+	br_ccm_context zc;
+	unsigned char *buf;
+	unsigned char nonce[12], header[13];
+	size_t len;
+
+	buf = (unsigned char *)data;
+	len = *data_len;
+
+	/*
+	 * Make nonce; the explicit part is an encoding of the sequence
+	 * number.
+	 */
+	memcpy(nonce, cc->iv, sizeof cc->iv);
+	br_enc64be(nonce + 4, cc->seq);
+
+	/*
+	 * Assemble synthetic header for the AAD.
+	 */
+	br_enc64be(header, cc->seq ++);
+	header[8] = (unsigned char)record_type;
+	br_enc16be(header + 9, version);
+	br_enc16be(header + 11, len);
+
+	/*
+	 * Perform CCM encryption.
+	 */
+	br_ccm_init(&zc, &cc->bc.vtable);
+	br_ccm_reset(&zc, nonce, sizeof nonce, sizeof header, len, cc->tag_len);
+	br_ccm_aad_inject(&zc, header, sizeof header);
+	br_ccm_flip(&zc);
+	br_ccm_run(&zc, 1, buf, len);
+	br_ccm_get_tag(&zc, buf + len); /* the tag is written after the ciphertext */
+
+	/*
+	 * Assemble header and adjust pointer/length.
+	 */
+	len += 8 + cc->tag_len;
+	buf -= 13; /* 5-byte record header + 8-byte explicit nonce */
+	memcpy(buf + 5, nonce + 4, 8);
+	buf[0] = (unsigned char)record_type;
+	br_enc16be(buf + 1, version);
+	br_enc16be(buf + 3, len);
+	*data_len = len + 5; /* total size, record header included */
+	return buf;
+}
+
+/* see bearssl_ssl.h (casts map typed functions to class signatures) */
+const br_sslrec_out_ccm_class br_sslrec_out_ccm_vtable = {
+	{
+		sizeof(br_sslrec_ccm_context), /* context size */
+		(void (*)(const br_sslrec_out_class *const *,
+			size_t *, size_t *))
+			&ccm_max_plaintext, /* computes the usable plaintext span */
+		(unsigned char *(*)(const br_sslrec_out_class **,
+			int, unsigned, void *, size_t *))
+			&ccm_encrypt /* builds one encrypted record */
+	},
+	(void (*)(const br_sslrec_out_ccm_class **,
+		const br_block_ctrcbc_class *, const void *, size_t,
+		const void *, size_t))
+		&out_ccm_init /* context initialisation */
+};
diff --git a/third_party/bearssl/src/ssl_rec_chapol.c b/third_party/bearssl/src/ssl_rec_chapol.c
new file mode 100644
index 0000000..73b3c78
--- /dev/null
+++ b/third_party/bearssl/src/ssl_rec_chapol.c
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+static void
+gen_chapol_init(br_sslrec_chapol_context *cc,
+	br_chacha20_run ichacha, br_poly1305_run ipoly,
+	const void *key, const void *iv)
+{ /* Common initialisation; the caller sets the vtable itself. */
+	cc->seq = 0; /* sequence number of the next record */
+	cc->ichacha = ichacha;
+	cc->ipoly = ipoly;
+	memcpy(cc->key, key, sizeof cc->key);
+	memcpy(cc->iv, iv, sizeof cc->iv); /* XORed with the sequence number per record */
+}
+
+static void
+gen_chapol_process(br_sslrec_chapol_context *cc,
+	int record_type, unsigned version, void *data, size_t len,
+	void *tag, int encrypt)
+{ /* Runs cc->ipoly on one record, for encryption or decryption. */
+	unsigned char header[13];
+	unsigned char nonce[12];
+	uint64_t seq;
+	size_t u;
+
+	seq = cc->seq ++; /* this call consumes one sequence number */
+	br_enc64be(header, seq);
+	header[8] = (unsigned char)record_type;
+	br_enc16be(header + 9, version);
+	br_enc16be(header + 11, len);
+	memcpy(nonce, cc->iv, 12);
+	for (u = 0; u < 8; u ++) { /* XOR seq (big-endian) into nonce[4..11] */
+		nonce[11 - u] ^= (unsigned char)seq;
+		seq >>= 8;
+	}
+	cc->ipoly(cc->key, nonce, data, len, header, sizeof header,
+		tag, cc->ichacha, encrypt);
+}
+
+static void
+in_chapol_init(br_sslrec_chapol_context *cc,
+	br_chacha20_run ichacha, br_poly1305_run ipoly,
+	const void *key, const void *iv)
+{ /* Initialise a ChaCha20+Poly1305 context for incoming records. */
+	cc->vtable.in = &br_sslrec_in_chapol_vtable;
+	gen_chapol_init(cc, ichacha, ipoly, key, iv);
+}
+
+static int
+chapol_check_length(const br_sslrec_chapol_context *cc, size_t rlen)
+{
+	/*
+	 * Overhead is just the authentication tag (16 bytes).
+	 */
+	(void)cc; /* unused: the bounds do not depend on the context */
+	return rlen >= 16 && rlen <= (16384 + 16); /* plaintext may be 0 to 16384 bytes */
+}
+
+static unsigned char *
+chapol_decrypt(br_sslrec_chapol_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{ /* Decrypts in place; returns the plaintext address, or NULL on a bad tag. */
+	unsigned char *buf;
+	size_t u, len;
+	unsigned char tag[16];
+	unsigned bad;
+
+	buf = data;
+	len = *data_len - 16; /* the last 16 bytes are the received tag */
+	gen_chapol_process(cc, record_type, version, buf, len, tag, 0);
+	bad = 0;
+	for (u = 0; u < 16; u ++) { /* no early exit: all 16 bytes are compared */
+		bad |= tag[u] ^ buf[len + u];
+	}
+	if (bad) {
+		return NULL;
+	}
+	*data_len = len;
+	return buf;
+}
+
+/* see bearssl_ssl.h (casts map typed functions to class signatures) */
+const br_sslrec_in_chapol_class br_sslrec_in_chapol_vtable = {
+	{
+		sizeof(br_sslrec_chapol_context), /* context size */
+		(int (*)(const br_sslrec_in_class *const *, size_t))
+			&chapol_check_length, /* validates the encrypted record length */
+		(unsigned char *(*)(const br_sslrec_in_class **,
+			int, unsigned, void *, size_t *))
+			&chapol_decrypt /* decrypts and verifies one record */
+	},
+	(void (*)(const br_sslrec_in_chapol_class **,
+		br_chacha20_run, br_poly1305_run,
+		const void *, const void *))
+		&in_chapol_init /* context initialisation */
+};
+
+static void
+out_chapol_init(br_sslrec_chapol_context *cc,
+	br_chacha20_run ichacha, br_poly1305_run ipoly,
+	const void *key, const void *iv)
+{ /* Initialise a ChaCha20+Poly1305 context for outgoing records. */
+	cc->vtable.out = &br_sslrec_out_chapol_vtable;
+	gen_chapol_init(cc, ichacha, ipoly, key, iv);
+}
+
+static void
+chapol_max_plaintext(const br_sslrec_chapol_context *cc,
+	size_t *start, size_t *end)
+{ /* Shrinks *end so that the plaintext leaves room for the tag. */
+	size_t len;
+
+	(void)cc; /* unused: the overhead is fixed */
+	len = *end - *start - 16; /* room for the 16-byte tag */
+	if (len > 16384) { /* cap at the maximum plaintext size */
+		len = 16384;
+	}
+	*end = *start + len;
+}
+
+static unsigned char *
+chapol_encrypt(br_sslrec_chapol_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{ /* Builds the record in place; returns its start, *data_len = its size. */
+	unsigned char *buf;
+	size_t len;
+
+	buf = data;
+	len = *data_len;
+	gen_chapol_process(cc, record_type, version, buf, len, buf + len, 1); /* tag goes right after the data */
+	buf -= 5; /* the 5-byte record header precedes the data */
+	buf[0] = (unsigned char)record_type;
+	br_enc16be(buf + 1, version);
+	br_enc16be(buf + 3, len + 16);
+	*data_len = len + 21; /* 5 (header) + len + 16 (tag) */
+	return buf;
+}
+
+/* see bearssl_ssl.h (casts map typed functions to class signatures) */
+const br_sslrec_out_chapol_class br_sslrec_out_chapol_vtable = {
+	{
+		sizeof(br_sslrec_chapol_context), /* context size */
+		(void (*)(const br_sslrec_out_class *const *,
+			size_t *, size_t *))
+			&chapol_max_plaintext, /* computes the usable plaintext span */
+		(unsigned char *(*)(const br_sslrec_out_class **,
+			int, unsigned, void *, size_t *))
+			&chapol_encrypt /* builds one encrypted record */
+	},
+	(void (*)(const br_sslrec_out_chapol_class **,
+		br_chacha20_run, br_poly1305_run,
+		const void *, const void *))
+		&out_chapol_init /* context initialisation */
+};
diff --git a/third_party/bearssl/src/ssl_rec_gcm.c b/third_party/bearssl/src/ssl_rec_gcm.c
new file mode 100644
index 0000000..70df277
--- /dev/null
+++ b/third_party/bearssl/src/ssl_rec_gcm.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * GCM initialisation. This does everything except setting the vtable,
+ * which depends on whether this is a context for encrypting or for
+ * decrypting.
+ */
+static void
+gen_gcm_init(br_sslrec_gcm_context *cc,
+	const br_block_ctr_class *bc_impl,
+	const void *key, size_t key_len,
+	br_ghash gh_impl,
+	const void *iv)
+{
+	unsigned char tmp[12];
+
+	cc->seq = 0; /* sequence number of the next record */
+	bc_impl->init(&cc->bc.vtable, key, key_len);
+	cc->gh = gh_impl;
+	memcpy(cc->iv, iv, sizeof cc->iv);
+	memset(cc->h, 0, sizeof cc->h);
+	memset(tmp, 0, sizeof tmp);
+	bc_impl->run(&cc->bc.vtable, tmp, 0, cc->h, sizeof cc->h); /* h = CTR output over zeros, with zero IV and counter 0 */
+}
+
+static void
+in_gcm_init(br_sslrec_gcm_context *cc,
+	const br_block_ctr_class *bc_impl,
+	const void *key, size_t key_len,
+	br_ghash gh_impl,
+	const void *iv)
+{ /* Initialise a GCM context for incoming records (decryption). */
+	cc->vtable.in = &br_sslrec_in_gcm_vtable;
+	gen_gcm_init(cc, bc_impl, key, key_len, gh_impl, iv);
+}
+
+static int
+gcm_check_length(const br_sslrec_gcm_context *cc, size_t rlen)
+{
+	/*
+	 * GCM adds a fixed overhead:
+	 *   8 bytes for the nonce_explicit (before the ciphertext)
+	 *  16 bytes for the authentication tag (after the ciphertext)
+	 */
+	(void)cc; /* unused: the bounds do not depend on the context */
+	return rlen >= 24 && rlen <= (16384 + 24); /* plaintext may be 0 to 16384 bytes */
+}
+
+/*
+ * Compute the authentication tag. The value written in 'tag' must still
+ * be CTR-encrypted.
+ */
+static void
+do_tag(br_sslrec_gcm_context *cc,
+	int record_type, unsigned version,
+	void *data, size_t len, void *tag)
+{
+	unsigned char header[13];
+	unsigned char footer[16];
+
+	/*
+	 * Compute authentication tag. Three elements must be injected in
+	 * sequence, each possibly 0-padded to reach a length multiple
+	 * of the block size: the 13-byte header (sequence number, record
+	 * type, protocol version, record length), the cipher text, and
+	 * the word containing the encodings of the bit lengths of the two
+	 * other elements.
+	 */
+	br_enc64be(header, cc->seq ++); /* this call consumes one sequence number */
+	header[8] = (unsigned char)record_type;
+	br_enc16be(header + 9, version);
+	br_enc16be(header + 11, len);
+	br_enc64be(footer, (uint64_t)(sizeof header) << 3); /* header length, in bits */
+	br_enc64be(footer + 8, (uint64_t)len << 3); /* data length, in bits */
+	memset(tag, 0, 16); /* the accumulator starts at zero */
+	cc->gh(tag, cc->h, header, sizeof header);
+	cc->gh(tag, cc->h, data, len);
+	cc->gh(tag, cc->h, footer, sizeof footer);
+}
+
+/*
+ * Do CTR encryption. This also does CTR encryption of a single block at
+ * address 'xortag' with the counter value appropriate for the final
+ * processing of the authentication tag.
+ */
+static void
+do_ctr(br_sslrec_gcm_context *cc, const void *nonce, void *data, size_t len,
+	void *xortag)
+{
+	unsigned char iv[12];
+
+	memcpy(iv, cc->iv, 4); /* implicit part, from the context */
+	memcpy(iv + 4, nonce, 8); /* explicit part, per record */
+	cc->bc.vtable->run(&cc->bc.vtable, iv, 2, data, len); /* data uses counter values from 2 */
+	cc->bc.vtable->run(&cc->bc.vtable, iv, 1, xortag, 16); /* tag block uses counter value 1 */
+}
+
+static unsigned char *
+gcm_decrypt(br_sslrec_gcm_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{ /* Decrypts in place; returns the plaintext address, or NULL on a bad tag. */
+	unsigned char *buf;
+	size_t len, u;
+	uint32_t bad;
+	unsigned char tag[16];
+
+	buf = (unsigned char *)data + 8; /* skip the 8-byte explicit nonce */
+	len = *data_len - 24; /* minus the nonce (8) and the tag (16) */
+	do_tag(cc, record_type, version, buf, len, tag); /* over the ciphertext, hence before do_ctr() */
+	do_ctr(cc, data, buf, len, tag);
+
+	/*
+	 * Compare the computed tag with the value from the record. It
+	 * is possibly useless to do a constant-time comparison here,
+	 * but it does not hurt.
+	 */
+	bad = 0;
+	for (u = 0; u < 16; u ++) {
+		bad |= tag[u] ^ buf[len + u];
+	}
+	if (bad) {
+		return NULL;
+	}
+	*data_len = len;
+	return buf;
+}
+
+/* see bearssl_ssl.h (casts map typed functions to class signatures) */
+const br_sslrec_in_gcm_class br_sslrec_in_gcm_vtable = {
+	{
+		sizeof(br_sslrec_gcm_context), /* context size */
+		(int (*)(const br_sslrec_in_class *const *, size_t))
+			&gcm_check_length, /* validates the encrypted record length */
+		(unsigned char *(*)(const br_sslrec_in_class **,
+			int, unsigned, void *, size_t *))
+			&gcm_decrypt /* decrypts and verifies one record */
+	},
+	(void (*)(const br_sslrec_in_gcm_class **,
+		const br_block_ctr_class *, const void *, size_t,
+		br_ghash, const void *))
+		&in_gcm_init /* context initialisation */
+};
+
+static void
+out_gcm_init(br_sslrec_gcm_context *cc,
+	const br_block_ctr_class *bc_impl,
+	const void *key, size_t key_len,
+	br_ghash gh_impl,
+	const void *iv)
+{ /* Initialise a GCM context for outgoing records (encryption). */
+	cc->vtable.out = &br_sslrec_out_gcm_vtable;
+	gen_gcm_init(cc, bc_impl, key, key_len, gh_impl, iv);
+}
+
+static void
+gcm_max_plaintext(const br_sslrec_gcm_context *cc,
+	size_t *start, size_t *end)
+{ /* Shrinks [*start, *end) to the span usable for plaintext. */
+	size_t len;
+
+	(void)cc; /* unused: the overhead is fixed */
+	*start += 8; /* room for the 8-byte explicit nonce */
+	len = *end - *start - 16; /* room for the 16-byte tag */
+	if (len > 16384) { /* cap at the maximum plaintext size */
+		len = 16384;
+	}
+	*end = *start + len;
+}
+
+static unsigned char *
+gcm_encrypt(br_sslrec_gcm_context *cc,
+	int record_type, unsigned version, void *data, size_t *data_len)
+{ /* Builds the record in place; returns its start, *data_len = its size. */
+	unsigned char *buf;
+	size_t u, len;
+	unsigned char tmp[16];
+
+	buf = (unsigned char *)data;
+	len = *data_len;
+	memset(tmp, 0, sizeof tmp); /* do_ctr() turns this into the tag mask */
+	br_enc64be(buf - 8, cc->seq); /* explicit nonce = sequence number */
+	do_ctr(cc, buf - 8, buf, len, tmp);
+	do_tag(cc, record_type, version, buf, len, buf + len); /* over the ciphertext */
+	for (u = 0; u < 16; u ++) { /* apply the mask to the raw tag */
+		buf[len + u] ^= tmp[u];
+	}
+	len += 24; /* add the nonce (8) and the tag (16) */
+	buf -= 13; /* 5-byte record header + 8-byte explicit nonce */
+	buf[0] = (unsigned char)record_type;
+	br_enc16be(buf + 1, version);
+	br_enc16be(buf + 3, len);
+	*data_len = len + 5; /* total size, record header included */
+	return buf;
+}
+
+/* see bearssl_ssl.h (casts map typed functions to class signatures) */
+const br_sslrec_out_gcm_class br_sslrec_out_gcm_vtable = {
+	{
+		sizeof(br_sslrec_gcm_context), /* context size */
+		(void (*)(const br_sslrec_out_class *const *,
+			size_t *, size_t *))
+			&gcm_max_plaintext, /* computes the usable plaintext span */
+		(unsigned char *(*)(const br_sslrec_out_class **,
+			int, unsigned, void *, size_t *))
+			&gcm_encrypt /* builds one encrypted record */
+	},
+	(void (*)(const br_sslrec_out_gcm_class **,
+		const br_block_ctr_class *, const void *, size_t,
+		br_ghash, const void *))
+		&out_gcm_init /* context initialisation */
+};
diff --git a/third_party/bearssl/src/ssl_scert_single_ec.c b/third_party/bearssl/src/ssl_scert_single_ec.c
new file mode 100644
index 0000000..ce8d753
--- /dev/null
+++ b/third_party/bearssl/src/ssl_scert_single_ec.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Policy callback: accept the first client suite (in st[] order) that
+ * the EC key and certificate can serve. Returns 1 on a match, else 0.
+ */
+static int
+se_choose(const br_ssl_server_policy_class **pctx,
+	const br_ssl_server_context *cc, br_ssl_server_choices *choices)
+{
+	br_ssl_server_policy_ec_context *pc;
+	const br_suite_translated *st;
+	size_t u, st_num;
+	unsigned hash_id;
+	int can_keyx, can_sign;
+
+	pc = (br_ssl_server_policy_ec_context *)pctx;
+	st = br_ssl_server_get_client_suites(cc, &st_num);
+	hash_id = br_ssl_choose_hash(br_ssl_server_get_client_hashes(cc) >> 8);
+	if (cc->eng.session.version < BR_TLS12) {
+		hash_id = br_sha1_ID;
+	}
+	choices->chain = pc->chain;
+	choices->chain_len = pc->chain_len;
+	can_keyx = (pc->allowed_usages & BR_KEYTYPE_KEYX) != 0;
+	can_sign = (pc->allowed_usages & BR_KEYTYPE_SIGN) != 0 && hash_id != 0;
+	for (u = 0; u < st_num; u ++) {
+		unsigned kx;
+		int ok;
+
+		kx = st[u][1];
+		kx >>= 12;
+		if (kx == BR_SSLKEYX_ECDHE_ECDSA) {
+			if (can_sign) {
+				choices->cipher_suite = st[u][0];
+				choices->algo_id = hash_id + 0xFF00;
+				return 1;
+			}
+			continue;
+		}
+		if (kx == BR_SSLKEYX_ECDH_RSA) {
+			ok = pc->cert_issuer_key_type == BR_KEYTYPE_RSA;
+		} else if (kx == BR_SSLKEYX_ECDH_ECDSA) {
+			ok = pc->cert_issuer_key_type == BR_KEYTYPE_EC;
+		} else {
+			ok = 0;
+		}
+		if (ok && can_keyx) {
+			choices->cipher_suite = st[u][0];
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/* Key exchange: iec->mul() on data[], then keep only the bytes at xoff. */
+static uint32_t
+se_do_keyx(const br_ssl_server_policy_class **pctx,
+	unsigned char *data, size_t *len)
+{
+	br_ssl_server_policy_ec_context *pc;
+	uint32_t res;
+	size_t x_start;
+
+	pc = (br_ssl_server_policy_ec_context *)pctx;
+	res = pc->iec->mul(data, *len, pc->sk->x, pc->sk->xlen, pc->sk->curve);
+	x_start = pc->iec->xoff(pc->sk->curve, len);
+	memmove(data, data + x_start, *len);
+	return res;
+}
+
+/*
+ * ECDSA-sign the hv_len-byte hash in data[], in place. Yields 0 if the
+ * hash is unknown, hv_len overflows hv[], or len < 139 (output room).
+ */
+static size_t
+se_do_sign(const br_ssl_server_policy_class **pctx,
+	unsigned algo_id, unsigned char *data, size_t hv_len, size_t len)
+{
+	br_ssl_server_policy_ec_context *pc;
+	unsigned char hv[64];
+	const br_hash_class *hc;
+
+	pc = (br_ssl_server_policy_ec_context *)pctx;
+	hc = br_multihash_getimpl(pc->mhash, algo_id & 0xFF);
+	if (hc == NULL || hv_len > sizeof hv || len < 139) {
+		return 0;
+	}
+	memcpy(hv, data, hv_len);
+	return pc->iecdsa(pc->iec, hc, hv, pc->sk, data);
+}
+
+static const br_ssl_server_policy_class se_policy_vtable = {
+	sizeof(br_ssl_server_policy_ec_context), /* size of the EC policy context */
+	se_choose, /* cipher suite and chain selection */
+	se_do_keyx, /* key exchange step */
+	se_do_sign /* signature step */
+};
+
+/* see bearssl_ssl.h: fills cc's single_ec handler and makes it the policy */
+void
+br_ssl_server_set_single_ec(br_ssl_server_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_ec_private_key *sk, unsigned allowed_usages,
+	unsigned cert_issuer_key_type,
+	const br_ec_impl *iec, br_ecdsa_sign iecdsa)
+{
+	cc->chain_handler.single_ec.vtable = &se_policy_vtable; /* callbacks from this file */
+	cc->chain_handler.single_ec.chain = chain; /* pointer stored, not copied */
+	cc->chain_handler.single_ec.chain_len = chain_len;
+	cc->chain_handler.single_ec.sk = sk; /* pointer stored, not copied */
+	cc->chain_handler.single_ec.allowed_usages = allowed_usages;
+	cc->chain_handler.single_ec.cert_issuer_key_type = cert_issuer_key_type;
+	cc->chain_handler.single_ec.mhash = &cc->eng.mhash; /* points at the engine's mhash field */
+	cc->chain_handler.single_ec.iec = iec;
+	cc->chain_handler.single_ec.iecdsa = iecdsa;
+	cc->policy_vtable = &cc->chain_handler.single_ec.vtable; /* activate the embedded handler */
+}
diff --git a/third_party/bearssl/src/ssl_scert_single_rsa.c b/third_party/bearssl/src/ssl_scert_single_rsa.c
new file mode 100644
index 0000000..b2c7767
--- /dev/null
+++ b/third_party/bearssl/src/ssl_scert_single_rsa.c
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Policy callback: accept the first client suite (in st[] order) that
+ * the RSA key may serve. Plain RSA needs the KEYX usage; ECDHE_RSA
+ * needs the SIGN usage plus a usable hash. Returns 1 on a match, else 0.
+ */
+static int
+sr_choose(const br_ssl_server_policy_class **pctx,
+	const br_ssl_server_context *cc, br_ssl_server_choices *choices)
+{
+	br_ssl_server_policy_rsa_context *pc;
+	const br_suite_translated *st;
+	size_t u, st_num;
+	unsigned hash_id;
+	int can_keyx, can_sign;
+
+	pc = (br_ssl_server_policy_rsa_context *)pctx;
+	st = br_ssl_server_get_client_suites(cc, &st_num);
+	/* Below TLS 1.2 the hash id stays 0 and is always acceptable. */
+	hash_id = 0;
+	if (cc->eng.session.version >= BR_TLS12) {
+		hash_id = br_ssl_choose_hash(
+			br_ssl_server_get_client_hashes(cc));
+	}
+	choices->chain = pc->chain;
+	choices->chain_len = pc->chain_len;
+	can_keyx = (pc->allowed_usages & BR_KEYTYPE_KEYX) != 0;
+	can_sign = (pc->allowed_usages & BR_KEYTYPE_SIGN) != 0
+		&& (hash_id != 0 || cc->eng.session.version < BR_TLS12);
+	for (u = 0; u < st_num; u ++) {
+		unsigned kx;
+
+		kx = st[u][1];
+		kx >>= 12;
+		if (kx == BR_SSLKEYX_RSA && can_keyx) {
+			choices->cipher_suite = st[u][0];
+			return 1;
+		}
+		if (kx == BR_SSLKEYX_ECDHE_RSA && can_sign) {
+			choices->cipher_suite = st[u][0];
+			choices->algo_id = hash_id + 0xFF00;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+static uint32_t
+sr_do_keyx(const br_ssl_server_policy_class **pctx,
+	unsigned char *data, size_t *len)
+{
+	br_ssl_server_policy_rsa_context *rsa_pc = (void *)pctx;
+
+	/* Hand the *len bytes of data[] to the RSA decryption helper. */
+	return br_rsa_ssl_decrypt(rsa_pc->irsacore, rsa_pc->sk, data, *len);
+}
+
+/*
+ * OID for hash functions in RSA signatures; sr_do_sign() reads HASH_OID[algo_id - 2].
+ */
+static const unsigned char HASH_OID_SHA1[] = {
+	0x05, 0x2B, 0x0E, 0x03, 0x02, 0x1A /* leading 0x05 = number of bytes that follow */
+};
+
+static const unsigned char HASH_OID_SHA224[] = {
+	0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x04 /* leading 0x09 = number of bytes that follow */
+};
+
+static const unsigned char HASH_OID_SHA256[] = {
+	0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01
+};
+
+static const unsigned char HASH_OID_SHA384[] = {
+	0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02
+};
+
+static const unsigned char HASH_OID_SHA512[] = {
+	0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03
+};
+
+static const unsigned char *HASH_OID[] = {
+	HASH_OID_SHA1, /* index 0, i.e. algo_id 2 */
+	HASH_OID_SHA224,
+	HASH_OID_SHA256,
+	HASH_OID_SHA384,
+	HASH_OID_SHA512 /* index 4, i.e. algo_id 6 */
+};
+
+/*
+ * PKCS#1-sign the hv_len-byte hash in data[], in place; algo_id 0 means
+ * no hash OID, 2..6 index HASH_OID[]. Yields sig length, or 0 on error.
+ */
+static size_t
+sr_do_sign(const br_ssl_server_policy_class **pctx,
+	unsigned algo_id, unsigned char *data, size_t hv_len, size_t len)
+{
+	br_ssl_server_policy_rsa_context *pc;
+	unsigned char hv[64];
+	size_t sig_len;
+	const unsigned char *hash_oid;
+
+	pc = (br_ssl_server_policy_rsa_context *)pctx;
+	algo_id &= 0xFF;
+	sig_len = (pc->sk->n_bitlen + 7) >> 3;
+	if (algo_id == 1 || algo_id > 6
+		|| hv_len > sizeof hv || len < sig_len)
+	{
+		return 0;
+	}
+	hash_oid = (algo_id == 0) ? NULL : HASH_OID[algo_id - 2];
+	memcpy(hv, data, hv_len);
+	return pc->irsasign(hash_oid, hv, hv_len, pc->sk, data) ? sig_len : 0;
+}
+
+static const br_ssl_server_policy_class sr_policy_vtable = {
+	sizeof(br_ssl_server_policy_rsa_context), /* size of the RSA policy context */
+	sr_choose, /* cipher suite and chain selection */
+	sr_do_keyx, /* key exchange step */
+	sr_do_sign /* signature step */
+};
+
+/* see bearssl_ssl.h: fills cc's single_rsa handler and makes it the policy */
+void
+br_ssl_server_set_single_rsa(br_ssl_server_context *cc,
+	const br_x509_certificate *chain, size_t chain_len,
+	const br_rsa_private_key *sk, unsigned allowed_usages,
+	br_rsa_private irsacore, br_rsa_pkcs1_sign irsasign)
+{
+	cc->chain_handler.single_rsa.vtable = &sr_policy_vtable; /* callbacks from this file */
+	cc->chain_handler.single_rsa.chain = chain; /* pointer stored, not copied */
+	cc->chain_handler.single_rsa.chain_len = chain_len;
+	cc->chain_handler.single_rsa.sk = sk; /* pointer stored, not copied */
+	cc->chain_handler.single_rsa.allowed_usages = allowed_usages;
+	cc->chain_handler.single_rsa.irsacore = irsacore; /* used by sr_do_keyx() */
+	cc->chain_handler.single_rsa.irsasign = irsasign; /* used by sr_do_sign() */
+	cc->policy_vtable = &cc->chain_handler.single_rsa.vtable; /* activate the embedded handler */
+}
diff --git a/third_party/bearssl/src/sysrng.c b/third_party/bearssl/src/sysrng.c
new file mode 100644
index 0000000..5a92114
--- /dev/null
+++ b/third_party/bearssl/src/sysrng.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define BR_ENABLE_INTRINSICS   1
+#include "inner.h"
+
+#if BR_USE_GETENTROPY
+#include <unistd.h>
+#endif
+
+#if BR_USE_URANDOM
+#include <sys/types.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#endif
+
+#if BR_USE_WIN32_RAND
+#include <windows.h>
+#include <wincrypt.h>
+#pragma comment(lib, "advapi32")
+#endif
+
+/*
+ * Seeder that uses the RDRAND opcodes (on x86 CPU).
+ */
+#if BR_RDRAND
+BR_TARGETS_X86_UP
+BR_TARGET("rdrnd")
+static int
+seeder_rdrand(const br_prng_class **ctx)
+{
+	unsigned char tmp[32]; /* filled 4 bytes at a time by the outer loop */
+	size_t u;
+
+	for (u = 0; u < sizeof tmp; u += sizeof(uint32_t)) {
+		int j;
+		uint32_t x;
+
+		/*
+		 * We use the 32-bit intrinsic so that code is compatible
+		 * with both 32-bit and 64-bit architectures.
+		 *
+		 * Intel recommends trying at least 10 times in case of
+		 * failure.
+		 *
+		 * AMD bug: there are reports that some AMD processors
+		 * have a bug that makes them fail silently after a
+		 * suspend/resume cycle, in which case RDRAND will report
+		 * a success but always return 0xFFFFFFFF.
+		 * see: https://bugzilla.kernel.org/show_bug.cgi?id=85911
+		 *
+		 * As a mitigation, if the 32-bit value is 0 or -1, then
+		 * it is considered a failure and tried again. This should
+		 * reliably detect the buggy case, at least. This also
+		 * implies that the selected seed values can never be
+		 * 0x00000000 or 0xFFFFFFFF, which is not a problem since
+		 * we are generating a seed for a PRNG, and we overdo it
+		 * a bit (we generate 32 bytes of randomness, and 256 bits
+		 * of entropy are really overkill).
+		 */
+		for (j = 0; j < 10; j ++) {
+			if (_rdrand32_step(&x) && x != 0 && x != (uint32_t)-1) {
+				goto next_word;
+			}
+		}
+		return 0; /* all ten attempts for this word were rejected */
+	next_word:
+		br_enc32le(tmp + u, x); /* store the accepted word, little-endian */
+	}
+	(*ctx)->update(ctx, tmp, sizeof tmp); /* pass all 32 bytes to the PRNG's update method */
+	return 1;
+}
+BR_TARGETS_X86_DOWN
+
+static int
+rdrand_supported(void)
+{
+	/* CPUID reports RDRAND support in bit 30 of ECX. */
+	const uint32_t ecx_rdrand_bit = (uint32_t)1 << 30;
+
+	return br_cpuid(0, 0, ecx_rdrand_bit, 0);
+}
+#endif
+
+/*
+ * Seeder that uses /dev/urandom (on Unix-like systems): 1 on success, else 0.
+ */
+#if BR_USE_URANDOM
+static int
+seeder_urandom(const br_prng_class **ctx)
+{
+	unsigned char tmp[32];
+	size_t u;
+	int f;
+
+	f = open("/dev/urandom", O_RDONLY);
+	if (f < 0) {
+		return 0;
+	}
+	for (u = 0; u < sizeof tmp;) {
+		ssize_t len;
+
+		len = read(f, tmp + u, (sizeof tmp) - u);
+		if (len < 0 && errno == EINTR) {
+			continue;	/* interrupted before any data: retry */
+		}
+		if (len <= 0) {
+			break;	/* hard error, or EOF (0) that would otherwise spin */
+		}
+		u += (size_t)len;	/* short reads are fine, keep filling */
+	}
+	close(f);
+	if (u != sizeof tmp) {
+		return 0;
+	}
+	(*ctx)->update(ctx, tmp, sizeof tmp);
+	return 1;
+}
+#endif
+
+/*
+ * Seeder built on getentropy() (which some systems, e.g. Linux, back
+ * with getrandom()). If that call fails, the /dev/urandom seeder is
+ * tried instead when it is compiled in; otherwise failure is reported.
+ */
+#if BR_USE_GETENTROPY
+static int
+seeder_getentropy(const br_prng_class **ctx)
+{
+	unsigned char seed[32];
+
+	if (getentropy(seed, sizeof seed) != 0) {
+#if BR_USE_URANDOM
+		return seeder_urandom(ctx);
+#else
+		return 0;
+#endif
+	}
+	(*ctx)->update(ctx, seed, sizeof seed);
+	return 1;
+}
+#endif
+
+/*
+ * Windows seeder: 32 bytes from CryptGenRandom(); 1 = PRNG fed, 0 = failure.
+ */
+#if BR_USE_WIN32_RAND
+static int
+seeder_win32(const br_prng_class **ctx)
+{
+	HCRYPTPROV prov;
+	BYTE seed[32];
+	BOOL filled;
+
+	if (!CryptAcquireContext(&prov, 0, 0, PROV_RSA_FULL,
+		CRYPT_VERIFYCONTEXT | CRYPT_SILENT))
+	{
+		return 0;
+	}
+	filled = CryptGenRandom(prov, sizeof seed, seed);
+	CryptReleaseContext(prov, 0);
+	if (!filled) {
+		return 0;
+	}
+	(*ctx)->update(ctx, seed, sizeof seed);
+	return 1;
+}
+#endif
+
+/*
+ * Composite seeder: RDRAND is tried first. Only when it reports a
+ * failure is the OS-provided source selected at compile time used,
+ * and that source's result becomes the return value.
+ */
+#if BR_RDRAND && (BR_USE_GETENTROPY || BR_USE_URANDOM || BR_USE_WIN32_RAND)
+static int
+seeder_rdrand_with_fallback(const br_prng_class **ctx)
+{
+	if (seeder_rdrand(ctx)) {
+		return 1;
+	}
+#if BR_USE_GETENTROPY
+	return seeder_getentropy(ctx);
+#elif BR_USE_URANDOM
+	return seeder_urandom(ctx);
+#elif BR_USE_WIN32_RAND
+	return seeder_win32(ctx);
+#else
+#error "macro selection has gone wrong"
+#endif
+}
+#endif
+
+/* see bearssl_rand.h */
+br_prng_seeder
+br_prng_seeder_system(const char **name)
+{
+	const char *label;
+	br_prng_seeder seeder;
+
+	/* Start from the OS source chosen at compile time, if there is one. */
+#if BR_USE_GETENTROPY
+	label = "getentropy";
+	seeder = &seeder_getentropy;
+#elif BR_USE_URANDOM
+	label = "urandom";
+	seeder = &seeder_urandom;
+#elif BR_USE_WIN32_RAND
+	label = "win32";
+	seeder = &seeder_win32;
+#else
+	label = "none";
+	seeder = 0;
+#endif
+	/* A CPU that reports RDRAND support overrides that choice. */
+#if BR_RDRAND
+	if (rdrand_supported()) {
+		label = "rdrand";
+#if BR_USE_GETENTROPY || BR_USE_URANDOM || BR_USE_WIN32_RAND
+		seeder = &seeder_rdrand_with_fallback;
+#else
+		seeder = &seeder_rdrand;
+#endif
+	}
+#endif
+	/* The seeder name is only reported when the caller asked for it. */
+	if (name != NULL) {
+		*name = label;
+	}
+	return seeder;
+}
diff --git a/third_party/bearssl/src/x509_decoder.c b/third_party/bearssl/src/x509_decoder.c
new file mode 100644
index 0000000..8dd970f
--- /dev/null
+++ b/third_party/bearssl/src/x509_decoder.c
@@ -0,0 +1,773 @@
+/* Automatically generated code; do not modify directly. */
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct {
+	uint32_t *dp;
+	uint32_t *rp;
+	const unsigned char *ip;
+} t0_context;
+
+static uint32_t
+t0_parse7E_unsigned(const unsigned char **p)
+{
+	uint32_t x;
+
+	x = 0;
+	for (;;) {
+		unsigned y;
+
+		y = *(*p) ++; /* fetch one byte and advance the caller's pointer */
+		x = (x << 7) | (uint32_t)(y & 0x7F); /* append its low 7 bits */
+		if (y < 0x80) { /* a byte with bit 7 clear ends the value */
+			return x;
+		}
+	}
+}
+
+static int32_t
+t0_parse7E_signed(const unsigned char **p)
+{
+	int neg;
+	uint32_t x;
+
+	neg = ((**p) >> 6) & 1; /* bit 6 of the first byte is the sign */
+	x = (uint32_t)-neg; /* all-ones when negative, so the shifts below keep the sign bits */
+	for (;;) {
+		unsigned y;
+
+		y = *(*p) ++; /* fetch one byte and advance the caller's pointer */
+		x = (x << 7) | (uint32_t)(y & 0x7F); /* append its low 7 bits */
+		if (y < 0x80) { /* a byte with bit 7 clear ends the value */
+			if (neg) {
+				return -(int32_t)~x - 1; /* negative result built from the in-range value ~x */
+			} else {
+				return (int32_t)x;
+			}
+		}
+	}
+}
+
+#define T0_VBYTE(x, n)   (unsigned char)((((uint32_t)(x) >> (n)) & 0x7F) | 0x80) /* 7 bits of x from bit n, bit 7 set */
+#define T0_FBYTE(x, n)   (unsigned char)(((uint32_t)(x) >> (n)) & 0x7F) /* 7 bits of x from bit n, bit 7 clear */
+#define T0_SBYTE(x)      (unsigned char)((((uint32_t)(x) >> 28) + 0xF8) ^ 0xF8)
+#define T0_INT1(x)       T0_FBYTE(x, 0)
+#define T0_INT2(x)       T0_VBYTE(x, 7), T0_FBYTE(x, 0) /* T0_INTn: n bytes, only the last one has bit 7 clear */
+#define T0_INT3(x)       T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT4(x)       T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT5(x)       T0_SBYTE(x), T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+
+/* static const unsigned char t0_datablock[]; */
+
+
+void br_x509_decoder_init_main(void *t0ctx);
+
+void br_x509_decoder_run(void *t0ctx);
+
+
+
+#include "inner.h"
+
+
+
+
+
+#include "inner.h"
+
+#define CTX   ((br_x509_decoder_context *)(void *)((unsigned char *)t0ctx - offsetof(br_x509_decoder_context, cpu)))
+#define CONTEXT_NAME   br_x509_decoder_context
+
+/* see bearssl_x509.h */
+void
+br_x509_decoder_init(br_x509_decoder_context *ctx,
+	void (*append_dn)(void *ctx, const void *buf, size_t len),
+	void *append_dn_ctx)
+{
+	/*
+	 * Zero the whole context first (this also covers err, hbuf and
+	 * hlen), then install the DN callback, point the T0 engine at
+	 * the start of both stacks and run it once.
+	 */
+	memset(ctx, 0, sizeof *ctx);
+	ctx->cpu.rp = &ctx->rp_stack[0];
+	ctx->cpu.dp = &ctx->dp_stack[0];
+	ctx->append_dn_ctx = append_dn_ctx;
+	ctx->append_dn = append_dn;
+	br_x509_decoder_init_main(&ctx->cpu);
+	br_x509_decoder_run(&ctx->cpu);
+}
+
+/* see bearssl_x509.h */
+void
+br_x509_decoder_push(br_x509_decoder_context *ctx, const void *data, size_t len)
+{
+	/* Expose the caller's chunk to the engine, then let it run. */
+	ctx->hlen = len;
+	ctx->hbuf = data;
+	br_x509_decoder_run(&ctx->cpu);
+}
+
+
+
+static const unsigned char t0_datablock[] = {
+	0x00, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x09,
+	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x05, 0x09, 0x2A, 0x86,
+	0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0E, 0x09, 0x2A, 0x86, 0x48, 0x86,
+	0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+	0x01, 0x01, 0x0C, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01,
+	0x0D, 0x07, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x08, 0x2A, 0x86,
+	0x48, 0xCE, 0x3D, 0x03, 0x01, 0x07, 0x05, 0x2B, 0x81, 0x04, 0x00, 0x22,
+	0x05, 0x2B, 0x81, 0x04, 0x00, 0x23, 0x07, 0x2A, 0x86, 0x48, 0xCE, 0x3D,
+	0x04, 0x01, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x01, 0x08,
+	0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x08, 0x2A, 0x86, 0x48,
+	0xCE, 0x3D, 0x04, 0x03, 0x03, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04,
+	0x03, 0x04, 0x00, 0x1F, 0x03, 0xFC, 0x07, 0x7F, 0x0B, 0x5E, 0x0F, 0x1F,
+	0x12, 0xFE, 0x16, 0xBF, 0x1A, 0x9F, 0x1E, 0x7E, 0x22, 0x3F, 0x26, 0x1E,
+	0x29, 0xDF, 0x00, 0x1F, 0x03, 0xFD, 0x07, 0x9F, 0x0B, 0x7E, 0x0F, 0x3F,
+	0x13, 0x1E, 0x16, 0xDF, 0x1A, 0xBF, 0x1E, 0x9E, 0x22, 0x5F, 0x26, 0x3E,
+	0x29, 0xFF, 0x03, 0x55, 0x1D, 0x13
+};
+
+static const unsigned char t0_codeblock[] = {
+	0x00, 0x01, 0x00, 0x10, 0x00, 0x00, 0x01, 0x00, 0x11, 0x00, 0x00, 0x01,
+	0x01, 0x09, 0x00, 0x00, 0x01, 0x01, 0x0A, 0x00, 0x00, 0x1A, 0x1A, 0x00,
+	0x00, 0x01, T0_INT1(BR_ERR_X509_BAD_BOOLEAN), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_TAG_CLASS), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_TAG_VALUE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_TIME), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_EXTRA_ELEMENT), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_INDEFINITE_LENGTH), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_INNER_TRUNC), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_LIMIT_EXCEEDED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_NOT_CONSTRUCTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_NOT_PRIMITIVE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_OVERFLOW), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_PARTIAL_BYTE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_UNEXPECTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_UNSUPPORTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_KEYTYPE_EC), 0x00, 0x00, 0x01, T0_INT1(BR_KEYTYPE_RSA),
+	0x00, 0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, copy_dn)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(CONTEXT_NAME, decoded)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, isCA)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_x509_decoder_context, pkey_data)), 0x01,
+	T0_INT2(BR_X509_BUFSIZE_KEY), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, notafter_days)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, notafter_seconds)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, notbefore_days)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, notbefore_seconds)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, pad)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, signer_hash_id)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, signer_key_type)), 0x00, 0x00, 0x01,
+	0x80, 0x45, 0x00, 0x00, 0x01, 0x80, 0x4E, 0x00, 0x00, 0x01, 0x80, 0x54,
+	0x00, 0x00, 0x01, 0x81, 0x36, 0x00, 0x02, 0x03, 0x00, 0x03, 0x01, 0x1B,
+	0x02, 0x01, 0x13, 0x26, 0x02, 0x00, 0x0F, 0x15, 0x00, 0x00, 0x05, 0x02,
+	0x34, 0x1D, 0x00, 0x00, 0x06, 0x02, 0x35, 0x1D, 0x00, 0x00, 0x01, 0x10,
+	0x4F, 0x00, 0x00, 0x11, 0x05, 0x02, 0x38, 0x1D, 0x4C, 0x00, 0x00, 0x11,
+	0x05, 0x02, 0x38, 0x1D, 0x4D, 0x00, 0x00, 0x06, 0x02, 0x30, 0x1D, 0x00,
+	0x00, 0x1B, 0x19, 0x01, 0x08, 0x0E, 0x26, 0x29, 0x19, 0x09, 0x00, 0x00,
+	0x01, 0x30, 0x0A, 0x1B, 0x01, 0x00, 0x01, 0x09, 0x4B, 0x05, 0x02, 0x2F,
+	0x1D, 0x00, 0x00, 0x20, 0x20, 0x00, 0x00, 0x01, 0x80, 0x5A, 0x00, 0x00,
+	0x01, 0x80, 0x62, 0x00, 0x00, 0x01, 0x80, 0x6B, 0x00, 0x00, 0x01, 0x80,
+	0x74, 0x00, 0x00, 0x01, 0x80, 0x7D, 0x00, 0x00, 0x01, 0x3D, 0x00, 0x00,
+	0x20, 0x11, 0x06, 0x04, 0x2B, 0x6B, 0x7A, 0x71, 0x00, 0x04, 0x01, 0x00,
+	0x3D, 0x25, 0x01, 0x00, 0x3C, 0x25, 0x01, 0x87, 0xFF, 0xFF, 0x7F, 0x6D,
+	0x6D, 0x70, 0x1B, 0x01, 0x20, 0x11, 0x06, 0x11, 0x1A, 0x4C, 0x6B, 0x70,
+	0x01, 0x02, 0x50, 0x6E, 0x01, 0x02, 0x12, 0x06, 0x02, 0x39, 0x1D, 0x51,
+	0x70, 0x01, 0x02, 0x50, 0x6C, 0x6D, 0x7A, 0x6D, 0x7A, 0x6D, 0x65, 0x43,
+	0x24, 0x42, 0x24, 0x65, 0x41, 0x24, 0x40, 0x24, 0x51, 0x01, 0x01, 0x3C,
+	0x25, 0x6D, 0x7A, 0x01, 0x00, 0x3C, 0x25, 0x6D, 0x6D, 0x60, 0x05, 0x02,
+	0x39, 0x1D, 0x74, 0x1C, 0x06, 0x1C, 0x7A, 0x61, 0x6D, 0x3F, 0x68, 0x03,
+	0x00, 0x3F, 0x26, 0x02, 0x00, 0x09, 0x26, 0x02, 0x00, 0x0A, 0x68, 0x03,
+	0x01, 0x51, 0x51, 0x02, 0x00, 0x02, 0x01, 0x18, 0x04, 0x1E, 0x5A, 0x1C,
+	0x06, 0x18, 0x64, 0x03, 0x02, 0x51, 0x61, 0x1B, 0x03, 0x03, 0x1B, 0x3F,
+	0x23, 0x0D, 0x06, 0x02, 0x33, 0x1D, 0x62, 0x02, 0x02, 0x02, 0x03, 0x17,
+	0x04, 0x02, 0x39, 0x1D, 0x51, 0x01, 0x00, 0x3E, 0x25, 0x71, 0x01, 0x21,
+	0x5B, 0x01, 0x22, 0x5B, 0x1B, 0x01, 0x23, 0x11, 0x06, 0x28, 0x1A, 0x4C,
+	0x6B, 0x6D, 0x1B, 0x06, 0x1D, 0x6D, 0x60, 0x1A, 0x70, 0x1B, 0x01, 0x01,
+	0x11, 0x06, 0x03, 0x63, 0x1A, 0x70, 0x01, 0x04, 0x50, 0x6B, 0x4A, 0x1C,
+	0x06, 0x03, 0x5F, 0x04, 0x01, 0x7B, 0x51, 0x51, 0x04, 0x60, 0x51, 0x51,
+	0x04, 0x08, 0x01, 0x7F, 0x11, 0x05, 0x02, 0x38, 0x1D, 0x1A, 0x51, 0x6D,
+	0x60, 0x06, 0x80, 0x63, 0x75, 0x1C, 0x06, 0x06, 0x01, 0x02, 0x3B, 0x04,
+	0x80, 0x57, 0x76, 0x1C, 0x06, 0x06, 0x01, 0x03, 0x3B, 0x04, 0x80, 0x4D,
+	0x77, 0x1C, 0x06, 0x06, 0x01, 0x04, 0x3B, 0x04, 0x80, 0x43, 0x78, 0x1C,
+	0x06, 0x05, 0x01, 0x05, 0x3B, 0x04, 0x3A, 0x79, 0x1C, 0x06, 0x05, 0x01,
+	0x06, 0x3B, 0x04, 0x31, 0x55, 0x1C, 0x06, 0x05, 0x01, 0x02, 0x3A, 0x04,
+	0x28, 0x56, 0x1C, 0x06, 0x05, 0x01, 0x03, 0x3A, 0x04, 0x1F, 0x57, 0x1C,
+	0x06, 0x05, 0x01, 0x04, 0x3A, 0x04, 0x16, 0x58, 0x1C, 0x06, 0x05, 0x01,
+	0x05, 0x3A, 0x04, 0x0D, 0x59, 0x1C, 0x06, 0x05, 0x01, 0x06, 0x3A, 0x04,
+	0x04, 0x01, 0x00, 0x01, 0x00, 0x04, 0x04, 0x01, 0x00, 0x01, 0x00, 0x46,
+	0x25, 0x45, 0x25, 0x7A, 0x61, 0x7A, 0x51, 0x1A, 0x01, 0x01, 0x3D, 0x25,
+	0x73, 0x30, 0x1D, 0x00, 0x00, 0x01, 0x81, 0x06, 0x00, 0x01, 0x54, 0x0D,
+	0x06, 0x02, 0x32, 0x1D, 0x1B, 0x03, 0x00, 0x0A, 0x02, 0x00, 0x00, 0x00,
+	0x6D, 0x71, 0x1B, 0x01, 0x01, 0x11, 0x06, 0x08, 0x63, 0x01, 0x01, 0x15,
+	0x3E, 0x25, 0x04, 0x01, 0x2B, 0x7A, 0x00, 0x00, 0x70, 0x01, 0x06, 0x50,
+	0x6F, 0x00, 0x00, 0x70, 0x01, 0x03, 0x50, 0x6B, 0x72, 0x06, 0x02, 0x37,
+	0x1D, 0x00, 0x00, 0x26, 0x1B, 0x06, 0x07, 0x21, 0x1B, 0x06, 0x01, 0x16,
+	0x04, 0x76, 0x2B, 0x00, 0x00, 0x01, 0x01, 0x50, 0x6A, 0x01, 0x01, 0x10,
+	0x06, 0x02, 0x2C, 0x1D, 0x72, 0x27, 0x00, 0x00, 0x60, 0x05, 0x02, 0x39,
+	0x1D, 0x47, 0x1C, 0x06, 0x04, 0x01, 0x17, 0x04, 0x12, 0x48, 0x1C, 0x06,
+	0x04, 0x01, 0x18, 0x04, 0x0A, 0x49, 0x1C, 0x06, 0x04, 0x01, 0x19, 0x04,
+	0x02, 0x39, 0x1D, 0x00, 0x04, 0x70, 0x1B, 0x01, 0x17, 0x01, 0x18, 0x4B,
+	0x05, 0x02, 0x2F, 0x1D, 0x01, 0x18, 0x11, 0x03, 0x00, 0x4D, 0x6B, 0x66,
+	0x02, 0x00, 0x06, 0x0C, 0x01, 0x80, 0x64, 0x08, 0x03, 0x01, 0x66, 0x02,
+	0x01, 0x09, 0x04, 0x0E, 0x1B, 0x01, 0x32, 0x0D, 0x06, 0x04, 0x01, 0x80,
+	0x64, 0x09, 0x01, 0x8E, 0x6C, 0x09, 0x03, 0x01, 0x02, 0x01, 0x01, 0x82,
+	0x6D, 0x08, 0x02, 0x01, 0x01, 0x03, 0x09, 0x01, 0x04, 0x0C, 0x09, 0x02,
+	0x01, 0x01, 0x80, 0x63, 0x09, 0x01, 0x80, 0x64, 0x0C, 0x0A, 0x02, 0x01,
+	0x01, 0x83, 0x0F, 0x09, 0x01, 0x83, 0x10, 0x0C, 0x09, 0x03, 0x03, 0x01,
+	0x01, 0x01, 0x0C, 0x67, 0x2A, 0x01, 0x01, 0x0E, 0x02, 0x01, 0x01, 0x04,
+	0x07, 0x28, 0x02, 0x01, 0x01, 0x80, 0x64, 0x07, 0x27, 0x02, 0x01, 0x01,
+	0x83, 0x10, 0x07, 0x28, 0x1F, 0x15, 0x06, 0x03, 0x01, 0x18, 0x09, 0x5D,
+	0x09, 0x52, 0x1B, 0x01, 0x05, 0x14, 0x02, 0x03, 0x09, 0x03, 0x03, 0x01,
+	0x1F, 0x15, 0x01, 0x01, 0x26, 0x67, 0x02, 0x03, 0x09, 0x2A, 0x03, 0x03,
+	0x01, 0x00, 0x01, 0x17, 0x67, 0x01, 0x9C, 0x10, 0x08, 0x03, 0x02, 0x01,
+	0x00, 0x01, 0x3B, 0x67, 0x01, 0x3C, 0x08, 0x02, 0x02, 0x09, 0x03, 0x02,
+	0x01, 0x00, 0x01, 0x3C, 0x67, 0x02, 0x02, 0x09, 0x03, 0x02, 0x72, 0x1B,
+	0x01, 0x2E, 0x11, 0x06, 0x0D, 0x1A, 0x72, 0x1B, 0x01, 0x30, 0x01, 0x39,
+	0x4B, 0x06, 0x03, 0x1A, 0x04, 0x74, 0x01, 0x80, 0x5A, 0x10, 0x06, 0x02,
+	0x2F, 0x1D, 0x51, 0x02, 0x03, 0x02, 0x02, 0x00, 0x01, 0x72, 0x53, 0x01,
+	0x0A, 0x08, 0x03, 0x00, 0x72, 0x53, 0x02, 0x00, 0x09, 0x00, 0x02, 0x03,
+	0x00, 0x03, 0x01, 0x66, 0x1B, 0x02, 0x01, 0x02, 0x00, 0x4B, 0x05, 0x02,
+	0x2F, 0x1D, 0x00, 0x00, 0x23, 0x70, 0x01, 0x02, 0x50, 0x0B, 0x69, 0x00,
+	0x03, 0x1B, 0x03, 0x00, 0x03, 0x01, 0x03, 0x02, 0x6B, 0x72, 0x1B, 0x01,
+	0x81, 0x00, 0x13, 0x06, 0x02, 0x36, 0x1D, 0x1B, 0x01, 0x00, 0x11, 0x06,
+	0x0B, 0x1A, 0x1B, 0x05, 0x04, 0x1A, 0x01, 0x00, 0x00, 0x72, 0x04, 0x6F,
+	0x02, 0x01, 0x1B, 0x05, 0x02, 0x33, 0x1D, 0x2A, 0x03, 0x01, 0x02, 0x02,
+	0x25, 0x02, 0x02, 0x29, 0x03, 0x02, 0x1B, 0x06, 0x03, 0x72, 0x04, 0x68,
+	0x1A, 0x02, 0x00, 0x02, 0x01, 0x0A, 0x00, 0x01, 0x72, 0x1B, 0x01, 0x81,
+	0x00, 0x0D, 0x06, 0x01, 0x00, 0x01, 0x81, 0x00, 0x0A, 0x1B, 0x05, 0x02,
+	0x31, 0x1D, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x12, 0x06,
+	0x19, 0x02, 0x00, 0x2A, 0x03, 0x00, 0x1B, 0x01, 0x83, 0xFF, 0xFF, 0x7F,
+	0x12, 0x06, 0x02, 0x32, 0x1D, 0x01, 0x08, 0x0E, 0x26, 0x72, 0x23, 0x09,
+	0x04, 0x60, 0x00, 0x00, 0x6A, 0x5E, 0x00, 0x00, 0x6B, 0x7A, 0x00, 0x00,
+	0x70, 0x4E, 0x6B, 0x00, 0x01, 0x6B, 0x1B, 0x05, 0x02, 0x36, 0x1D, 0x72,
+	0x1B, 0x01, 0x81, 0x00, 0x13, 0x06, 0x02, 0x36, 0x1D, 0x03, 0x00, 0x1B,
+	0x06, 0x16, 0x72, 0x02, 0x00, 0x1B, 0x01, 0x87, 0xFF, 0xFF, 0x7F, 0x13,
+	0x06, 0x02, 0x36, 0x1D, 0x01, 0x08, 0x0E, 0x09, 0x03, 0x00, 0x04, 0x67,
+	0x1A, 0x02, 0x00, 0x00, 0x00, 0x6B, 0x1B, 0x01, 0x81, 0x7F, 0x12, 0x06,
+	0x08, 0x7A, 0x01, 0x00, 0x44, 0x25, 0x01, 0x00, 0x00, 0x1B, 0x44, 0x25,
+	0x44, 0x29, 0x62, 0x01, 0x7F, 0x00, 0x01, 0x72, 0x03, 0x00, 0x02, 0x00,
+	0x01, 0x05, 0x14, 0x01, 0x01, 0x15, 0x1E, 0x02, 0x00, 0x01, 0x06, 0x14,
+	0x1B, 0x01, 0x01, 0x15, 0x06, 0x02, 0x2D, 0x1D, 0x01, 0x04, 0x0E, 0x02,
+	0x00, 0x01, 0x1F, 0x15, 0x1B, 0x01, 0x1F, 0x11, 0x06, 0x02, 0x2E, 0x1D,
+	0x09, 0x00, 0x00, 0x1B, 0x05, 0x05, 0x01, 0x00, 0x01, 0x7F, 0x00, 0x70,
+	0x00, 0x00, 0x1B, 0x05, 0x02, 0x32, 0x1D, 0x2A, 0x73, 0x00, 0x00, 0x22,
+	0x1B, 0x01, 0x00, 0x13, 0x06, 0x01, 0x00, 0x1A, 0x16, 0x04, 0x74, 0x00,
+	0x01, 0x01, 0x00, 0x00, 0x01, 0x0B, 0x00, 0x00, 0x01, 0x15, 0x00, 0x00,
+	0x01, 0x1F, 0x00, 0x00, 0x01, 0x29, 0x00, 0x00, 0x01, 0x33, 0x00, 0x00,
+	0x7B, 0x1A, 0x00, 0x00, 0x1B, 0x06, 0x07, 0x7C, 0x1B, 0x06, 0x01, 0x16,
+	0x04, 0x76, 0x00, 0x00, 0x01, 0x00, 0x20, 0x21, 0x0B, 0x2B, 0x00
+};
+
+static const uint16_t t0_caddr[] = {
+	0,
+	5,
+	10,
+	15,
+	20,
+	24,
+	28,
+	32,
+	36,
+	40,
+	44,
+	48,
+	52,
+	56,
+	60,
+	64,
+	68,
+	72,
+	76,
+	80,
+	84,
+	88,
+	93,
+	98,
+	103,
+	111,
+	116,
+	121,
+	126,
+	131,
+	136,
+	141,
+	146,
+	151,
+	156,
+	161,
+	166,
+	181,
+	187,
+	193,
+	198,
+	206,
+	214,
+	220,
+	231,
+	246,
+	250,
+	255,
+	260,
+	265,
+	270,
+	275,
+	279,
+	289,
+	620,
+	625,
+	639,
+	659,
+	666,
+	678,
+	692,
+	707,
+	740,
+	960,
+	974,
+	991,
+	1000,
+	1067,
+	1123,
+	1127,
+	1131,
+	1136,
+	1184,
+	1210,
+	1254,
+	1265,
+	1274,
+	1287,
+	1291,
+	1295,
+	1299,
+	1303,
+	1307,
+	1311,
+	1315,
+	1327
+};
+
+#define T0_INTERPRETED   39
+
+#define T0_ENTER(ip, rp, slot)   do { \
+		const unsigned char *t0_newip; \
+		uint32_t t0_lnum; \
+		t0_newip = &t0_codeblock[t0_caddr[(slot) - T0_INTERPRETED]]; /* code address of the slot */ \
+		t0_lnum = t0_parse7E_unsigned(&t0_newip); /* leading count stored at that address */ \
+		(rp) += t0_lnum; /* skip that many return-stack cells */ \
+		*((rp) ++) = (uint32_t)((ip) - &t0_codeblock[0]) + (t0_lnum << 16); /* caller offset, count in the high half */ \
+		(ip) = t0_newip; \
+	} while (0)
+
+#define T0_DEFENTRY(name, slot) \
+void \
+name(void *ctx) \
+{ \
+	t0_context *t0ctx = ctx; \
+	t0ctx->ip = &t0_codeblock[0]; \
+	T0_ENTER(t0ctx->ip, t0ctx->rp, slot); \
+}
+
+T0_DEFENTRY(br_x509_decoder_init_main, 92)
+
+#define T0_NEXT(t0ipp)   (*(*(t0ipp)) ++)
+
+void
+br_x509_decoder_run(void *t0ctx)
+{
+	uint32_t *dp, *rp;
+	const unsigned char *ip;
+
+#define T0_LOCAL(x)    (*(rp - 2 - (x)))
+#define T0_POP()       (*-- dp)
+#define T0_POPi()      (*(int32_t *)(-- dp))
+#define T0_PEEK(x)     (*(dp - 1 - (x)))
+#define T0_PEEKi(x)    (*(int32_t *)(dp - 1 - (x)))
+#define T0_PUSH(v)     do { *dp = (v); dp ++; } while (0)
+#define T0_PUSHi(v)    do { *(int32_t *)dp = (v); dp ++; } while (0)
+#define T0_RPOP()      (*-- rp)
+#define T0_RPOPi()     (*(int32_t *)(-- rp))
+#define T0_RPUSH(v)    do { *rp = (v); rp ++; } while (0)
+#define T0_RPUSHi(v)   do { *(int32_t *)rp = (v); rp ++; } while (0)
+#define T0_ROLL(x)     do { \
+	size_t t0len = (size_t)(x); \
+	uint32_t t0tmp = *(dp - 1 - t0len); \
+	memmove(dp - t0len - 1, dp - t0len, t0len * sizeof *dp); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_SWAP()      do { \
+	uint32_t t0tmp = *(dp - 2); \
+	*(dp - 2) = *(dp - 1); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_ROT()       do { \
+	uint32_t t0tmp = *(dp - 3); \
+	*(dp - 3) = *(dp - 2); \
+	*(dp - 2) = *(dp - 1); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_NROT()       do { \
+	uint32_t t0tmp = *(dp - 1); \
+	*(dp - 1) = *(dp - 2); \
+	*(dp - 2) = *(dp - 3); \
+	*(dp - 3) = t0tmp; \
+} while (0)
+#define T0_PICK(x)      do { \
+	uint32_t t0depth = (x); \
+	T0_PUSH(T0_PEEK(t0depth)); \
+} while (0)
+#define T0_CO()         do { \
+	goto t0_exit; \
+} while (0)
+#define T0_RET()        goto t0_next
+
+	dp = ((t0_context *)t0ctx)->dp;
+	rp = ((t0_context *)t0ctx)->rp;
+	ip = ((t0_context *)t0ctx)->ip;
+	goto t0_next;
+	for (;;) {
+		uint32_t t0x;
+
+	t0_next:
+		t0x = T0_NEXT(&ip);
+		if (t0x < T0_INTERPRETED) {
+			switch (t0x) {
+				int32_t t0off;
+
+			case 0: /* ret */
+				t0x = T0_RPOP();
+				rp -= (t0x >> 16);
+				t0x &= 0xFFFF;
+				if (t0x == 0) {
+					ip = NULL;
+					goto t0_exit;
+				}
+				ip = &t0_codeblock[t0x];
+				break;
+			case 1: /* literal constant */
+				T0_PUSHi(t0_parse7E_signed(&ip));
+				break;
+			case 2: /* read local */
+				T0_PUSH(T0_LOCAL(t0_parse7E_unsigned(&ip)));
+				break;
+			case 3: /* write local */
+				T0_LOCAL(t0_parse7E_unsigned(&ip)) = T0_POP();
+				break;
+			case 4: /* jump */
+				t0off = t0_parse7E_signed(&ip);
+				ip += t0off;
+				break;
+			case 5: /* jump if */
+				t0off = t0_parse7E_signed(&ip);
+				if (T0_POP()) {
+					ip += t0off;
+				}
+				break;
+			case 6: /* jump if not */
+				t0off = t0_parse7E_signed(&ip);
+				if (!T0_POP()) {
+					ip += t0off;
+				}
+				break;
+			case 7: {
+				/* %25 */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSHi(a % b);
+
+				}
+				break;
+			case 8: {
+				/* * */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a * b);
+
+				}
+				break;
+			case 9: {
+				/* + */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a + b);
+
+				}
+				break;
+			case 10: {
+				/* - */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a - b);
+
+				}
+				break;
+			case 11: {
+				/* -rot */
+ T0_NROT(); 
+				}
+				break;
+			case 12: {
+				/* / */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSHi(a / b);
+
+				}
+				break;
+			case 13: {
+				/* < */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a < b));
+
+				}
+				break;
+			case 14: {
+				/* << */
+
+	int c = (int)T0_POPi();
+	uint32_t x = T0_POP();
+	T0_PUSH(x << c);
+
+				}
+				break;
+			case 15: {
+				/* <= */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a <= b));
+
+				}
+				break;
+			case 16: {
+				/* <> */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(-(uint32_t)(a != b));
+
+				}
+				break;
+			case 17: {
+				/* = */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(-(uint32_t)(a == b));
+
+				}
+				break;
+			case 18: {
+				/* > */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a > b));
+
+				}
+				break;
+			case 19: {
+				/* >= */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a >= b));
+
+				}
+				break;
+			case 20: {
+				/* >> */
+
+	int c = (int)T0_POPi();
+	int32_t x = T0_POPi();
+	T0_PUSHi(x >> c);
+
+				}
+				break;
+			case 21: {
+				/* and */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a & b);
+
+				}
+				break;
+			case 22: {
+				/* co */
+ T0_CO(); 
+				}
+				break;
+			case 23: {
+				/* copy-ec-pkey */
+
+	size_t qlen = T0_POP();
+	uint32_t curve = T0_POP();
+	CTX->pkey.key_type = BR_KEYTYPE_EC;
+	CTX->pkey.key.ec.curve = curve;
+	CTX->pkey.key.ec.q = CTX->pkey_data;
+	CTX->pkey.key.ec.qlen = qlen;
+
+				}
+				break;
+			case 24: {
+				/* copy-rsa-pkey */
+
+	size_t elen = T0_POP();
+	size_t nlen = T0_POP();
+	CTX->pkey.key_type = BR_KEYTYPE_RSA;
+	CTX->pkey.key.rsa.n = CTX->pkey_data;
+	CTX->pkey.key.rsa.nlen = nlen;
+	CTX->pkey.key.rsa.e = CTX->pkey_data + nlen;
+	CTX->pkey.key.rsa.elen = elen;
+
+				}
+				break;
+			case 25: {
+				/* data-get8 */
+
+	size_t addr = T0_POP();
+	T0_PUSH(t0_datablock[addr]);
+
+				}
+				break;
+			case 26: {
+				/* drop */
+ (void)T0_POP(); 
+				}
+				break;
+			case 27: {
+				/* dup */
+ T0_PUSH(T0_PEEK(0)); 
+				}
+				break;
+			case 28: {
+				/* eqOID */
+
+	const unsigned char *a2 = &t0_datablock[T0_POP()];
+	const unsigned char *a1 = &CTX->pad[0];
+	size_t len = a1[0];
+	int x;
+	if (len == a2[0]) {
+		x = -(memcmp(a1 + 1, a2 + 1, len) == 0);
+	} else {
+		x = 0;
+	}
+	T0_PUSH((uint32_t)x);
+
+				}
+				break;
+			case 29: {
+				/* fail */
+
+	CTX->err = T0_POPi();
+	T0_CO();
+
+				}
+				break;
+			case 30: {
+				/* neg */
+
+	uint32_t a = T0_POP();
+	T0_PUSH(-a);
+
+				}
+				break;
+			case 31: {
+				/* or */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a | b);
+
+				}
+				break;
+			case 32: {
+				/* over */
+ T0_PUSH(T0_PEEK(1)); 
+				}
+				break;
+			case 33: {
+				/* read-blob-inner */
+
+	uint32_t len = T0_POP();
+	uint32_t addr = T0_POP();
+	size_t clen = CTX->hlen;
+	if (clen > len) {
+		clen = (size_t)len;
+	}
+	if (addr != 0) {
+		memcpy((unsigned char *)CTX + addr, CTX->hbuf, clen);
+	}
+	if (CTX->copy_dn && CTX->append_dn) {
+		CTX->append_dn(CTX->append_dn_ctx, CTX->hbuf, clen);
+	}
+	CTX->hbuf += clen;
+	CTX->hlen -= clen;
+	T0_PUSH(addr + clen);
+	T0_PUSH(len - clen);
+
+				}
+				break;
+			case 34: {
+				/* read8-low */
+
+	if (CTX->hlen == 0) {
+		T0_PUSHi(-1);
+	} else {
+		unsigned char x = *CTX->hbuf ++;
+		if (CTX->copy_dn && CTX->append_dn) {
+			CTX->append_dn(CTX->append_dn_ctx, &x, 1);
+		}
+		CTX->hlen --;
+		T0_PUSH(x);
+	}
+
+				}
+				break;
+			case 35: {
+				/* rot */
+ T0_ROT(); 
+				}
+				break;
+			case 36: {
+				/* set32 */
+
+	uint32_t addr = T0_POP();
+	*(uint32_t *)(void *)((unsigned char *)CTX + addr) = T0_POP();
+
+				}
+				break;
+			case 37: {
+				/* set8 */
+
+	uint32_t addr = T0_POP();
+	*((unsigned char *)CTX + addr) = (unsigned char)T0_POP();
+
+				}
+				break;
+			case 38: {
+				/* swap */
+ T0_SWAP(); 
+				}
+				break;
+			}
+
+		} else {
+			T0_ENTER(ip, rp, t0x);
+		}
+	}
+t0_exit:
+	((t0_context *)t0ctx)->dp = dp;
+	((t0_context *)t0ctx)->rp = rp;
+	((t0_context *)t0ctx)->ip = ip;
+}
diff --git a/third_party/bearssl/src/x509_knownkey.c b/third_party/bearssl/src/x509_knownkey.c
new file mode 100644
index 0000000..7674f3f
--- /dev/null
+++ b/third_party/bearssl/src/x509_knownkey.c
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_x509.h -- set up ctx to report the caller-supplied RSA key */
+void
+br_x509_knownkey_init_rsa(br_x509_knownkey_context *ctx,
+	const br_rsa_public_key *pk, unsigned usages)
+{
+	ctx->vtable = &br_x509_knownkey_vtable;
+	ctx->pkey.key_type = BR_KEYTYPE_RSA;
+	ctx->pkey.key.rsa = *pk; /* structure assignment from *pk */
+	ctx->usages = usages; /* handed back later by kk_get_pkey() */
+}
+
+/* see bearssl_x509.h -- set up ctx to report the caller-supplied EC key */
+void
+br_x509_knownkey_init_ec(br_x509_knownkey_context *ctx,
+	const br_ec_public_key *pk, unsigned usages)
+{
+	ctx->vtable = &br_x509_knownkey_vtable;
+	ctx->pkey.key_type = BR_KEYTYPE_EC;
+	ctx->pkey.key.ec = *pk; /* structure assignment from *pk */
+	ctx->usages = usages; /* handed back later by kk_get_pkey() */
+}
+
+static void
+kk_start_chain(const br_x509_class **ctx, const char *server_name)
+{ /* no-op entry of br_x509_knownkey_vtable: both arguments are ignored */
+	(void)ctx;
+	(void)server_name;
+}
+
+static void
+kk_start_cert(const br_x509_class **ctx, uint32_t length)
+{ /* no-op entry of br_x509_knownkey_vtable: the length is ignored */
+	(void)ctx;
+	(void)length;
+}
+
+static void
+kk_append(const br_x509_class **ctx, const unsigned char *buf, size_t len)
+{ /* no-op entry of br_x509_knownkey_vtable: the bytes are never read */
+	(void)ctx;
+	(void)buf;
+	(void)len;
+}
+
+static void
+kk_end_cert(const br_x509_class **ctx)
+{ /* no-op entry of br_x509_knownkey_vtable */
+	(void)ctx;
+}
+
+static unsigned
+kk_end_chain(const br_x509_class **ctx)
+{ /* entry of br_x509_knownkey_vtable; the context is not consulted */
+	(void)ctx;
+	return 0; /* unconditional; presumably the success value -- TODO confirm */
+}
+
+static const br_x509_pkey *
+kk_get_pkey(const br_x509_class *const *ctx, unsigned *usages)
+{ /* return the key stored at init time; never returns NULL */
+	const br_x509_knownkey_context *xc;
+
+	xc = (const br_x509_knownkey_context *)ctx; /* ctx is treated as the context itself */
+	if (usages != NULL) { /* usages is an optional output */
+		*usages = xc->usages;
+	}
+	return &xc->pkey; /* points into the context, not a copy */
+}
+
+/* see bearssl_x509.h -- method table installed by the two init functions */
+const br_x509_class br_x509_knownkey_vtable = {
+	sizeof(br_x509_knownkey_context), /* size of the matching context */
+	kk_start_chain, /* no-op */
+	kk_start_cert, /* no-op */
+	kk_append, /* no-op */
+	kk_end_cert, /* no-op */
+	kk_end_chain, /* always returns 0 */
+	kk_get_pkey /* returns the stored key and usages */
+};
diff --git a/third_party/bearssl/src/x509_minimal.c b/third_party/bearssl/src/x509_minimal.c
new file mode 100644
index 0000000..b3079de
--- /dev/null
+++ b/third_party/bearssl/src/x509_minimal.c
@@ -0,0 +1,1697 @@
+/* Automatically generated code; do not modify directly. */
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct {
+	uint32_t *dp; /* stack pointer; xm_start_chain() sets it to cc->dp_stack */
+	uint32_t *rp; /* stack pointer; xm_start_chain() sets it to cc->rp_stack */
+	const unsigned char *ip; /* presumably the position in t0_codeblock -- TODO confirm */
+} t0_context;
+
+static uint32_t
+t0_parse7E_unsigned(const unsigned char **p)
+{ /* decode an unsigned integer stored as 7-bit groups at *p; advances *p */
+	uint32_t x;
+
+	x = 0;
+	for (;;) {
+		unsigned y;
+
+		y = *(*p) ++; /* fetch one byte and step the caller's cursor */
+		x = (x << 7) | (uint32_t)(y & 0x7F); /* append its low 7 bits */
+		if (y < 0x80) { /* top bit clear: this was the last byte */
+			return x;
+		}
+	}
+}
+
+static int32_t
+t0_parse7E_signed(const unsigned char **p)
+{ /* signed counterpart of t0_parse7E_unsigned(); also advances *p */
+	int neg;
+	uint32_t x;
+
+	neg = ((**p) >> 6) & 1; /* bit 6 of the first byte is taken as the sign */
+	x = (uint32_t)-neg; /* all-ones when negative, zero otherwise */
+	for (;;) {
+		unsigned y;
+
+		y = *(*p) ++; /* fetch one byte and step the caller's cursor */
+		x = (x << 7) | (uint32_t)(y & 0x7F); /* append its low 7 bits */
+		if (y < 0x80) { /* top bit clear: this was the last byte */
+			if (neg) {
+				return -(int32_t)~x - 1; /* built from ~x instead of casting x directly */
+			} else {
+				return (int32_t)x;
+			}
+		}
+	}
+}
+
+#define T0_VBYTE(x, n)   (unsigned char)((((uint32_t)(x) >> (n)) & 0x7F) | 0x80) /* 7 bits of x at offset n, top bit set */
+#define T0_FBYTE(x, n)   (unsigned char)(((uint32_t)(x) >> (n)) & 0x7F) /* 7 bits of x at offset n, top bit clear */
+#define T0_SBYTE(x)      (unsigned char)((((uint32_t)(x) >> 28) + 0xF8) ^ 0xF8) /* leading byte of T0_INT5, from bits 28 and up */
+#define T0_INT1(x)       T0_FBYTE(x, 0) /* T0_INTn(x): x spelled as n comma-separated bytes */
+#define T0_INT2(x)       T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT3(x)       T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT4(x)       T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+#define T0_INT5(x)       T0_SBYTE(x), T0_VBYTE(x, 21), T0_VBYTE(x, 14), T0_VBYTE(x, 7), T0_FBYTE(x, 0)
+
+/* static const unsigned char t0_datablock[]; */
+
+
+void br_x509_minimal_init_main(void *t0ctx);
+
+void br_x509_minimal_run(void *t0ctx);
+
+
+
+#include "inner.h"
+
+
+
+
+
+#include "inner.h"
+
+/*
+ * Implementation Notes
+ * --------------------
+ *
+ * The C code pushes the data by chunks; all decoding is done in the
+ * T0 code. The cert_length value is set to the certificate length when
+ * a new certificate is started; the T0 code picks it up as outer limit,
+ * and decoding functions use it to ensure that no attempt is made at
+ * reading past it. The T0 code also checks that once the certificate is
+ * decoded, there are no trailing bytes.
+ *
+ * The T0 code sets cert_length to 0 when the certificate is fully
+ * decoded.
+ *
+ * The C code must still perform two checks:
+ *
+ *  -- If the certificate length is 0, then the T0 code will not be
+ *  invoked at all. This invalid condition must thus be reported by the
+ *  C code.
+ *
+ *  -- When reaching the end of certificate, the C code must verify that
+ *  the certificate length has been set to 0, thereby signaling that
+ *  the T0 code properly decoded a certificate.
+ *
+ * Processing of a chain works in the following way:
+ *
+ *  -- The error flag is set to a non-zero value when validation is
+ *  finished. The value is either BR_ERR_X509_OK (validation is
+ *  successful) or another non-zero error code. When a non-zero error
+ *  code is obtained, the remaining bytes in the current certificate and
+ *  the subsequent certificates (if any) are completely ignored.
+ *
+ *  -- Each certificate is decoded in due course, with the following
+ *  "interesting points":
+ *
+ *     -- Start of the TBS: the multihash engine is reset and activated.
+ *
+ *     -- Start of the issuer DN: the secondary hash engine is started,
+ *     to process the encoded issuer DN.
+ *
+ *     -- End of the issuer DN: the secondary hash engine is stopped. The
+ *     resulting hash value is computed and then copied into the
+ *     next_dn_hash[] buffer.
+ *
+ *     -- Start of the subject DN: the secondary hash engine is started,
+ *     to process the encoded subject DN.
+ *
+ *     -- For the EE certificate only: the Common Name, if any, is matched
+ *     against the expected server name.
+ *
+ *     -- End of the subject DN: the secondary hash engine is stopped. The
+ *     resulting hash value is computed into the pad. It is then processed:
+ *
+ *        -- If this is the EE certificate, then the hash is ignored
+ *        (except for direct trust processing, see later; the hash is
+ *        simply left in current_dn_hash[]).
+ *
+ *        -- Otherwise, the hashed subject DN is compared with the saved
+ *        hash value (in saved_dn_hash[]). They must match.
+ *
+ *     Either way, the next_dn_hash[] value is then copied into the
+ *     saved_dn_hash[] value. Thus, at that point, saved_dn_hash[]
+ *     contains the hash of the issuer DN for the current certificate,
+ *     and current_dn_hash[] contains the hash of the subject DN for the
+ *     current certificate.
+ *
+ *     -- Public key: it is decoded into the cert_pkey[] buffer. Unknown
+ *     key types are reported at that point.
+ *
+ *        -- If this is the EE certificate, then the key type is compared
+ *        with the expected key type (initialization parameter). The public
+ *        key data is copied to ee_pkey_data[]. The key and hashed subject
+ *        DN are also compared with the "direct trust" keys; if the key
+ *        and DN are matched, then validation ends with a success.
+ *
+ *        -- Otherwise, the saved signature (cert_sig[]) is verified
+ *        against the saved TBS hash (tbs_hash[]) and that freshly
+ *        decoded public key. Failure here ends validation with an error.
+ *
+ *     -- Extensions: extension values are processed in due order.
+ *
+ *        -- Basic Constraints: for all certificates except EE, must be
+ *        present, indicate a CA, and have a path length compatible with
+ *        the chain length so far.
+ *
+ *        -- Key Usage: for the EE, if present, must allow signatures
+ *        or encryption/key exchange, as required for the cipher suite.
+ *        For non-EE, if present, must have the "certificate sign" bit.
+ *
+ *        -- Subject Alt Name: for the EE, dNSName names are matched
+ *        against the server name. Ignored for non-EE.
+ *
+ *        -- Authority Key Identifier, Subject Key Identifier, Issuer
+ *        Alt Name, Subject Directory Attributes, CRL Distribution Points,
+ *        Freshest CRL, Authority Info Access and Subject Info Access
+ *        extensions are always ignored: they either contain only
+ *        informative data, or they relate to revocation processing, which
+ *        we explicitly do not support.
+ *
+ *        -- All other extensions are ignored if non-critical. If a
+ *        critical extension other than the ones above is encountered,
+ *        then a failure is reported.
+ *
+ *     -- End of the TBS: the multihash engine is stopped.
+ *
+ *     -- Signature algorithm: the signature algorithm on the
+ *     certificate is decoded. A failure is reported if that algorithm
+ *     is unknown. The hashed TBS corresponding to the signature hash
+ *     function is computed and stored in tbs_hash[] (if not supported,
+ *     then a failure is reported). The hash OID and length are stored
+ *     in cert_sig_hash_oid and cert_sig_hash_len.
+ *
+ *     -- Signature value: the signature value is copied into the
+ *     cert_sig[] array.
+ *
+ *     -- Certificate end: the hashed issuer DN (saved_dn_hash[]) is
+ *     looked up in the trust store (CA trust anchors only); for all
+ *     that match, the signature (cert_sig[]) is verified against the
+ *     anchor public key (hashed TBS is in tbs_hash[]). If one of these
+ *     signatures is valid, then validation ends with a success.
+ *
+ *  -- If the chain end is reached without obtaining a validation success,
+ *  then validation is reported as failed.
+ */
+
+/*
+ * The T0 compiler will produce these prototype declarations in the
+ * header.
+ *
+void br_x509_minimal_init_main(void *ctx);
+void br_x509_minimal_run(void *ctx);
+ */
+
+/* see bearssl_x509.h -- clear the context, then store the configuration */
+void
+br_x509_minimal_init(br_x509_minimal_context *ctx,
+	const br_hash_class *dn_hash_impl,
+	const br_x509_trust_anchor *trust_anchors, size_t trust_anchors_num)
+{
+	memset(ctx, 0, sizeof *ctx); /* every field not set below starts as zero bytes */
+	ctx->vtable = &br_x509_minimal_vtable;
+	ctx->dn_hash_impl = dn_hash_impl; /* used by hash_dn() and DNHASH_LEN */
+	ctx->trust_anchors = trust_anchors; /* pointer is stored; the array is not copied */
+	ctx->trust_anchors_num = trust_anchors_num;
+}
+
+static void
+xm_start_chain(const br_x509_class **ctx, const char *server_name)
+{ /* reset per-chain state: name elements, key, counters, error, T0 machine */
+	br_x509_minimal_context *cc;
+	size_t u;
+
+	cc = (br_x509_minimal_context *)(void *)ctx;
+	for (u = 0; u < cc->num_name_elts; u ++) {
+		cc->name_elts[u].status = 0;
+		cc->name_elts[u].buf[0] = 0; /* presumably empties the output string -- TODO confirm */
+	}
+	memset(&cc->pkey, 0, sizeof cc->pkey);
+	cc->num_certs = 0;
+	cc->err = 0; /* 0: validation not finished yet */
+	cc->cpu.dp = cc->dp_stack; /* both T0 stack pointers restart at their buffers */
+	cc->cpu.rp = cc->rp_stack;
+	br_x509_minimal_init_main(&cc->cpu);
+	if (server_name == NULL || *server_name == 0) {
+		cc->server_name = NULL; /* an empty name is stored as NULL too */
+	} else {
+		cc->server_name = server_name; /* pointer kept; the string is not copied */
+	}
+}
+
+static void
+xm_start_cert(const br_x509_class **ctx, uint32_t length)
+{ /* record the announced length of the next certificate */
+	br_x509_minimal_context *cc;
+
+	cc = (br_x509_minimal_context *)(void *)ctx;
+	if (cc->err != 0) { /* validation already finished: ignore the rest */
+		return;
+	}
+	if (length == 0) { /* the T0 code would never run, so report it from C */
+		cc->err = BR_ERR_X509_TRUNCATED;
+		return;
+	}
+	cc->cert_length = length; /* picked up by the T0 code as outer limit */
+}
+
+static void
+xm_append(const br_x509_class **ctx, const unsigned char *buf, size_t len)
+{ /* hand one chunk of certificate bytes to the T0 code */
+	br_x509_minimal_context *cc;
+
+	cc = (br_x509_minimal_context *)(void *)ctx;
+	if (cc->err != 0) { /* validation already finished: drop the chunk */
+		return;
+	}
+	cc->hbuf = buf; /* chunk is referenced, not copied */
+	cc->hlen = len;
+	br_x509_minimal_run(&cc->cpu); /* all decoding happens in the T0 code */
+}
+
+static void
+xm_end_cert(const br_x509_class **ctx)
+{ /* close the current certificate and check that it was fully decoded */
+	br_x509_minimal_context *cc;
+
+	cc = (br_x509_minimal_context *)(void *)ctx;
+	if (cc->err == 0 && cc->cert_length != 0) { /* T0 code zeroes cert_length when done */
+		cc->err = BR_ERR_X509_TRUNCATED;
+	}
+	cc->num_certs ++; /* counted even when an error is pending */
+}
+
+static unsigned
+xm_end_chain(const br_x509_class **ctx)
+{ /* turn the final err value into the chain verdict; 0 means success */
+	br_x509_minimal_context *cc;
+
+	cc = (br_x509_minimal_context *)(void *)ctx;
+	if (cc->err == 0) { /* no verdict was reached while decoding */
+		if (cc->num_certs == 0) {
+			cc->err = BR_ERR_X509_EMPTY_CHAIN;
+		} else {
+			cc->err = BR_ERR_X509_NOT_TRUSTED;
+		}
+	} else if (cc->err == BR_ERR_X509_OK) {
+		return 0; /* success is reported as 0, not as BR_ERR_X509_OK */
+	}
+	return (unsigned)cc->err; /* any other code is returned unchanged */
+}
+
+static const br_x509_pkey *
+xm_get_pkey(const br_x509_class *const *ctx, unsigned *usages)
+{ /* expose the decoded key, but only for the two outcomes tested below */
+	br_x509_minimal_context *cc;
+
+	cc = (br_x509_minimal_context *)(void *)ctx;
+	if (cc->err == BR_ERR_X509_OK
+		|| cc->err == BR_ERR_X509_NOT_TRUSTED)
+	{
+		if (usages != NULL) { /* usages is an optional output */
+			*usages = cc->key_usages;
+		}
+		return &((br_x509_minimal_context *)(void *)ctx)->pkey; /* same object as &cc->pkey */
+	} else {
+		return NULL; /* any other err value: no key is returned */
+	}
+}
+
+/* see bearssl_x509.h -- method table installed by br_x509_minimal_init() */
+const br_x509_class br_x509_minimal_vtable = {
+	sizeof(br_x509_minimal_context), /* size of the matching context */
+	xm_start_chain, /* resets per-chain state */
+	xm_start_cert, /* records the certificate length */
+	xm_append, /* feeds a chunk to the T0 code */
+	xm_end_cert, /* checks the certificate was fully decoded */
+	xm_end_chain, /* produces the final verdict */
+	xm_get_pkey /* returns the decoded key, or NULL */
+};
+
+#define CTX   ((br_x509_minimal_context *)(void *)((unsigned char *)t0ctx - offsetof(br_x509_minimal_context, cpu))) /* context that holds t0ctx as its cpu member */
+#define CONTEXT_NAME   br_x509_minimal_context /* type named by the offsetof() entries of t0_codeblock */
+
+#define DNHASH_LEN   ((CTX->dn_hash_impl->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK) /* presumably the DN hash output size -- TODO confirm */
+
+/*
+ * Hash a DN (from a trust anchor) into the buffer at out. The engine's own
+ * dn_hash_impl and dn_hash state are used: init, one update over the len
+ * bytes at dn, then out. NOTE(review): out capacity is not checked here.
+ */
+static void
+hash_dn(br_x509_minimal_context *ctx, const void *dn, size_t len,
+	unsigned char *out)
+{
+	ctx->dn_hash_impl->init(&ctx->dn_hash.vtable); /* restarts the shared state */
+	ctx->dn_hash_impl->update(&ctx->dn_hash.vtable, dn, len);
+	ctx->dn_hash_impl->out(&ctx->dn_hash.vtable, out);
+}
+
+/*
+ * Compare two unsigned big-endian integers for equality; leading bytes of
+ * value 0 are skipped on both sides. Returns 1 if equal, 0 otherwise.
+ */
+static int
+eqbigint(const unsigned char *b1, size_t len1,
+	const unsigned char *b2, size_t len2)
+{
+	while (len1 > 0 && *b1 == 0) { /* drop leading zero bytes of b1 */
+		b1 ++;
+		len1 --;
+	}
+	while (len2 > 0 && *b2 == 0) { /* drop leading zero bytes of b2 */
+		b2 ++;
+		len2 --;
+	}
+	if (len1 != len2) { /* different significant lengths: different values */
+		return 0;
+	}
+	return memcmp(b1, b2, len1) == 0; /* two all-zero inputs compare equal (length 0) */
+}
+
+/*
+ * Compare two len-byte strings for equality, ignoring case for the ASCII
+ * letters A-Z only. Returns 1 if they match (always for len 0), 0 otherwise.
+ */
+static int
+eqnocase(const void *s1, const void *s2, size_t len)
+{
+	const unsigned char *buf1, *buf2;
+
+	buf1 = s1;
+	buf2 = s2;
+	while (len -- > 0) {
+		int x1, x2;
+
+		x1 = *buf1 ++;
+		x2 = *buf2 ++;
+		if (x1 >= 'A' && x1 <= 'Z') { /* fold upper case to lower case */
+			x1 += 'a' - 'A';
+		}
+		if (x2 >= 'A' && x2 <= 'Z') { /* same folding for the second string */
+			x2 += 'a' - 'A';
+		}
+		if (x1 != x2) { /* first mismatch ends the comparison */
+			return 0;
+		}
+	}
+	return 1;
+}
+
+static int verify_signature(br_x509_minimal_context *ctx,
+	const br_x509_pkey *pk);
+
+
+
+static const unsigned char t0_datablock[] = { /* constant bytes for the T0 code; presumably length-prefixed OIDs and lookup tables -- TODO confirm */
+	0x00, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x01, 0x09,
+	0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x05, 0x09, 0x2A, 0x86,
+	0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01, 0x0E, 0x09, 0x2A, 0x86, 0x48, 0x86,
+	0xF7, 0x0D, 0x01, 0x01, 0x0B, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D,
+	0x01, 0x01, 0x0C, 0x09, 0x2A, 0x86, 0x48, 0x86, 0xF7, 0x0D, 0x01, 0x01,
+	0x0D, 0x05, 0x2B, 0x0E, 0x03, 0x02, 0x1A, 0x09, 0x60, 0x86, 0x48, 0x01,
+	0x65, 0x03, 0x04, 0x02, 0x04, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03,
+	0x04, 0x02, 0x01, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02,
+	0x02, 0x09, 0x60, 0x86, 0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03, 0x07,
+	0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x02, 0x01, 0x08, 0x2A, 0x86, 0x48, 0xCE,
+	0x3D, 0x03, 0x01, 0x07, 0x05, 0x2B, 0x81, 0x04, 0x00, 0x22, 0x05, 0x2B,
+	0x81, 0x04, 0x00, 0x23, 0x07, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x01,
+	0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x01, 0x08, 0x2A, 0x86,
+	0x48, 0xCE, 0x3D, 0x04, 0x03, 0x02, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D,
+	0x04, 0x03, 0x03, 0x08, 0x2A, 0x86, 0x48, 0xCE, 0x3D, 0x04, 0x03, 0x04,
+	0x03, 0x55, 0x04, 0x03, 0x00, 0x1F, 0x03, 0xFC, 0x07, 0x7F, 0x0B, 0x5E,
+	0x0F, 0x1F, 0x12, 0xFE, 0x16, 0xBF, 0x1A, 0x9F, 0x1E, 0x7E, 0x22, 0x3F,
+	0x26, 0x1E, 0x29, 0xDF, 0x00, 0x1F, 0x03, 0xFD, 0x07, 0x9F, 0x0B, 0x7E,
+	0x0F, 0x3F, 0x13, 0x1E, 0x16, 0xDF, 0x1A, 0xBF, 0x1E, 0x9E, 0x22, 0x5F,
+	0x26, 0x3E, 0x29, 0xFF, 0x03, 0x55, 0x1D, 0x13, 0x03, 0x55, 0x1D, 0x0F,
+	0x03, 0x55, 0x1D, 0x11, 0x03, 0x55, 0x1D, 0x20, 0x08, 0x2B, 0x06, 0x01,
+	0x05, 0x05, 0x07, 0x02, 0x01, 0x03, 0x55, 0x1D, 0x23, 0x03, 0x55, 0x1D,
+	0x0E, 0x03, 0x55, 0x1D, 0x12, 0x03, 0x55, 0x1D, 0x09, 0x03, 0x55, 0x1D,
+	0x1F, 0x03, 0x55, 0x1D, 0x2E, 0x08, 0x2B, 0x06, 0x01, 0x05, 0x05, 0x07,
+	0x01, 0x01, 0x08, 0x2B, 0x06, 0x01, 0x05, 0x05, 0x07, 0x01, 0x0B
+};
+
+static const unsigned char t0_codeblock[] = {
+	0x00, 0x01, 0x00, 0x0D, 0x00, 0x00, 0x01, 0x00, 0x10, 0x00, 0x00, 0x01,
+	0x00, 0x11, 0x00, 0x00, 0x01, 0x01, 0x09, 0x00, 0x00, 0x01, 0x01, 0x0A,
+	0x00, 0x00, 0x25, 0x25, 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_BOOLEAN), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_DN), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_SERVER_NAME), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_TAG_CLASS), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_TAG_VALUE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_BAD_TIME), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_CRITICAL_EXTENSION), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_DN_MISMATCH), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_EXPIRED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_EXTRA_ELEMENT), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_FORBIDDEN_KEY_USAGE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_INDEFINITE_LENGTH), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_INNER_TRUNC), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_LIMIT_EXCEEDED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_NOT_CA), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_NOT_CONSTRUCTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_NOT_PRIMITIVE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_OVERFLOW), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_PARTIAL_BYTE), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_UNEXPECTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_UNSUPPORTED), 0x00, 0x00, 0x01,
+	T0_INT1(BR_ERR_X509_WEAK_PUBLIC_KEY), 0x00, 0x00, 0x01,
+	T0_INT1(BR_KEYTYPE_EC), 0x00, 0x00, 0x01, T0_INT1(BR_KEYTYPE_RSA),
+	0x00, 0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, cert_length)), 0x00,
+	0x00, 0x01, T0_INT2(offsetof(CONTEXT_NAME, cert_sig)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(CONTEXT_NAME, cert_sig_hash_len)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(CONTEXT_NAME, cert_sig_hash_oid)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(CONTEXT_NAME, cert_sig_len)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, cert_signer_key_type)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(CONTEXT_NAME, current_dn_hash)), 0x00, 0x00,
+	0x01, T0_INT2(offsetof(CONTEXT_NAME, key_usages)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(br_x509_minimal_context, pkey_data)), 0x01,
+	T0_INT2(BR_X509_BUFSIZE_KEY), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, min_rsa_size)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, next_dn_hash)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, num_certs)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, pad)), 0x00, 0x00, 0x01,
+	T0_INT2(offsetof(CONTEXT_NAME, saved_dn_hash)), 0x00, 0x00, 0x01, 0x80,
+	0x73, 0x00, 0x00, 0x01, 0x80, 0x7C, 0x00, 0x00, 0x01, 0x81, 0x02, 0x00,
+	0x00, 0x8F, 0x05, 0x05, 0x33, 0x41, 0x01, 0x00, 0x00, 0x33, 0x01, 0x0A,
+	0x0E, 0x09, 0x01, 0x9A, 0xFF, 0xB8, 0x00, 0x0A, 0x00, 0x00, 0x01, 0x82,
+	0x19, 0x00, 0x00, 0x01, 0x82, 0x01, 0x00, 0x00, 0x01, 0x81, 0x68, 0x00,
+	0x02, 0x03, 0x00, 0x03, 0x01, 0x26, 0x02, 0x01, 0x13, 0x3A, 0x02, 0x00,
+	0x0F, 0x15, 0x00, 0x00, 0x01, 0x81, 0x74, 0x00, 0x00, 0x05, 0x02, 0x51,
+	0x29, 0x00, 0x00, 0x06, 0x02, 0x52, 0x29, 0x00, 0x00, 0x01, 0x10, 0x74,
+	0x00, 0x00, 0x11, 0x05, 0x02, 0x55, 0x29, 0x71, 0x00, 0x00, 0x11, 0x05,
+	0x02, 0x55, 0x29, 0x72, 0x00, 0x00, 0x06, 0x02, 0x4B, 0x29, 0x00, 0x00,
+	0x01, 0x82, 0x11, 0x00, 0x00, 0x26, 0x21, 0x01, 0x08, 0x0E, 0x3A, 0x3F,
+	0x21, 0x09, 0x00, 0x0B, 0x03, 0x00, 0x5A, 0x2B, 0xAC, 0x38, 0xAC, 0xB0,
+	0x26, 0x01, 0x20, 0x11, 0x06, 0x11, 0x25, 0x71, 0xAA, 0xB0, 0x01, 0x02,
+	0x75, 0xAD, 0x01, 0x02, 0x12, 0x06, 0x02, 0x56, 0x29, 0x76, 0xB0, 0x01,
+	0x02, 0x75, 0xAB, 0xAC, 0xBF, 0x99, 0x64, 0x60, 0x22, 0x16, 0xAC, 0xA4,
+	0x03, 0x01, 0x03, 0x02, 0xA4, 0x02, 0x02, 0x02, 0x01, 0x19, 0x06, 0x02,
+	0x4A, 0x29, 0x76, 0x02, 0x00, 0x06, 0x05, 0x9A, 0x03, 0x03, 0x04, 0x09,
+	0x99, 0x60, 0x67, 0x22, 0x28, 0x05, 0x02, 0x49, 0x29, 0x67, 0x64, 0x22,
+	0x16, 0xAC, 0xAC, 0x9B, 0x05, 0x02, 0x56, 0x29, 0xB9, 0x27, 0x06, 0x27,
+	0xBF, 0xA1, 0xAC, 0x62, 0xA7, 0x03, 0x05, 0x62, 0x3A, 0x02, 0x05, 0x09,
+	0x3A, 0x02, 0x05, 0x0A, 0xA7, 0x03, 0x06, 0x76, 0x63, 0x2A, 0x01, 0x81,
+	0x00, 0x09, 0x02, 0x05, 0x12, 0x06, 0x02, 0x57, 0x29, 0x76, 0x59, 0x03,
+	0x04, 0x04, 0x3A, 0x85, 0x27, 0x06, 0x34, 0x9B, 0x05, 0x02, 0x56, 0x29,
+	0x68, 0x27, 0x06, 0x04, 0x01, 0x17, 0x04, 0x12, 0x69, 0x27, 0x06, 0x04,
+	0x01, 0x18, 0x04, 0x0A, 0x6A, 0x27, 0x06, 0x04, 0x01, 0x19, 0x04, 0x02,
+	0x56, 0x29, 0x03, 0x07, 0x76, 0xA1, 0x26, 0x03, 0x08, 0x26, 0x62, 0x33,
+	0x0D, 0x06, 0x02, 0x4F, 0x29, 0xA2, 0x58, 0x03, 0x04, 0x04, 0x02, 0x56,
+	0x29, 0x76, 0x02, 0x00, 0x06, 0x21, 0x02, 0x04, 0x59, 0x30, 0x11, 0x06,
+	0x08, 0x25, 0x02, 0x05, 0x02, 0x06, 0x1E, 0x04, 0x10, 0x58, 0x30, 0x11,
+	0x06, 0x08, 0x25, 0x02, 0x07, 0x02, 0x08, 0x1D, 0x04, 0x03, 0x56, 0x29,
+	0x25, 0x04, 0x24, 0x02, 0x04, 0x59, 0x30, 0x11, 0x06, 0x08, 0x25, 0x02,
+	0x05, 0x02, 0x06, 0x24, 0x04, 0x10, 0x58, 0x30, 0x11, 0x06, 0x08, 0x25,
+	0x02, 0x07, 0x02, 0x08, 0x23, 0x04, 0x03, 0x56, 0x29, 0x25, 0x26, 0x06,
+	0x01, 0x29, 0x25, 0x01, 0x00, 0x03, 0x09, 0xB1, 0x01, 0x21, 0x8C, 0x01,
+	0x22, 0x8C, 0x26, 0x01, 0x23, 0x11, 0x06, 0x81, 0x26, 0x25, 0x71, 0xAA,
+	0xAC, 0x26, 0x06, 0x81, 0x1A, 0x01, 0x00, 0x03, 0x0A, 0xAC, 0x9B, 0x25,
+	0xB0, 0x26, 0x01, 0x01, 0x11, 0x06, 0x04, 0xA3, 0x03, 0x0A, 0xB0, 0x01,
+	0x04, 0x75, 0xAA, 0x6E, 0x27, 0x06, 0x0F, 0x02, 0x00, 0x06, 0x03, 0xC0,
+	0x04, 0x05, 0x96, 0x01, 0x7F, 0x03, 0x09, 0x04, 0x80, 0x6C, 0x8E, 0x27,
+	0x06, 0x06, 0x02, 0x00, 0x98, 0x04, 0x80, 0x62, 0xC2, 0x27, 0x06, 0x11,
+	0x02, 0x00, 0x06, 0x09, 0x01, 0x00, 0x03, 0x03, 0x95, 0x03, 0x03, 0x04,
+	0x01, 0xC0, 0x04, 0x80, 0x4D, 0x70, 0x27, 0x06, 0x0A, 0x02, 0x0A, 0x06,
+	0x03, 0x97, 0x04, 0x01, 0xC0, 0x04, 0x3F, 0x6D, 0x27, 0x06, 0x03, 0xC0,
+	0x04, 0x38, 0xC5, 0x27, 0x06, 0x03, 0xC0, 0x04, 0x31, 0x8D, 0x27, 0x06,
+	0x03, 0xC0, 0x04, 0x2A, 0xC3, 0x27, 0x06, 0x03, 0xC0, 0x04, 0x23, 0x77,
+	0x27, 0x06, 0x03, 0xC0, 0x04, 0x1C, 0x82, 0x27, 0x06, 0x03, 0xC0, 0x04,
+	0x15, 0x6C, 0x27, 0x06, 0x03, 0xC0, 0x04, 0x0E, 0xC4, 0x27, 0x06, 0x03,
+	0xC0, 0x04, 0x07, 0x02, 0x0A, 0x06, 0x02, 0x48, 0x29, 0xC0, 0x76, 0x76,
+	0x04, 0xFE, 0x62, 0x76, 0x76, 0x04, 0x08, 0x01, 0x7F, 0x11, 0x05, 0x02,
+	0x55, 0x29, 0x25, 0x76, 0x39, 0x02, 0x00, 0x06, 0x08, 0x02, 0x03, 0x3B,
+	0x2F, 0x05, 0x02, 0x44, 0x29, 0x02, 0x00, 0x06, 0x01, 0x17, 0x02, 0x00,
+	0x02, 0x09, 0x2F, 0x05, 0x02, 0x50, 0x29, 0xB0, 0x73, 0xAA, 0x9B, 0x06,
+	0x80, 0x77, 0xBA, 0x27, 0x06, 0x07, 0x01, 0x02, 0x59, 0x87, 0x04, 0x80,
+	0x5E, 0xBB, 0x27, 0x06, 0x07, 0x01, 0x03, 0x59, 0x88, 0x04, 0x80, 0x53,
+	0xBC, 0x27, 0x06, 0x07, 0x01, 0x04, 0x59, 0x89, 0x04, 0x80, 0x48, 0xBD,
+	0x27, 0x06, 0x06, 0x01, 0x05, 0x59, 0x8A, 0x04, 0x3E, 0xBE, 0x27, 0x06,
+	0x06, 0x01, 0x06, 0x59, 0x8B, 0x04, 0x34, 0x7C, 0x27, 0x06, 0x06, 0x01,
+	0x02, 0x58, 0x87, 0x04, 0x2A, 0x7D, 0x27, 0x06, 0x06, 0x01, 0x03, 0x58,
+	0x88, 0x04, 0x20, 0x7E, 0x27, 0x06, 0x06, 0x01, 0x04, 0x58, 0x89, 0x04,
+	0x16, 0x7F, 0x27, 0x06, 0x06, 0x01, 0x05, 0x58, 0x8A, 0x04, 0x0C, 0x80,
+	0x27, 0x06, 0x06, 0x01, 0x06, 0x58, 0x8B, 0x04, 0x02, 0x56, 0x29, 0x5D,
+	0x34, 0x5F, 0x36, 0x1C, 0x26, 0x05, 0x02, 0x56, 0x29, 0x5C, 0x36, 0x04,
+	0x02, 0x56, 0x29, 0xBF, 0xA1, 0x26, 0x01, T0_INT2(BR_X509_BUFSIZE_SIG),
+	0x12, 0x06, 0x02, 0x4F, 0x29, 0x26, 0x5E, 0x34, 0x5B, 0xA2, 0x76, 0x76,
+	0x01, 0x00, 0x5A, 0x35, 0x18, 0x00, 0x00, 0x01, 0x30, 0x0A, 0x26, 0x01,
+	0x00, 0x01, 0x09, 0x6F, 0x05, 0x02, 0x47, 0x29, 0x00, 0x00, 0x30, 0x30,
+	0x00, 0x00, 0x01, 0x81, 0x08, 0x00, 0x00, 0x01, 0x81, 0x10, 0x00, 0x00,
+	0x01, 0x81, 0x19, 0x00, 0x00, 0x01, 0x81, 0x22, 0x00, 0x00, 0x01, 0x81,
+	0x2B, 0x00, 0x01, 0x7B, 0x01, 0x01, 0x11, 0x3A, 0x01, 0x83, 0xFD, 0x7F,
+	0x11, 0x15, 0x06, 0x03, 0x3A, 0x25, 0x00, 0x3A, 0x26, 0x03, 0x00, 0x26,
+	0xC6, 0x05, 0x04, 0x41, 0x01, 0x00, 0x00, 0x26, 0x01, 0x81, 0x00, 0x0D,
+	0x06, 0x04, 0x93, 0x04, 0x80, 0x49, 0x26, 0x01, 0x90, 0x00, 0x0D, 0x06,
+	0x0F, 0x01, 0x06, 0x14, 0x01, 0x81, 0x40, 0x2F, 0x93, 0x02, 0x00, 0x01,
+	0x00, 0x94, 0x04, 0x33, 0x26, 0x01, 0x83, 0xFF, 0x7F, 0x0D, 0x06, 0x14,
+	0x01, 0x0C, 0x14, 0x01, 0x81, 0x60, 0x2F, 0x93, 0x02, 0x00, 0x01, 0x06,
+	0x94, 0x02, 0x00, 0x01, 0x00, 0x94, 0x04, 0x17, 0x01, 0x12, 0x14, 0x01,
+	0x81, 0x70, 0x2F, 0x93, 0x02, 0x00, 0x01, 0x0C, 0x94, 0x02, 0x00, 0x01,
+	0x06, 0x94, 0x02, 0x00, 0x01, 0x00, 0x94, 0x00, 0x00, 0x01, 0x82, 0x15,
+	0x00, 0x00, 0x26, 0x01, 0x83, 0xB0, 0x00, 0x01, 0x83, 0xB7, 0x7F, 0x6F,
+	0x00, 0x00, 0x01, 0x81, 0x34, 0x00, 0x00, 0x01, 0x80, 0x6B, 0x00, 0x00,
+	0x01, 0x81, 0x78, 0x00, 0x00, 0x01, 0x3D, 0x00, 0x00, 0x01, 0x80, 0x43,
+	0x00, 0x00, 0x01, 0x80, 0x4D, 0x00, 0x00, 0x01, 0x80, 0x57, 0x00, 0x00,
+	0x01, 0x80, 0x61, 0x00, 0x00, 0x30, 0x11, 0x06, 0x04, 0x41, 0xAA, 0xBF,
+	0xB1, 0x00, 0x00, 0x01, 0x82, 0x09, 0x00, 0x00, 0x01, 0x81, 0x6C, 0x00,
+	0x00, 0x26, 0x01, 0x83, 0xB8, 0x00, 0x01, 0x83, 0xBF, 0x7F, 0x6F, 0x00,
+	0x00, 0x01, 0x30, 0x61, 0x36, 0x01, 0x7F, 0x79, 0x1A, 0x01, 0x00, 0x79,
+	0x1A, 0x04, 0x7A, 0x00, 0x01, 0x81, 0x38, 0x00, 0x01, 0x7B, 0x0D, 0x06,
+	0x02, 0x4E, 0x29, 0x26, 0x03, 0x00, 0x0A, 0x02, 0x00, 0x00, 0x00, 0x30,
+	0x26, 0x3E, 0x3A, 0x01, 0x82, 0x00, 0x13, 0x2F, 0x06, 0x04, 0x41, 0x01,
+	0x00, 0x00, 0x30, 0x66, 0x09, 0x36, 0x3F, 0x00, 0x00, 0x14, 0x01, 0x3F,
+	0x15, 0x01, 0x81, 0x00, 0x2F, 0x93, 0x00, 0x02, 0x01, 0x00, 0x03, 0x00,
+	0xAC, 0x26, 0x06, 0x80, 0x59, 0xB0, 0x01, 0x20, 0x30, 0x11, 0x06, 0x17,
+	0x25, 0x71, 0xAA, 0x9B, 0x25, 0x01, 0x7F, 0x2E, 0x03, 0x01, 0xB0, 0x01,
+	0x20, 0x74, 0xAA, 0xAF, 0x02, 0x01, 0x20, 0x76, 0x76, 0x04, 0x38, 0x01,
+	0x21, 0x30, 0x11, 0x06, 0x08, 0x25, 0x72, 0xB3, 0x01, 0x01, 0x1F, 0x04,
+	0x2A, 0x01, 0x22, 0x30, 0x11, 0x06, 0x11, 0x25, 0x72, 0xB3, 0x26, 0x06,
+	0x06, 0x2C, 0x02, 0x00, 0x2F, 0x03, 0x00, 0x01, 0x02, 0x1F, 0x04, 0x13,
+	0x01, 0x26, 0x30, 0x11, 0x06, 0x08, 0x25, 0x72, 0xB3, 0x01, 0x06, 0x1F,
+	0x04, 0x05, 0x41, 0xAB, 0x01, 0x00, 0x25, 0x04, 0xFF, 0x23, 0x76, 0x02,
+	0x00, 0x00, 0x00, 0xAC, 0xB1, 0x26, 0x01, 0x01, 0x11, 0x06, 0x08, 0xA3,
+	0x05, 0x02, 0x50, 0x29, 0xB1, 0x04, 0x02, 0x50, 0x29, 0x26, 0x01, 0x02,
+	0x11, 0x06, 0x0C, 0x25, 0x72, 0xAD, 0x65, 0x2B, 0x40, 0x0D, 0x06, 0x02,
+	0x50, 0x29, 0xB1, 0x01, 0x7F, 0x10, 0x06, 0x02, 0x55, 0x29, 0x25, 0x76,
+	0x00, 0x00, 0xAC, 0x26, 0x06, 0x1A, 0xAC, 0x9B, 0x25, 0x26, 0x06, 0x11,
+	0xAC, 0x26, 0x06, 0x0C, 0xAC, 0x9B, 0x25, 0x86, 0x27, 0x05, 0x02, 0x48,
+	0x29, 0xBF, 0x04, 0x71, 0x76, 0x76, 0x04, 0x63, 0x76, 0x00, 0x02, 0x03,
+	0x00, 0xB0, 0x01, 0x03, 0x75, 0xAA, 0xB7, 0x03, 0x01, 0x02, 0x01, 0x01,
+	0x07, 0x12, 0x06, 0x02, 0x55, 0x29, 0x26, 0x01, 0x00, 0x30, 0x11, 0x06,
+	0x05, 0x25, 0x4C, 0x29, 0x04, 0x15, 0x01, 0x01, 0x30, 0x11, 0x06, 0x0A,
+	0x25, 0xB7, 0x02, 0x01, 0x14, 0x02, 0x01, 0x0E, 0x04, 0x05, 0x25, 0xB7,
+	0x01, 0x00, 0x25, 0x02, 0x00, 0x06, 0x19, 0x01, 0x00, 0x30, 0x01, 0x38,
+	0x15, 0x06, 0x03, 0x01, 0x10, 0x2F, 0x3A, 0x01, 0x81, 0x40, 0x15, 0x06,
+	0x03, 0x01, 0x20, 0x2F, 0x61, 0x36, 0x04, 0x07, 0x01, 0x04, 0x15, 0x05,
+	0x02, 0x4C, 0x29, 0xBF, 0x00, 0x00, 0x37, 0xAC, 0xBF, 0x1B, 0x00, 0x03,
+	0x01, 0x00, 0x03, 0x00, 0x37, 0xAC, 0x26, 0x06, 0x30, 0xB0, 0x01, 0x11,
+	0x74, 0xAA, 0x26, 0x05, 0x02, 0x43, 0x29, 0x26, 0x06, 0x20, 0xAC, 0x9B,
+	0x25, 0x84, 0x27, 0x03, 0x01, 0x01, 0x00, 0x2E, 0x03, 0x02, 0xAF, 0x26,
+	0x02, 0x01, 0x15, 0x06, 0x07, 0x2C, 0x06, 0x04, 0x01, 0x7F, 0x03, 0x00,
+	0x02, 0x02, 0x20, 0x76, 0x04, 0x5D, 0x76, 0x04, 0x4D, 0x76, 0x1B, 0x02,
+	0x00, 0x00, 0x00, 0xB0, 0x01, 0x06, 0x75, 0xAE, 0x00, 0x00, 0xB5, 0x83,
+	0x06, 0x0E, 0x3A, 0x26, 0x05, 0x06, 0x41, 0x01, 0x00, 0x01, 0x00, 0x00,
+	0xB5, 0x6B, 0x04, 0x08, 0x8F, 0x06, 0x05, 0x25, 0x01, 0x00, 0x04, 0x00,
+	0x00, 0x00, 0xB6, 0x83, 0x06, 0x0E, 0x3A, 0x26, 0x05, 0x06, 0x41, 0x01,
+	0x00, 0x01, 0x00, 0x00, 0xB6, 0x6B, 0x04, 0x08, 0x8F, 0x06, 0x05, 0x25,
+	0x01, 0x00, 0x04, 0x00, 0x00, 0x00, 0xB7, 0x26, 0x01, 0x81, 0x00, 0x0D,
+	0x06, 0x04, 0x00, 0x04, 0x80, 0x55, 0x26, 0x01, 0x81, 0x40, 0x0D, 0x06,
+	0x07, 0x25, 0x01, 0x00, 0x00, 0x04, 0x80, 0x47, 0x26, 0x01, 0x81, 0x60,
+	0x0D, 0x06, 0x0E, 0x01, 0x1F, 0x15, 0x01, 0x01, 0xA0, 0x01, 0x81, 0x00,
+	0x01, 0x8F, 0x7F, 0x04, 0x32, 0x26, 0x01, 0x81, 0x70, 0x0D, 0x06, 0x0F,
+	0x01, 0x0F, 0x15, 0x01, 0x02, 0xA0, 0x01, 0x90, 0x00, 0x01, 0x83, 0xFF,
+	0x7F, 0x04, 0x1C, 0x26, 0x01, 0x81, 0x78, 0x0D, 0x06, 0x11, 0x01, 0x07,
+	0x15, 0x01, 0x03, 0xA0, 0x01, 0x84, 0x80, 0x00, 0x01, 0x80, 0xC3, 0xFF,
+	0x7F, 0x04, 0x04, 0x25, 0x01, 0x00, 0x00, 0x6F, 0x05, 0x03, 0x25, 0x01,
+	0x00, 0x00, 0x00, 0x3A, 0x26, 0x05, 0x06, 0x41, 0x01, 0x00, 0x01, 0x7F,
+	0x00, 0xB7, 0x33, 0x26, 0x3C, 0x06, 0x03, 0x3A, 0x25, 0x00, 0x01, 0x06,
+	0x0E, 0x3A, 0x26, 0x01, 0x06, 0x14, 0x01, 0x02, 0x10, 0x06, 0x04, 0x41,
+	0x01, 0x7F, 0x00, 0x01, 0x3F, 0x15, 0x09, 0x00, 0x00, 0x26, 0x06, 0x06,
+	0x0B, 0x9F, 0x33, 0x40, 0x04, 0x77, 0x25, 0x26, 0x00, 0x00, 0xB0, 0x01,
+	0x03, 0x75, 0xAA, 0xB7, 0x06, 0x02, 0x54, 0x29, 0x00, 0x00, 0x3A, 0x26,
+	0x06, 0x07, 0x31, 0x26, 0x06, 0x01, 0x1A, 0x04, 0x76, 0x41, 0x00, 0x00,
+	0x01, 0x01, 0x75, 0xA9, 0x01, 0x01, 0x10, 0x06, 0x02, 0x42, 0x29, 0xB7,
+	0x3D, 0x00, 0x04, 0xB0, 0x26, 0x01, 0x17, 0x01, 0x18, 0x6F, 0x05, 0x02,
+	0x47, 0x29, 0x01, 0x18, 0x11, 0x03, 0x00, 0x72, 0xAA, 0xA5, 0x02, 0x00,
+	0x06, 0x0C, 0x01, 0x80, 0x64, 0x08, 0x03, 0x01, 0xA5, 0x02, 0x01, 0x09,
+	0x04, 0x0E, 0x26, 0x01, 0x32, 0x0D, 0x06, 0x04, 0x01, 0x80, 0x64, 0x09,
+	0x01, 0x8E, 0x6C, 0x09, 0x03, 0x01, 0x02, 0x01, 0x01, 0x82, 0x6D, 0x08,
+	0x02, 0x01, 0x01, 0x03, 0x09, 0x01, 0x04, 0x0C, 0x09, 0x02, 0x01, 0x01,
+	0x80, 0x63, 0x09, 0x01, 0x80, 0x64, 0x0C, 0x0A, 0x02, 0x01, 0x01, 0x83,
+	0x0F, 0x09, 0x01, 0x83, 0x10, 0x0C, 0x09, 0x03, 0x03, 0x01, 0x01, 0x01,
+	0x0C, 0xA6, 0x40, 0x01, 0x01, 0x0E, 0x02, 0x01, 0x01, 0x04, 0x07, 0x3E,
+	0x02, 0x01, 0x01, 0x80, 0x64, 0x07, 0x3D, 0x02, 0x01, 0x01, 0x83, 0x10,
+	0x07, 0x3E, 0x2F, 0x15, 0x06, 0x03, 0x01, 0x18, 0x09, 0x91, 0x09, 0x78,
+	0x26, 0x01, 0x05, 0x14, 0x02, 0x03, 0x09, 0x03, 0x03, 0x01, 0x1F, 0x15,
+	0x01, 0x01, 0x3A, 0xA6, 0x02, 0x03, 0x09, 0x40, 0x03, 0x03, 0x01, 0x00,
+	0x01, 0x17, 0xA6, 0x01, 0x9C, 0x10, 0x08, 0x03, 0x02, 0x01, 0x00, 0x01,
+	0x3B, 0xA6, 0x01, 0x3C, 0x08, 0x02, 0x02, 0x09, 0x03, 0x02, 0x01, 0x00,
+	0x01, 0x3C, 0xA6, 0x02, 0x02, 0x09, 0x03, 0x02, 0xB7, 0x26, 0x01, 0x2E,
+	0x11, 0x06, 0x0D, 0x25, 0xB7, 0x26, 0x01, 0x30, 0x01, 0x39, 0x6F, 0x06,
+	0x03, 0x25, 0x04, 0x74, 0x01, 0x80, 0x5A, 0x10, 0x06, 0x02, 0x47, 0x29,
+	0x76, 0x02, 0x03, 0x02, 0x02, 0x00, 0x01, 0xB7, 0x7A, 0x01, 0x0A, 0x08,
+	0x03, 0x00, 0xB7, 0x7A, 0x02, 0x00, 0x09, 0x00, 0x02, 0x03, 0x00, 0x03,
+	0x01, 0xA5, 0x26, 0x02, 0x01, 0x02, 0x00, 0x6F, 0x05, 0x02, 0x47, 0x29,
+	0x00, 0x00, 0x33, 0xB0, 0x01, 0x02, 0x75, 0x0B, 0xA8, 0x00, 0x03, 0x26,
+	0x03, 0x00, 0x03, 0x01, 0x03, 0x02, 0xAA, 0xB7, 0x26, 0x01, 0x81, 0x00,
+	0x13, 0x06, 0x02, 0x53, 0x29, 0x26, 0x01, 0x00, 0x11, 0x06, 0x0B, 0x25,
+	0x26, 0x05, 0x04, 0x25, 0x01, 0x00, 0x00, 0xB7, 0x04, 0x6F, 0x02, 0x01,
+	0x26, 0x05, 0x02, 0x4F, 0x29, 0x40, 0x03, 0x01, 0x02, 0x02, 0x36, 0x02,
+	0x02, 0x3F, 0x03, 0x02, 0x26, 0x06, 0x03, 0xB7, 0x04, 0x68, 0x25, 0x02,
+	0x00, 0x02, 0x01, 0x0A, 0x00, 0x01, 0xB7, 0x26, 0x01, 0x81, 0x00, 0x0D,
+	0x06, 0x01, 0x00, 0x01, 0x81, 0x00, 0x0A, 0x26, 0x05, 0x02, 0x4D, 0x29,
+	0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x12, 0x06, 0x19, 0x02,
+	0x00, 0x40, 0x03, 0x00, 0x26, 0x01, 0x83, 0xFF, 0xFF, 0x7F, 0x12, 0x06,
+	0x02, 0x4E, 0x29, 0x01, 0x08, 0x0E, 0x3A, 0xB7, 0x33, 0x09, 0x04, 0x60,
+	0x00, 0x00, 0xA9, 0x92, 0x00, 0x00, 0xAA, 0xBF, 0x00, 0x00, 0xB0, 0x73,
+	0xAA, 0x00, 0x01, 0xAA, 0x26, 0x05, 0x02, 0x53, 0x29, 0xB7, 0x26, 0x01,
+	0x81, 0x00, 0x13, 0x06, 0x02, 0x53, 0x29, 0x03, 0x00, 0x26, 0x06, 0x16,
+	0xB7, 0x02, 0x00, 0x26, 0x01, 0x87, 0xFF, 0xFF, 0x7F, 0x13, 0x06, 0x02,
+	0x53, 0x29, 0x01, 0x08, 0x0E, 0x09, 0x03, 0x00, 0x04, 0x67, 0x25, 0x02,
+	0x00, 0x00, 0x00, 0xAA, 0x26, 0x01, 0x81, 0x7F, 0x12, 0x06, 0x08, 0xBF,
+	0x01, 0x00, 0x66, 0x36, 0x01, 0x00, 0x00, 0x26, 0x66, 0x36, 0x66, 0x3F,
+	0xA2, 0x01, 0x7F, 0x00, 0x00, 0xB0, 0x01, 0x0C, 0x30, 0x11, 0x06, 0x05,
+	0x25, 0x72, 0xB3, 0x04, 0x3E, 0x01, 0x12, 0x30, 0x11, 0x06, 0x05, 0x25,
+	0x72, 0xB4, 0x04, 0x33, 0x01, 0x13, 0x30, 0x11, 0x06, 0x05, 0x25, 0x72,
+	0xB4, 0x04, 0x28, 0x01, 0x14, 0x30, 0x11, 0x06, 0x05, 0x25, 0x72, 0xB4,
+	0x04, 0x1D, 0x01, 0x16, 0x30, 0x11, 0x06, 0x05, 0x25, 0x72, 0xB4, 0x04,
+	0x12, 0x01, 0x1E, 0x30, 0x11, 0x06, 0x05, 0x25, 0x72, 0xB2, 0x04, 0x07,
+	0x41, 0xAB, 0x01, 0x00, 0x01, 0x00, 0x25, 0x00, 0x01, 0xB7, 0x03, 0x00,
+	0x02, 0x00, 0x01, 0x05, 0x14, 0x01, 0x01, 0x15, 0x2D, 0x02, 0x00, 0x01,
+	0x06, 0x14, 0x26, 0x01, 0x01, 0x15, 0x06, 0x02, 0x45, 0x29, 0x01, 0x04,
+	0x0E, 0x02, 0x00, 0x01, 0x1F, 0x15, 0x26, 0x01, 0x1F, 0x11, 0x06, 0x02,
+	0x46, 0x29, 0x09, 0x00, 0x00, 0x26, 0x05, 0x05, 0x01, 0x00, 0x01, 0x7F,
+	0x00, 0xB0, 0x00, 0x01, 0xAA, 0x26, 0x05, 0x05, 0x66, 0x36, 0x01, 0x7F,
+	0x00, 0x01, 0x01, 0x03, 0x00, 0x9C, 0x26, 0x01, 0x83, 0xFF, 0x7E, 0x11,
+	0x06, 0x16, 0x25, 0x26, 0x06, 0x10, 0x9D, 0x26, 0x05, 0x05, 0x25, 0xBF,
+	0x01, 0x00, 0x00, 0x02, 0x00, 0x81, 0x03, 0x00, 0x04, 0x6D, 0x04, 0x1B,
+	0x26, 0x05, 0x05, 0x25, 0xBF, 0x01, 0x00, 0x00, 0x02, 0x00, 0x81, 0x03,
+	0x00, 0x26, 0x06, 0x0B, 0x9C, 0x26, 0x05, 0x05, 0x25, 0xBF, 0x01, 0x00,
+	0x00, 0x04, 0x6D, 0x25, 0x02, 0x00, 0x26, 0x05, 0x01, 0x00, 0x40, 0x66,
+	0x36, 0x01, 0x7F, 0x00, 0x01, 0xAA, 0x01, 0x01, 0x03, 0x00, 0x26, 0x06,
+	0x10, 0x9E, 0x26, 0x05, 0x05, 0x25, 0xBF, 0x01, 0x00, 0x00, 0x02, 0x00,
+	0x81, 0x03, 0x00, 0x04, 0x6D, 0x25, 0x02, 0x00, 0x26, 0x05, 0x01, 0x00,
+	0x40, 0x66, 0x36, 0x01, 0x7F, 0x00, 0x01, 0xAA, 0x01, 0x01, 0x03, 0x00,
+	0x26, 0x06, 0x10, 0xB7, 0x26, 0x05, 0x05, 0x25, 0xBF, 0x01, 0x00, 0x00,
+	0x02, 0x00, 0x81, 0x03, 0x00, 0x04, 0x6D, 0x25, 0x02, 0x00, 0x26, 0x05,
+	0x01, 0x00, 0x40, 0x66, 0x36, 0x01, 0x7F, 0x00, 0x00, 0xB7, 0x01, 0x08,
+	0x0E, 0x3A, 0xB7, 0x33, 0x09, 0x00, 0x00, 0xB7, 0x3A, 0xB7, 0x01, 0x08,
+	0x0E, 0x33, 0x09, 0x00, 0x00, 0x26, 0x05, 0x02, 0x4E, 0x29, 0x40, 0xB8,
+	0x00, 0x00, 0x32, 0x26, 0x01, 0x00, 0x13, 0x06, 0x01, 0x00, 0x25, 0x1A,
+	0x04, 0x74, 0x00, 0x01, 0x01, 0x00, 0x00, 0x01, 0x0B, 0x00, 0x00, 0x01,
+	0x15, 0x00, 0x00, 0x01, 0x1F, 0x00, 0x00, 0x01, 0x29, 0x00, 0x00, 0x01,
+	0x33, 0x00, 0x00, 0xC0, 0x25, 0x00, 0x00, 0x26, 0x06, 0x07, 0xC1, 0x26,
+	0x06, 0x01, 0x1A, 0x04, 0x76, 0x00, 0x00, 0x01, 0x00, 0x30, 0x31, 0x0B,
+	0x41, 0x00, 0x00, 0x01, 0x81, 0x70, 0x00, 0x00, 0x01, 0x82, 0x0D, 0x00,
+	0x00, 0x01, 0x82, 0x22, 0x00, 0x00, 0x01, 0x82, 0x05, 0x00, 0x00, 0x26,
+	0x01, 0x83, 0xFB, 0x50, 0x01, 0x83, 0xFB, 0x6F, 0x6F, 0x06, 0x04, 0x25,
+	0x01, 0x00, 0x00, 0x26, 0x01, 0x83, 0xB0, 0x00, 0x01, 0x83, 0xBF, 0x7F,
+	0x6F, 0x06, 0x04, 0x25, 0x01, 0x00, 0x00, 0x01, 0x83, 0xFF, 0x7F, 0x15,
+	0x01, 0x83, 0xFF, 0x7E, 0x0D, 0x00
+};
+
+static const uint16_t t0_caddr[] = { /* start offsets within t0_codeblock */
+	0, /* T0_ENTER reads entry [slot - T0_INTERPRETED] of this table */
+	5,
+	10,
+	15,
+	20,
+	25,
+	29,
+	33,
+	37,
+	41,
+	45,
+	49,
+	53,
+	57,
+	61,
+	65,
+	69,
+	73,
+	77,
+	81,
+	85,
+	89,
+	93,
+	97,
+	101,
+	105,
+	109,
+	113,
+	117,
+	121,
+	125,
+	130,
+	135,
+	140,
+	145,
+	150,
+	155,
+	160,
+	165,
+	173,
+	178,
+	183,
+	188,
+	193,
+	198,
+	203,
+	208,
+	213,
+	234,
+	239,
+	244,
+	249,
+	264,
+	269,
+	275,
+	281,
+	286,
+	294,
+	302,
+	308,
+	313,
+	324,
+	960,
+	975,
+	979,
+	984,
+	989,
+	994,
+	999,
+	1004,
+	1118,
+	1123,
+	1135,
+	1140,
+	1145,
+	1150,
+	1154,
+	1159,
+	1164,
+	1169,
+	1174,
+	1184,
+	1189,
+	1194,
+	1206,
+	1221,
+	1226,
+	1240,
+	1262,
+	1273,
+	1376,
+	1423,
+	1456,
+	1547,
+	1553,
+	1616,
+	1623,
+	1651,
+	1679,
+	1784,
+	1826,
+	1839,
+	1851,
+	1865,
+	1880,
+	2100,
+	2114,
+	2131,
+	2140,
+	2207,
+	2263,
+	2267,
+	2271,
+	2276,
+	2324,
+	2350,
+	2426,
+	2470,
+	2481,
+	2566,
+	2604,
+	2642,
+	2652,
+	2662,
+	2671,
+	2684,
+	2688,
+	2692,
+	2696,
+	2700,
+	2704,
+	2708,
+	2712,
+	2724,
+	2732,
+	2737,
+	2742,
+	2747,
+	2752
+};
+
+#define T0_INTERPRETED   60 /* opcodes >= this value are calls to interpreted words */
+/* Enter word `slot`: reserve its local slots on rp, push a return word, jump. */
+#define T0_ENTER(ip, rp, slot)   do { \
+		const unsigned char *t0_newip; /* start of the word's code */ \
+		uint32_t t0_lnum; /* slots reserved below the return word */ \
+		t0_newip = &t0_codeblock[t0_caddr[(slot) - T0_INTERPRETED]]; \
+		t0_lnum = t0_parse7E_unsigned(&t0_newip); \
+		(rp) += t0_lnum; \
+		*((rp) ++) = (uint32_t)((ip) - &t0_codeblock[0]) + (t0_lnum << 16); \
+		(ip) = t0_newip; /* low 16 bits pushed above: return offset */ \
+	} while (0)
+/* Define entry point `name`: it primes the context so execution starts at `slot`. */
+#define T0_DEFENTRY(name, slot) \
+void \
+name(void *ctx) \
+{ \
+	t0_context *t0ctx = ctx; \
+	t0ctx->ip = &t0_codeblock[0]; /* return offset 0: "ret" then stops */ \
+	T0_ENTER(t0ctx->ip, t0ctx->rp, slot); \
+}
+
+T0_DEFENTRY(br_x509_minimal_init_main, 144)
+/* Read the byte at *t0ipp and advance the pointer past it. */
+#define T0_NEXT(t0ipp)   (*(*(t0ipp)) ++)
+
+void
+br_x509_minimal_run(void *t0ctx)
+{
+	uint32_t *dp, *rp; /* data stack and return stack pointers */
+	const unsigned char *ip; /* next byte to read in t0_codeblock */
+	/* Stack helpers for the opcode bodies; the "i" variants access int32_t. */
+#define T0_LOCAL(x)    (*(rp - 2 - (x))) /* local x, below the return word */
+#define T0_POP()       (*-- dp)
+#define T0_POPi()      (*(int32_t *)(-- dp))
+#define T0_PEEK(x)     (*(dp - 1 - (x)))
+#define T0_PEEKi(x)    (*(int32_t *)(dp - 1 - (x)))
+#define T0_PUSH(v)     do { *dp = (v); dp ++; } while (0)
+#define T0_PUSHi(v)    do { *(int32_t *)dp = (v); dp ++; } while (0)
+#define T0_RPOP()      (*-- rp)
+#define T0_RPOPi()     (*(int32_t *)(-- rp))
+#define T0_RPUSH(v)    do { *rp = (v); rp ++; } while (0)
+#define T0_RPUSHi(v)   do { *(int32_t *)rp = (v); rp ++; } while (0)
+#define T0_ROLL(x)     do { /* bring the item at depth x to the top */ \
+	size_t t0len = (size_t)(x); \
+	uint32_t t0tmp = *(dp - 1 - t0len); \
+	memmove(dp - t0len - 1, dp - t0len, t0len * sizeof *dp); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_SWAP()      do { \
+	uint32_t t0tmp = *(dp - 2); \
+	*(dp - 2) = *(dp - 1); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_ROT()       do { /* (a b c) -> (b c a) */ \
+	uint32_t t0tmp = *(dp - 3); \
+	*(dp - 3) = *(dp - 2); \
+	*(dp - 2) = *(dp - 1); \
+	*(dp - 1) = t0tmp; \
+} while (0)
+#define T0_NROT()       do { /* (a b c) -> (c a b) */ \
+	uint32_t t0tmp = *(dp - 1); \
+	*(dp - 1) = *(dp - 2); \
+	*(dp - 2) = *(dp - 3); \
+	*(dp - 3) = t0tmp; \
+} while (0)
+#define T0_PICK(x)      do { \
+	uint32_t t0depth = (x); \
+	T0_PUSH(T0_PEEK(t0depth)); \
+} while (0)
+#define T0_CO()         do { /* leave the interpreter; state is saved at t0_exit */ \
+	goto t0_exit; \
+} while (0)
+#define T0_RET()        goto t0_next /* end the current opcode early */
+	/* Reload the interpreter state stored in the context. */
+	dp = ((t0_context *)t0ctx)->dp;
+	rp = ((t0_context *)t0ctx)->rp;
+	ip = ((t0_context *)t0ctx)->ip;
+	goto t0_next; /* enter the loop at the fetch step */
+	for (;;) {
+		uint32_t t0x;
+		/* Opcodes below T0_INTERPRETED run native code; others are calls. */
+	t0_next:
+		t0x = T0_NEXT(&ip);
+		if (t0x < T0_INTERPRETED) {
+			switch (t0x) {
+				int32_t t0off;
+
+			case 0: /* ret */
+				t0x = T0_RPOP();
+				rp -= (t0x >> 16); /* drop the slots T0_ENTER reserved */
+				t0x &= 0xFFFF; /* return offset in t0_codeblock */
+				if (t0x == 0) { /* offset 0: set by T0_DEFENTRY */
+					ip = NULL;
+					goto t0_exit;
+				}
+				ip = &t0_codeblock[t0x];
+				break;
+			case 1: /* literal constant */
+				T0_PUSHi(t0_parse7E_signed(&ip));
+				break;
+			case 2: /* read local */
+				T0_PUSH(T0_LOCAL(t0_parse7E_unsigned(&ip)));
+				break;
+			case 3: /* write local */
+				T0_LOCAL(t0_parse7E_unsigned(&ip)) = T0_POP();
+				break;
+			case 4: /* jump */
+				t0off = t0_parse7E_signed(&ip);
+				ip += t0off;
+				break;
+			case 5: /* jump if */
+				t0off = t0_parse7E_signed(&ip);
+				if (T0_POP()) {
+					ip += t0off;
+				}
+				break;
+			case 6: /* jump if not */
+				t0off = t0_parse7E_signed(&ip);
+				if (!T0_POP()) {
+					ip += t0off;
+				}
+				break;
+			case 7: {
+				/* %25 */
+	/* signed remainder ("%25" is presumably an escaped "%") */
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSHi(a % b);
+
+				}
+				break;
+			case 8: {
+				/* * */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a * b);
+
+				}
+				break;
+			case 9: {
+				/* + */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a + b);
+
+				}
+				break;
+			case 10: {
+				/* - */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a - b);
+
+				}
+				break;
+			case 11: {
+				/* -rot */
+ T0_NROT(); 
+				}
+				break;
+			case 12: {
+				/* / */
+	/* signed division */
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSHi(a / b);
+
+				}
+				break;
+			case 13: {
+				/* < */
+	/* signed compare; true is pushed as all-ones, false as 0 */
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a < b));
+
+				}
+				break;
+			case 14: {
+				/* << */
+
+	int c = (int)T0_POPi();
+	uint32_t x = T0_POP();
+	T0_PUSH(x << c);
+
+				}
+				break;
+			case 15: {
+				/* <= */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a <= b));
+
+				}
+				break;
+			case 16: {
+				/* <> */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(-(uint32_t)(a != b));
+
+				}
+				break;
+			case 17: {
+				/* = */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(-(uint32_t)(a == b));
+
+				}
+				break;
+			case 18: {
+				/* > */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a > b));
+
+				}
+				break;
+			case 19: {
+				/* >= */
+
+	int32_t b = T0_POPi();
+	int32_t a = T0_POPi();
+	T0_PUSH(-(uint32_t)(a >= b));
+
+				}
+				break;
+			case 20: {
+				/* >> */
+	/* the shift is applied to the value read as int32_t */
+	int c = (int)T0_POPi();
+	int32_t x = T0_POPi();
+	T0_PUSHi(x >> c);
+
+				}
+				break;
+			case 21: {
+				/* and */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a & b);
+
+				}
+				break;
+			case 22: {
+				/* blobcopy */
+	/* stack: dst src len; dst and src are byte offsets from CTX */
+	size_t len = T0_POP();
+	unsigned char *src = (unsigned char *)CTX + T0_POP();
+	unsigned char *dst = (unsigned char *)CTX + T0_POP();
+	memcpy(dst, src, len);
+
+				}
+				break;
+			case 23: {
+				/* check-direct-trust */
+	/* Look for a non-CA trust anchor with the current DN hash and same key. */
+	size_t u;
+
+	for (u = 0; u < CTX->trust_anchors_num; u ++) {
+		const br_x509_trust_anchor *ta;
+		unsigned char hashed_DN[64];
+		int kt;
+
+		ta = &CTX->trust_anchors[u];
+		if (ta->flags & BR_X509_TA_CA) {
+			continue;
+		}
+		hash_dn(CTX, ta->dn.data, ta->dn.len, hashed_DN);
+		if (memcmp(hashed_DN, CTX->current_dn_hash, DNHASH_LEN)) {
+			continue;
+		}
+		kt = CTX->pkey.key_type;
+		if ((ta->pkey.key_type & 0x0F) != kt) {
+			continue;
+		}
+		switch (kt) {
+
+		case BR_KEYTYPE_RSA:
+			if (!eqbigint(CTX->pkey.key.rsa.n,
+				CTX->pkey.key.rsa.nlen,
+				ta->pkey.key.rsa.n,
+				ta->pkey.key.rsa.nlen)
+				|| !eqbigint(CTX->pkey.key.rsa.e,
+				CTX->pkey.key.rsa.elen,
+				ta->pkey.key.rsa.e,
+				ta->pkey.key.rsa.elen))
+			{
+				continue;
+			}
+			break;
+
+		case BR_KEYTYPE_EC:
+			if (CTX->pkey.key.ec.curve != ta->pkey.key.ec.curve
+				|| CTX->pkey.key.ec.qlen != ta->pkey.key.ec.qlen
+				|| memcmp(CTX->pkey.key.ec.q,
+					ta->pkey.key.ec.q,
+					ta->pkey.key.ec.qlen) != 0)
+			{
+				continue;
+			}
+			break;
+
+		default:
+			continue;
+		}
+
+		/*
+		 * Direct trust match!
+		 */
+		CTX->err = BR_ERR_X509_OK;
+		T0_CO();
+	}
+
+				}
+				break;
+			case 24: {
+				/* check-trust-anchor-CA */
+	/* Try each CA anchor whose DN hash equals saved_dn_hash as signer. */
+	size_t u;
+
+	for (u = 0; u < CTX->trust_anchors_num; u ++) {
+		const br_x509_trust_anchor *ta;
+		unsigned char hashed_DN[64];
+
+		ta = &CTX->trust_anchors[u];
+		if (!(ta->flags & BR_X509_TA_CA)) {
+			continue;
+		}
+		hash_dn(CTX, ta->dn.data, ta->dn.len, hashed_DN);
+		if (memcmp(hashed_DN, CTX->saved_dn_hash, DNHASH_LEN)) {
+			continue;
+		}
+		if (verify_signature(CTX, &ta->pkey) == 0) {
+			CTX->err = BR_ERR_X509_OK;
+			T0_CO();
+		}
+	}
+
+				}
+				break;
+			case 25: {
+				/* check-validity-range */
+	/* Pushes r; without a callback: -1 before (nbd,nbs), 1 after (nad,nas), else 0. */
+	uint32_t nbs = T0_POP();
+	uint32_t nbd = T0_POP();
+	uint32_t nas = T0_POP();
+	uint32_t nad = T0_POP();
+	int r;
+	if (CTX->itime != 0) {
+		r = CTX->itime(CTX->itime_ctx, nbd, nbs, nad, nas);
+		if (r < -1 || r > 1) { /* any other callback result is an error */
+			CTX->err = BR_ERR_X509_TIME_UNKNOWN;
+			T0_CO();
+		}
+	} else {
+		uint32_t vd = CTX->days;
+		uint32_t vs = CTX->seconds;
+		if (vd == 0 && vs == 0) { /* (0, 0) is treated as "no time set" */
+			CTX->err = BR_ERR_X509_TIME_UNKNOWN;
+			T0_CO();
+		}
+		if (vd < nbd || (vd == nbd && vs < nbs)) {
+			r = -1;
+		} else if (vd > nad || (vd == nad && vs > nas)) {
+			r = 1;
+		} else {
+			r = 0;
+		}
+	}
+	T0_PUSHi(r);
+
+				}
+				break;
+			case 26: {
+				/* co */
+ T0_CO(); 
+				}
+				break;
+			case 27: {
+				/* compute-dn-hash */
+	/* Write the DN hash output to current_dn_hash and stop DN hashing. */
+	CTX->dn_hash_impl->out(&CTX->dn_hash.vtable, CTX->current_dn_hash);
+	CTX->do_dn_hash = 0;
+
+				}
+				break;
+			case 28: {
+				/* compute-tbs-hash */
+	/* Write hash `id` of the multihash to tbs_hash; push the returned length. */
+	int id = T0_POPi();
+	size_t len;
+	len = br_multihash_out(&CTX->mhash, id, CTX->tbs_hash);
+	T0_PUSH(len);
+
+				}
+				break;
+			case 29: {
+				/* copy-ee-ec-pkey */
+	/* Copy qlen bytes of pkey_data to ee_pkey_data; CTX->pkey refers to the copy. */
+	size_t qlen = T0_POP();
+	uint32_t curve = T0_POP();
+	memcpy(CTX->ee_pkey_data, CTX->pkey_data, qlen);
+	CTX->pkey.key_type = BR_KEYTYPE_EC;
+	CTX->pkey.key.ec.curve = curve;
+	CTX->pkey.key.ec.q = CTX->ee_pkey_data;
+	CTX->pkey.key.ec.qlen = qlen;
+
+				}
+				break;
+			case 30: {
+				/* copy-ee-rsa-pkey */
+	/* Modulus (nlen bytes) is followed by the exponent (elen bytes) in the buffer. */
+	size_t elen = T0_POP();
+	size_t nlen = T0_POP();
+	memcpy(CTX->ee_pkey_data, CTX->pkey_data, nlen + elen);
+	CTX->pkey.key_type = BR_KEYTYPE_RSA;
+	CTX->pkey.key.rsa.n = CTX->ee_pkey_data;
+	CTX->pkey.key.rsa.nlen = nlen;
+	CTX->pkey.key.rsa.e = CTX->ee_pkey_data + nlen;
+	CTX->pkey.key.rsa.elen = elen;
+
+				}
+				break;
+			case 31: {
+				/* copy-name-SAN */
+	/* pad[0] is the value length, pad[1..] its bytes; first pending match only. */
+	unsigned tag = T0_POP();
+	unsigned ok = T0_POP();
+	size_t u, len;
+
+	len = CTX->pad[0];
+	for (u = 0; u < CTX->num_name_elts; u ++) {
+		br_name_element *ne;
+
+		ne = &CTX->name_elts[u];
+		if (ne->status == 0 && ne->oid[0] == 0 && ne->oid[1] == tag) {
+			if (ok && ne->len > len) { /* room for value plus terminator */
+				memcpy(ne->buf, CTX->pad + 1, len);
+				ne->buf[len] = 0;
+				ne->status = 1;
+			} else {
+				ne->status = -1;
+			}
+			break;
+		}
+	}
+
+				}
+				break;
+			case 32: {
+				/* copy-name-element */
+	/* off is an index into name_elts; a negative off means nothing to do. */
+	size_t len;
+	int32_t off = T0_POPi();
+	int ok = T0_POPi();
+
+	if (off >= 0) {
+		br_name_element *ne = &CTX->name_elts[off];
+
+		if (ok) {
+			len = CTX->pad[0];
+			if (len < ne->len) {
+				memcpy(ne->buf, CTX->pad + 1, len);
+				ne->buf[len] = 0;
+				ne->status = 1;
+			} else {
+				ne->status = -1;
+			}
+		} else {
+			ne->status = -1;
+		}
+	}
+
+				}
+				break;
+			case 33: {
+				/* data-get8 */
+
+	size_t addr = T0_POP();
+	T0_PUSH(t0_datablock[addr]);
+
+				}
+				break;
+			case 34: {
+				/* dn-hash-length */
+
+	T0_PUSH(DNHASH_LEN);
+
+				}
+				break;
+			case 35: {
+				/* do-ecdsa-vrfy */
+	/* Wrap pkey_data as a temporary EC key; push verify_signature()'s result. */
+	size_t qlen = T0_POP();
+	int curve = T0_POP();
+	br_x509_pkey pk;
+
+	pk.key_type = BR_KEYTYPE_EC;
+	pk.key.ec.curve = curve;
+	pk.key.ec.q = CTX->pkey_data;
+	pk.key.ec.qlen = qlen;
+	T0_PUSH(verify_signature(CTX, &pk));
+
+				}
+				break;
+			case 36: {
+				/* do-rsa-vrfy */
+	/* Wrap pkey_data as a temporary RSA key; push verify_signature()'s result. */
+	size_t elen = T0_POP();
+	size_t nlen = T0_POP();
+	br_x509_pkey pk;
+
+	pk.key_type = BR_KEYTYPE_RSA;
+	pk.key.rsa.n = CTX->pkey_data;
+	pk.key.rsa.nlen = nlen;
+	pk.key.rsa.e = CTX->pkey_data + nlen;
+	pk.key.rsa.elen = elen;
+	T0_PUSH(verify_signature(CTX, &pk));
+
+				}
+				break;
+			case 37: {
+				/* drop */
+ (void)T0_POP(); 
+				}
+				break;
+			case 38: {
+				/* dup */
+ T0_PUSH(T0_PEEK(0)); 
+				}
+				break;
+			case 39: {
+				/* eqOID */
+	/* Length-prefixed value in pad vs. one in t0_datablock; -1 if equal. */
+	const unsigned char *a2 = &t0_datablock[T0_POP()];
+	const unsigned char *a1 = &CTX->pad[0];
+	size_t len = a1[0];
+	int x;
+	if (len == a2[0]) {
+		x = -(memcmp(a1 + 1, a2 + 1, len) == 0);
+	} else {
+		x = 0;
+	}
+	T0_PUSH((uint32_t)x);
+
+				}
+				break;
+			case 40: {
+				/* eqblob */
+	/* stack: a1 a2 len (byte offsets from CTX); pushes -1 if equal, else 0 */
+	size_t len = T0_POP();
+	const unsigned char *a2 = (const unsigned char *)CTX + T0_POP();
+	const unsigned char *a1 = (const unsigned char *)CTX + T0_POP();
+	T0_PUSHi(-(memcmp(a1, a2, len) == 0));
+
+				}
+				break;
+			case 41: {
+				/* fail */
+	/* Store the popped value as the error code and leave the interpreter. */
+	CTX->err = T0_POPi();
+	T0_CO();
+
+				}
+				break;
+			case 42: {
+				/* get16 */
+	/* addr is a byte offset from CTX */
+	uint32_t addr = T0_POP();
+	T0_PUSH(*(uint16_t *)(void *)((unsigned char *)CTX + addr));
+
+				}
+				break;
+			case 43: {
+				/* get32 */
+	/* addr is a byte offset from CTX */
+	uint32_t addr = T0_POP();
+	T0_PUSH(*(uint32_t *)(void *)((unsigned char *)CTX + addr));
+
+				}
+				break;
+			case 44: {
+				/* match-server-name */
+	/* Compare the length-prefixed name in pad with server_name; -1 on match. */
+	size_t n1, n2;
+
+	if (CTX->server_name == NULL) {
+		T0_PUSH(0);
+		T0_RET();
+	}
+	n1 = strlen(CTX->server_name);
+	n2 = CTX->pad[0];
+	if (n1 == n2 && eqnocase(&CTX->pad[1], CTX->server_name, n1)) {
+		T0_PUSHi(-1);
+		T0_RET();
+	}
+	if (n2 >= 2 && CTX->pad[1] == '*' && CTX->pad[2] == '.') {
+		size_t u;
+		/* "*." prefix: skip server_name up to its first '.', compare the rest */
+		u = 0;
+		while (u < n1 && CTX->server_name[u] != '.') {
+			u ++;
+		}
+		u ++;
+		n1 -= u;
+		if ((n2 - 2) == n1
+			&& eqnocase(&CTX->pad[3], CTX->server_name + u, n1))
+		{
+			T0_PUSHi(-1);
+			T0_RET();
+		}
+	}
+	T0_PUSH(0);
+
+				}
+				break;
+			case 45: {
+				/* neg */
+
+	uint32_t a = T0_POP();
+	T0_PUSH(-a);
+
+				}
+				break;
+			case 46: {
+				/* offset-name-element */
+	/* Push the index of a pending (status 0) element matching pad, else -1. */
+	unsigned san = T0_POP();
+	size_t u;
+
+	for (u = 0; u < CTX->num_name_elts; u ++) {
+		if (CTX->name_elts[u].status == 0) {
+			const unsigned char *oid;
+			size_t len, off;
+
+			oid = CTX->name_elts[u].oid;
+			if (san) {
+				if (oid[0] != 0 || oid[1] != 0) {
+					continue;
+				}
+				off = 2;
+			} else {
+				off = 0;
+			}
+			len = oid[off];
+			if (len != 0 && len == CTX->pad[0]
+				&& memcmp(oid + off + 1,
+					CTX->pad + 1, len) == 0)
+			{
+				T0_PUSH(u);
+				T0_RET();
+			}
+		}
+	}
+	T0_PUSHi(-1);
+
+				}
+				break;
+			case 47: {
+				/* or */
+
+	uint32_t b = T0_POP();
+	uint32_t a = T0_POP();
+	T0_PUSH(a | b);
+
+				}
+				break;
+			case 48: {
+				/* over */
+ T0_PUSH(T0_PEEK(1)); 
+				}
+				break;
+			case 49: {
+				/* read-blob-inner */
+	/* Consume up to len bytes of hbuf; addr 0 means the bytes are not stored. */
+	uint32_t len = T0_POP();
+	uint32_t addr = T0_POP();
+	size_t clen = CTX->hlen;
+	if (clen > len) {
+		clen = (size_t)len;
+	}
+	if (addr != 0) {
+		memcpy((unsigned char *)CTX + addr, CTX->hbuf, clen);
+	}
+	if (CTX->do_mhash) {
+		br_multihash_update(&CTX->mhash, CTX->hbuf, clen);
+	}
+	if (CTX->do_dn_hash) {
+		CTX->dn_hash_impl->update(
+			&CTX->dn_hash.vtable, CTX->hbuf, clen);
+	}
+	CTX->hbuf += clen;
+	CTX->hlen -= clen;
+	T0_PUSH(addr + clen); /* updated address */
+	T0_PUSH(len - clen); /* bytes still wanted */
+
+				}
+				break;
+			case 50: {
+				/* read8-low */
+	/* Push the next input byte, or -1 when hbuf is empty. */
+	if (CTX->hlen == 0) {
+		T0_PUSHi(-1);
+	} else {
+		unsigned char x = *CTX->hbuf ++;
+		if (CTX->do_mhash) {
+			br_multihash_update(&CTX->mhash, &x, 1);
+		}
+		if (CTX->do_dn_hash) {
+			CTX->dn_hash_impl->update(&CTX->dn_hash.vtable, &x, 1);
+		}
+		CTX->hlen --;
+		T0_PUSH(x);
+	}
+
+				}
+				break;
+			case 51: {
+				/* rot */
+ T0_ROT(); 
+				}
+				break;
+			case 52: {
+				/* set16 */
+	/* stack: value addr; addr is a byte offset from CTX */
+	uint32_t addr = T0_POP();
+	*(uint16_t *)(void *)((unsigned char *)CTX + addr) = T0_POP();
+
+				}
+				break;
+			case 53: {
+				/* set32 */
+	/* stack: value addr; addr is a byte offset from CTX */
+	uint32_t addr = T0_POP();
+	*(uint32_t *)(void *)((unsigned char *)CTX + addr) = T0_POP();
+
+				}
+				break;
+			case 54: {
+				/* set8 */
+	/* stack: value addr; addr is a byte offset from CTX */
+	uint32_t addr = T0_POP();
+	*((unsigned char *)CTX + addr) = (unsigned char)T0_POP();
+
+				}
+				break;
+			case 55: {
+				/* start-dn-hash */
+
+	CTX->dn_hash_impl->init(&CTX->dn_hash.vtable);
+	CTX->do_dn_hash = 1;
+
+				}
+				break;
+			case 56: {
+				/* start-tbs-hash */
+
+	br_multihash_init(&CTX->mhash);
+	CTX->do_mhash = 1;
+
+				}
+				break;
+			case 57: {
+				/* stop-tbs-hash */
+
+	CTX->do_mhash = 0;
+
+				}
+				break;
+			case 58: {
+				/* swap */
+ T0_SWAP(); 
+				}
+				break;
+			case 59: {
+				/* zero-server-name */
+	/* Pushes -1 when no server name is set, 0 otherwise. */
+	T0_PUSHi(-(CTX->server_name == NULL));
+
+				}
+				break;
+			}
+
+		} else {
+			T0_ENTER(ip, rp, t0x);
+		}
+	}
+t0_exit:
+	((t0_context *)t0ctx)->dp = dp; /* save state for the next call */
+	((t0_context *)t0ctx)->rp = rp;
+	((t0_context *)t0ctx)->ip = ip;
+}
+
+
+
+/*
+ * Verify the signature on the certificate with the provided public key.
+ * This function checks the public key type with regard to the expected
+ * type. Returned value is either 0 on success, or a non-zero error code.
+ */
+static int
+verify_signature(br_x509_minimal_context *ctx, const br_x509_pkey *pk)
+{
+	unsigned char recovered[64];
+	int signer_kt = ctx->cert_signer_key_type;
+
+	/* The offered key must be of the type the signature calls for. */
+	if ((pk->key_type & 0x0F) != signer_kt) {
+		return BR_ERR_X509_WRONG_KEY_TYPE;
+	}
+	if (signer_kt == BR_KEYTYPE_RSA) {
+		if (ctx->irsa == 0) {
+			return BR_ERR_X509_UNSUPPORTED;
+		}
+		/*
+		 * recovered[] is handed to the RSA callback, then its
+		 * first cert_sig_hash_len bytes must equal ctx->tbs_hash.
+		 */
+		if (!ctx->irsa(ctx->cert_sig, ctx->cert_sig_len,
+			&t0_datablock[ctx->cert_sig_hash_oid],
+			ctx->cert_sig_hash_len, &pk->key.rsa, recovered)
+			|| memcmp(ctx->tbs_hash, recovered,
+				ctx->cert_sig_hash_len) != 0)
+		{
+			return BR_ERR_X509_BAD_SIGNATURE;
+		}
+		return 0;
+	}
+	if (signer_kt == BR_KEYTYPE_EC) {
+		if (ctx->iecdsa == 0) {
+			return BR_ERR_X509_UNSUPPORTED;
+		}
+		/* The ECDSA callback is given ctx->tbs_hash directly. */
+		if (!ctx->iecdsa(ctx->iec, ctx->tbs_hash,
+			ctx->cert_sig_hash_len, &pk->key.ec,
+			ctx->cert_sig, ctx->cert_sig_len))
+		{
+			return BR_ERR_X509_BAD_SIGNATURE;
+		}
+		return 0;
+	}
+	return BR_ERR_X509_UNSUPPORTED;
+}
+
+
diff --git a/third_party/bearssl/src/x509_minimal_full.c b/third_party/bearssl/src/x509_minimal_full.c
new file mode 100644
index 0000000..2b54426
--- /dev/null
+++ b/third_party/bearssl/src/x509_minimal_full.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016 Thomas Pornin <[email protected]>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_x509.h */
+void
+br_x509_minimal_init_full(br_x509_minimal_context *xc,
+	const br_x509_trust_anchor *trust_anchors, size_t trust_anchors_num)
+{
+	/*
+	 * Every hash function is registered: the table entry at index
+	 * (id - 1) is installed under hash identifier id. MD5 is listed
+	 * too, but the X.509 validation engine will nonetheless refuse to
+	 * validate signatures that use MD5 as hash function.
+	 */
+	static const br_hash_class *all_hashes[] = {
+		&br_md5_vtable,
+		&br_sha1_vtable,
+		&br_sha224_vtable,
+		&br_sha256_vtable,
+		&br_sha384_vtable,
+		&br_sha512_vtable
+	};
+	int hash_id;
+
+	/* Base setup first, then the i31 RSA and ECDSA verifiers. */
+	br_x509_minimal_init(xc, &br_sha256_vtable,
+		trust_anchors, trust_anchors_num);
+	br_x509_minimal_set_rsa(xc, &br_rsa_i31_pkcs1_vrfy);
+	br_x509_minimal_set_ecdsa(xc,
+		&br_ec_prime_i31, &br_ecdsa_i31_vrfy_asn1);
+	hash_id = br_md5_ID;
+	while (hash_id <= br_sha512_ID) {
+		br_x509_minimal_set_hash(xc, hash_id, all_hashes[hash_id - 1]);
+		hash_id ++;
+	}
+}